1"""Functions to convert NetworkX graphs to and from common data containers
2like numpy arrays, scipy sparse arrays, and pandas DataFrames.
3
4The preferred way of converting data to a NetworkX graph is through the
5graph constructor. The constructor calls the `~networkx.convert.to_networkx_graph`
6function which attempts to guess the input type and convert it automatically.
7
8Examples
9--------
10Create a 10 node random graph from a numpy array
11
12>>> import numpy as np
13>>> rng = np.random.default_rng()
14>>> a = rng.integers(low=0, high=2, size=(10, 10))
15>>> DG = nx.from_numpy_array(a, create_using=nx.DiGraph)
16
17or equivalently:
18
19>>> DG = nx.DiGraph(a)
20
21which calls `from_numpy_array` internally based on the type of ``a``.
22
23See Also
24--------
25nx_agraph, nx_pydot
26"""
27
28import itertools
29from collections import defaultdict
30
31import networkx as nx
32
33__all__ = [
34 "from_pandas_adjacency",
35 "to_pandas_adjacency",
36 "from_pandas_edgelist",
37 "to_pandas_edgelist",
38 "from_scipy_sparse_array",
39 "to_scipy_sparse_array",
40 "from_numpy_array",
41 "to_numpy_array",
42]
43
44
45@nx._dispatchable(edge_attrs="weight")
46def to_pandas_adjacency(
47 G,
48 nodelist=None,
49 dtype=None,
50 order=None,
51 multigraph_weight=sum,
52 weight="weight",
53 nonedge=0.0,
54):
55 """Returns the graph adjacency matrix as a Pandas DataFrame.
56
57 Parameters
58 ----------
59 G : graph
60 The NetworkX graph used to construct the Pandas DataFrame.
61
62 nodelist : list, optional
63 The rows and columns are ordered according to the nodes in `nodelist`.
64 If `nodelist` is None, then the ordering is produced by G.nodes().
65
66 multigraph_weight : {sum, min, max}, optional
67 An operator that determines how weights in multigraphs are handled.
68 The default is to sum the weights of the multiple edges.
69
70 weight : string or None, optional
71 The edge attribute that holds the numerical value used for
72 the edge weight. If an edge does not have that attribute, then the
73 value 1 is used instead.
74
75 nonedge : float, optional
76 The matrix values corresponding to nonedges are typically set to zero.
77 However, this could be undesirable if there are matrix values
78 corresponding to actual edges that also have the value zero. If so,
79 one might prefer nonedges to have some other value, such as nan.
80
81 Returns
82 -------
83 df : Pandas DataFrame
84 Graph adjacency matrix
85
86 Notes
87 -----
88 For directed graphs, entry i,j corresponds to an edge from i to j.
89
90 The DataFrame entries are assigned to the weight edge attribute. When
91 an edge does not have a weight attribute, the value of the entry is set to
92 the number 1. For multiple (parallel) edges, the values of the entries
93 are determined by the 'multigraph_weight' parameter. The default is to
94 sum the weight attributes for each of the parallel edges.
95
96 When `nodelist` does not contain every node in `G`, the matrix is built
97 from the subgraph of `G` that is induced by the nodes in `nodelist`.
98
99 The convention used for self-loop edges in graphs is to assign the
100 diagonal matrix entry value to the weight attribute of the edge
101 (or the number 1 if the edge has no weight attribute). If the
102 alternate convention of doubling the edge weight is desired the
103 resulting Pandas DataFrame can be modified as follows::
104
105 >>> import pandas as pd
106 >>> G = nx.Graph([(1, 1), (2, 2)])
107 >>> df = nx.to_pandas_adjacency(G)
108 >>> df
109 1 2
110 1 1.0 0.0
111 2 0.0 1.0
112 >>> diag_idx = list(range(len(df)))
113 >>> df.iloc[diag_idx, diag_idx] *= 2
114 >>> df
115 1 2
116 1 2.0 0.0
117 2 0.0 2.0
118
119 Examples
120 --------
121 >>> G = nx.MultiDiGraph()
122 >>> G.add_edge(0, 1, weight=2)
123 0
124 >>> G.add_edge(1, 0)
125 0
126 >>> G.add_edge(2, 2, weight=3)
127 0
128 >>> G.add_edge(2, 2)
129 1
130 >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int)
131 0 1 2
132 0 0 2 0
133 1 1 0 0
134 2 0 0 4
135
136 """
137 import pandas as pd
138
139 M = to_numpy_array(
140 G,
141 nodelist=nodelist,
142 dtype=dtype,
143 order=order,
144 multigraph_weight=multigraph_weight,
145 weight=weight,
146 nonedge=nonedge,
147 )
148 if nodelist is None:
149 nodelist = list(G)
150 return pd.DataFrame(data=M, index=nodelist, columns=nodelist)
151
152
153@nx._dispatchable(graphs=None, returns_graph=True)
154def from_pandas_adjacency(df, create_using=None):
155 r"""Returns a graph from Pandas DataFrame.
156
157 The Pandas DataFrame is interpreted as an adjacency matrix for the graph.
158
159 Parameters
160 ----------
161 df : Pandas DataFrame
162 An adjacency matrix representation of a graph
163
164 create_using : NetworkX graph constructor, optional (default=nx.Graph)
165 Graph type to create. If graph instance, then cleared before populated.
166
167 Notes
168 -----
169 For directed graphs, explicitly mention create_using=nx.DiGraph,
170 and entry i,j of df corresponds to an edge from i to j.
171
172 If `df` has a single data type for each entry it will be converted to an
173 appropriate Python data type.
174
175 If you have node attributes stored in a separate dataframe `df_nodes`,
176 you can load those attributes to the graph `G` using the following code::
177
178 df_nodes = pd.DataFrame({"node_id": [1, 2, 3], "attribute1": ["A", "B", "C"]})
179 G.add_nodes_from((n, dict(d)) for n, d in df_nodes.iterrows())
180
181 If `df` has a user-specified compound data type the names
182 of the data fields will be used as attribute keys in the resulting
183 NetworkX graph.
184
185 See Also
186 --------
187 to_pandas_adjacency
188
189 Examples
190 --------
191 Simple integer weights on edges:
192
193 >>> import pandas as pd
194 >>> pd.options.display.max_columns = 20
195 >>> df = pd.DataFrame([[1, 1], [2, 1]])
196 >>> df
197 0 1
198 0 1 1
199 1 2 1
200 >>> G = nx.from_pandas_adjacency(df)
201 >>> G.name = "Graph from pandas adjacency matrix"
202 >>> print(G)
203 Graph named 'Graph from pandas adjacency matrix' with 2 nodes and 3 edges
204 """
205
206 try:
207 df = df[df.index]
208 except Exception as err:
209 missing = list(set(df.index).difference(set(df.columns)))
210 msg = f"{missing} not in columns"
211 raise nx.NetworkXError("Columns must match Indices.", msg) from err
212
213 A = df.values
214 G = from_numpy_array(A, create_using=create_using, nodelist=df.columns)
215
216 return G
217
218
219@nx._dispatchable(preserve_edge_attrs=True)
220def to_pandas_edgelist(
221 G,
222 source="source",
223 target="target",
224 nodelist=None,
225 dtype=None,
226 edge_key=None,
227):
228 """Returns the graph edge list as a Pandas DataFrame.
229
230 Parameters
231 ----------
232 G : graph
233 The NetworkX graph used to construct the Pandas DataFrame.
234
235 source : str or int, optional
236 A valid column name (string or integer) for the source nodes (for the
237 directed case).
238
239 target : str or int, optional
240 A valid column name (string or integer) for the target nodes (for the
241 directed case).
242
243 nodelist : list, optional
244 Use only nodes specified in nodelist
245
246 dtype : dtype, default None
247 Use to create the DataFrame. Data type to force.
248 Only a single dtype is allowed. If None, infer.
249
250 edge_key : str or int or None, optional (default=None)
251 A valid column name (string or integer) for the edge keys (for the
252 multigraph case). If None, edge keys are not stored in the DataFrame.
253
254 Returns
255 -------
256 df : Pandas DataFrame
257 Graph edge list
258
259 Examples
260 --------
261 >>> G = nx.Graph(
262 ... [
263 ... ("A", "B", {"cost": 1, "weight": 7}),
264 ... ("C", "E", {"cost": 9, "weight": 10}),
265 ... ]
266 ... )
267 >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"])
268 >>> df[["source", "target", "cost", "weight"]]
269 source target cost weight
270 0 A B 1 7
271 1 C E 9 10
272
273 >>> G = nx.MultiGraph([("A", "B", {"cost": 1}), ("A", "B", {"cost": 9})])
274 >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"], edge_key="ekey")
275 >>> df[["source", "target", "cost", "ekey"]]
276 source target cost ekey
277 0 A B 1 0
278 1 A B 9 1
279
280 """
281 import pandas as pd
282
283 if nodelist is None:
284 edgelist = G.edges(data=True)
285 else:
286 edgelist = G.edges(nodelist, data=True)
287 source_nodes = [s for s, _, _ in edgelist]
288 target_nodes = [t for _, t, _ in edgelist]
289
290 all_attrs = set().union(*(d.keys() for _, _, d in edgelist))
291 if source in all_attrs:
292 raise nx.NetworkXError(f"Source name {source!r} is an edge attr name")
293 if target in all_attrs:
294 raise nx.NetworkXError(f"Target name {target!r} is an edge attr name")
295
296 nan = float("nan")
297 edge_attr = {k: [d.get(k, nan) for _, _, d in edgelist] for k in all_attrs}
298
299 if G.is_multigraph() and edge_key is not None:
300 if edge_key in all_attrs:
301 raise nx.NetworkXError(f"Edge key name {edge_key!r} is an edge attr name")
302 edge_keys = [k for _, _, k in G.edges(keys=True)]
303 edgelistdict = {source: source_nodes, target: target_nodes, edge_key: edge_keys}
304 else:
305 edgelistdict = {source: source_nodes, target: target_nodes}
306
307 edgelistdict.update(edge_attr)
308 return pd.DataFrame(edgelistdict, dtype=dtype)
309
310
311@nx._dispatchable(graphs=None, returns_graph=True)
312def from_pandas_edgelist(
313 df,
314 source="source",
315 target="target",
316 edge_attr=None,
317 create_using=None,
318 edge_key=None,
319):
320 """Returns a graph from Pandas DataFrame containing an edge list.
321
322 The Pandas DataFrame should contain at least two columns of node names and
323 zero or more columns of edge attributes. Each row will be processed as one
324 edge instance.
325
326 Note: This function iterates over DataFrame.values, which is not
327 guaranteed to retain the data type across columns in the row. This is only
328 a problem if your row is entirely numeric and a mix of ints and floats. In
329 that case, all values will be returned as floats. See the
330 DataFrame.iterrows documentation for an example.
331
332 Parameters
333 ----------
334 df : Pandas DataFrame
335 An edge list representation of a graph
336
337 source : str or int
338 A valid column name (string or integer) for the source nodes (for the
339 directed case).
340
341 target : str or int
342 A valid column name (string or integer) for the target nodes (for the
343 directed case).
344
345 edge_attr : str or int, iterable, True, or None
346 A valid column name (str or int) or iterable of column names that are
347 used to retrieve items and add them to the graph as edge attributes.
348 If `True`, all columns will be added except `source`, `target` and `edge_key`.
349 If `None`, no edge attributes are added to the graph.
350
351 create_using : NetworkX graph constructor, optional (default=nx.Graph)
352 Graph type to create. If graph instance, then cleared before populated.
353
354 edge_key : str or None, optional (default=None)
355 A valid column name for the edge keys (for a MultiGraph). The values in
356 this column are used for the edge keys when adding edges if create_using
357 is a multigraph.
358
359 Notes
360 -----
361 If you have node attributes stored in a separate dataframe `df_nodes`,
362 you can load those attributes to the graph `G` using the following code::
363
364 df_nodes = pd.DataFrame({"node_id": [1, 2, 3], "attribute1": ["A", "B", "C"]})
365 G.add_nodes_from((n, dict(d)) for n, d in df_nodes.iterrows())
366
367 See Also
368 --------
369 to_pandas_edgelist
370
371 Examples
372 --------
373 Simple integer weights on edges:
374
375 >>> import pandas as pd
376 >>> pd.options.display.max_columns = 20
377 >>> import numpy as np
378 >>> rng = np.random.RandomState(seed=5)
379 >>> ints = rng.randint(1, 11, size=(3, 2))
380 >>> a = ["A", "B", "C"]
381 >>> b = ["D", "A", "E"]
382 >>> df = pd.DataFrame(ints, columns=["weight", "cost"])
383 >>> df[0] = a
384 >>> df["b"] = b
385 >>> df[["weight", "cost", 0, "b"]]
386 weight cost 0 b
387 0 4 7 A D
388 1 7 1 B A
389 2 10 9 C E
390 >>> G = nx.from_pandas_edgelist(df, 0, "b", ["weight", "cost"])
391 >>> G["E"]["C"]["weight"]
392 10
393 >>> G["E"]["C"]["cost"]
394 9
395 >>> edges = pd.DataFrame(
396 ... {
397 ... "source": [0, 1, 2],
398 ... "target": [2, 2, 3],
399 ... "weight": [3, 4, 5],
400 ... "color": ["red", "blue", "blue"],
401 ... }
402 ... )
403 >>> G = nx.from_pandas_edgelist(edges, edge_attr=True)
404 >>> G[0][2]["color"]
405 'red'
406
407 Build multigraph with custom keys:
408
409 >>> edges = pd.DataFrame(
410 ... {
411 ... "source": [0, 1, 2, 0],
412 ... "target": [2, 2, 3, 2],
413 ... "my_edge_key": ["A", "B", "C", "D"],
414 ... "weight": [3, 4, 5, 6],
415 ... "color": ["red", "blue", "blue", "blue"],
416 ... }
417 ... )
418 >>> G = nx.from_pandas_edgelist(
419 ... edges,
420 ... edge_key="my_edge_key",
421 ... edge_attr=["weight", "color"],
422 ... create_using=nx.MultiGraph(),
423 ... )
424 >>> G[0][2]
425 AtlasView({'A': {'weight': 3, 'color': 'red'}, 'D': {'weight': 6, 'color': 'blue'}})
426
427
428 """
429 g = nx.empty_graph(0, create_using)
430
431 if edge_attr is None:
432 if g.is_multigraph() and edge_key is not None:
433 for u, v, k in zip(df[source], df[target], df[edge_key]):
434 g.add_edge(u, v, k)
435 else:
436 g.add_edges_from(zip(df[source], df[target]))
437 return g
438
439 reserved_columns = [source, target]
440 if g.is_multigraph() and edge_key is not None:
441 reserved_columns.append(edge_key)
442
443 # Additional columns requested
444 attr_col_headings = []
445 attribute_data = []
446 if edge_attr is True:
447 attr_col_headings = [c for c in df.columns if c not in reserved_columns]
448 elif isinstance(edge_attr, list | tuple):
449 attr_col_headings = edge_attr
450 else:
451 attr_col_headings = [edge_attr]
452 if len(attr_col_headings) == 0:
453 raise nx.NetworkXError(
454 f"Invalid edge_attr argument: No columns found with name: {attr_col_headings}"
455 )
456
457 try:
458 attribute_data = zip(*[df[col] for col in attr_col_headings])
459 except (KeyError, TypeError) as err:
460 msg = f"Invalid edge_attr argument: {edge_attr}"
461 raise nx.NetworkXError(msg) from err
462
463 if g.is_multigraph():
464 # => append the edge keys from the df to the bundled data
465 if edge_key is not None:
466 try:
467 multigraph_edge_keys = df[edge_key]
468 attribute_data = zip(attribute_data, multigraph_edge_keys)
469 except (KeyError, TypeError) as err:
470 msg = f"Invalid edge_key argument: {edge_key}"
471 raise nx.NetworkXError(msg) from err
472
473 for s, t, attrs in zip(df[source], df[target], attribute_data):
474 if edge_key is not None:
475 attrs, multigraph_edge_key = attrs
476 key = g.add_edge(s, t, key=multigraph_edge_key)
477 else:
478 key = g.add_edge(s, t)
479
480 g[s][t][key].update(zip(attr_col_headings, attrs))
481 else:
482 for s, t, attrs in zip(df[source], df[target], attribute_data):
483 g.add_edge(s, t)
484 g[s][t].update(zip(attr_col_headings, attrs))
485
486 return g
487
488
489@nx._dispatchable(edge_attrs="weight")
490def to_scipy_sparse_array(G, nodelist=None, dtype=None, weight="weight", format="csr"):
491 """Returns the graph adjacency matrix as a SciPy sparse array.
492
493 Parameters
494 ----------
495 G : graph
496 The NetworkX graph used to construct the sparse array.
497
498 nodelist : list, optional
499 The rows and columns are ordered according to the nodes in `nodelist`.
500 If `nodelist` is None, then the ordering is produced by ``G.nodes()``.
501
502 dtype : NumPy data-type, optional
503 A valid NumPy dtype used to initialize the array. If None, then the
504 NumPy default is used.
505
506 weight : string or None, optional (default='weight')
507 The edge attribute that holds the numerical value used for
508 the edge weight. If None then all edge weights are 1.
509
510 format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
511 The format of the sparse array to be returned (default 'csr'). For
512 some algorithms different implementations of sparse arrays
513 can perform better. See [1]_ for details.
514
515 Returns
516 -------
517 A : SciPy sparse array
518 Graph adjacency matrix.
519
520 Notes
521 -----
522 For directed graphs, matrix entry ``i, j`` corresponds to an edge from
523 ``i`` to ``j``.
524
525 The values of the adjacency matrix are populated using the edge attribute held in
526 parameter `weight`. When an edge does not have that attribute, the
527 value of the entry is 1.
528
529 For multiple edges the matrix values are the sums of the edge weights.
530
531 When `nodelist` does not contain every node in `G`, the adjacency matrix
532 is built from the subgraph of `G` that is induced by the nodes in
533 `nodelist`.
534
535 The convention used for self-loop edges in graphs is to assign the
536 diagonal matrix entry value to the weight attribute of the edge
537 (or the number 1 if the edge has no weight attribute). If the
538 alternate convention of doubling the edge weight is desired the
539 resulting array can be modified as follows::
540
541 >>> G = nx.Graph([(1, 1)])
542 >>> A = nx.to_scipy_sparse_array(G)
543 >>> A.toarray()
544 array([[1]])
545 >>> A.setdiag(A.diagonal() * 2)
546 >>> A.toarray()
547 array([[2]])
548
549 Examples
550 --------
551
552 Basic usage:
553
554 >>> G = nx.path_graph(4)
555 >>> A = nx.to_scipy_sparse_array(G)
556 >>> A # doctest: +SKIP
557 <Compressed Sparse Row sparse array of dtype 'int64'
558 with 6 stored elements and shape (4, 4)>
559
560 >>> A.toarray()
561 array([[0, 1, 0, 0],
562 [1, 0, 1, 0],
563 [0, 1, 0, 1],
564 [0, 0, 1, 0]])
565
566 .. note:: The `toarray` method is used in these examples to better visualize
567 the adjacency matrix. For a dense representation of the adjaceny matrix,
568 use `to_numpy_array` instead.
569
570 Directed graphs:
571
572 >>> G = nx.DiGraph([(0, 1), (1, 2), (2, 3)])
573 >>> nx.to_scipy_sparse_array(G).toarray()
574 array([[0, 1, 0, 0],
575 [0, 0, 1, 0],
576 [0, 0, 0, 1],
577 [0, 0, 0, 0]])
578
579 >>> H = G.reverse()
580 >>> H.edges
581 OutEdgeView([(1, 0), (2, 1), (3, 2)])
582 >>> nx.to_scipy_sparse_array(H).toarray()
583 array([[0, 0, 0, 0],
584 [1, 0, 0, 0],
585 [0, 1, 0, 0],
586 [0, 0, 1, 0]])
587
588 By default, the order of the rows/columns of the adjacency matrix is determined
589 by the ordering of the nodes in `G`:
590
591 >>> G = nx.Graph()
592 >>> G.add_nodes_from([3, 5, 0, 1])
593 >>> G.add_edges_from([(1, 3), (1, 5)])
594 >>> nx.to_scipy_sparse_array(G).toarray()
595 array([[0, 0, 0, 1],
596 [0, 0, 0, 1],
597 [0, 0, 0, 0],
598 [1, 1, 0, 0]])
599
600 The ordering of the rows can be changed with `nodelist`:
601
602 >>> ordered = [0, 1, 3, 5]
603 >>> nx.to_scipy_sparse_array(G, nodelist=ordered).toarray()
604 array([[0, 0, 0, 0],
605 [0, 0, 1, 1],
606 [0, 1, 0, 0],
607 [0, 1, 0, 0]])
608
609 If `nodelist` contains a subset of the nodes in `G`, the adjacency matrix
610 for the node-induced subgraph is produced:
611
612 >>> nx.to_scipy_sparse_array(G, nodelist=[1, 3, 5]).toarray()
613 array([[0, 1, 1],
614 [1, 0, 0],
615 [1, 0, 0]])
616
617 The values of the adjacency matrix are drawn from the edge attribute
618 specified by the `weight` parameter:
619
620 >>> G = nx.path_graph(4)
621 >>> nx.set_edge_attributes(
622 ... G, values={(0, 1): 1, (1, 2): 10, (2, 3): 2}, name="weight"
623 ... )
624 >>> nx.set_edge_attributes(
625 ... G, values={(0, 1): 50, (1, 2): 35, (2, 3): 10}, name="capacity"
626 ... )
627 >>> nx.to_scipy_sparse_array(G).toarray() # Default weight="weight"
628 array([[ 0, 1, 0, 0],
629 [ 1, 0, 10, 0],
630 [ 0, 10, 0, 2],
631 [ 0, 0, 2, 0]])
632 >>> nx.to_scipy_sparse_array(G, weight="capacity").toarray()
633 array([[ 0, 50, 0, 0],
634 [50, 0, 35, 0],
635 [ 0, 35, 0, 10],
636 [ 0, 0, 10, 0]])
637
638 Any edges that don't have a `weight` attribute default to 1:
639
640 >>> G[1][2].pop("capacity")
641 35
642 >>> nx.to_scipy_sparse_array(G, weight="capacity").toarray()
643 array([[ 0, 50, 0, 0],
644 [50, 0, 1, 0],
645 [ 0, 1, 0, 10],
646 [ 0, 0, 10, 0]])
647
648 When `G` is a multigraph, the values in the adjacency matrix are given by
649 the sum of the `weight` edge attribute over each edge key:
650
651 >>> G = nx.MultiDiGraph([(0, 1), (0, 1), (0, 1), (2, 0)])
652 >>> nx.to_scipy_sparse_array(G).toarray()
653 array([[0, 3, 0],
654 [0, 0, 0],
655 [1, 0, 0]])
656
657 References
658 ----------
659 .. [1] Scipy Dev. References, "Sparse Arrays",
660 https://docs.scipy.org/doc/scipy/reference/sparse.html
661 """
662 import scipy as sp
663
664 if len(G) == 0:
665 raise nx.NetworkXError("Graph has no nodes or edges")
666
667 if nodelist is None:
668 nodelist = list(G)
669 nlen = len(G)
670 else:
671 nlen = len(nodelist)
672 if nlen == 0:
673 raise nx.NetworkXError("nodelist has no nodes")
674 nodeset = set(G.nbunch_iter(nodelist))
675 if nlen != len(nodeset):
676 for n in nodelist:
677 if n not in G:
678 raise nx.NetworkXError(f"Node {n} in nodelist is not in G")
679 raise nx.NetworkXError("nodelist contains duplicates.")
680 if nlen < len(G):
681 G = G.subgraph(nodelist)
682
683 index = dict(zip(nodelist, range(nlen)))
684 coefficients = zip(
685 *((index[u], index[v], wt) for u, v, wt in G.edges(data=weight, default=1))
686 )
687 try:
688 row, col, data = coefficients
689 except ValueError:
690 # there is no edge in the subgraph
691 row, col, data = [], [], []
692
693 if G.is_directed():
694 A = sp.sparse.coo_array((data, (row, col)), shape=(nlen, nlen), dtype=dtype)
695 else:
696 # symmetrize matrix
697 d = data + data
698 r = row + col
699 c = col + row
700 # selfloop entries get double counted when symmetrizing
701 # so we subtract the data on the diagonal
702 selfloops = list(nx.selfloop_edges(G, data=weight, default=1))
703 if selfloops:
704 diag_index, diag_data = zip(*((index[u], -wt) for u, v, wt in selfloops))
705 d += diag_data
706 r += diag_index
707 c += diag_index
708 A = sp.sparse.coo_array((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
709 try:
710 return A.asformat(format)
711 except ValueError as err:
712 raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from err
713
714
715def _csr_gen_triples(A):
716 """Converts a SciPy sparse array in **Compressed Sparse Row** format to
717 an iterable of weighted edge triples.
718
719 """
720 nrows = A.shape[0]
721 indptr, dst_indices, data = A.indptr, A.indices, A.data
722 import numpy as np
723
724 src_indices = np.repeat(np.arange(nrows), np.diff(indptr))
725 return zip(src_indices.tolist(), dst_indices.tolist(), A.data.tolist())
726
727
728def _csc_gen_triples(A):
729 """Converts a SciPy sparse array in **Compressed Sparse Column** format to
730 an iterable of weighted edge triples.
731
732 """
733 ncols = A.shape[1]
734 indptr, src_indices, data = A.indptr, A.indices, A.data
735 import numpy as np
736
737 dst_indices = np.repeat(np.arange(ncols), np.diff(indptr))
738 return zip(src_indices.tolist(), dst_indices.tolist(), A.data.tolist())
739
740
741def _coo_gen_triples(A):
742 """Converts a SciPy sparse array in **Coordinate** format to an iterable
743 of weighted edge triples.
744
745 """
746 return zip(A.row.tolist(), A.col.tolist(), A.data.tolist())
747
748
749def _dok_gen_triples(A):
750 """Converts a SciPy sparse array in **Dictionary of Keys** format to an
751 iterable of weighted edge triples.
752
753 """
754 for (r, c), v in A.items():
755 # Use `v.item()` to convert a NumPy scalar to the appropriate Python scalar
756 yield int(r), int(c), v.item()
757
758
759def _generate_weighted_edges(A):
760 """Returns an iterable over (u, v, w) triples, where u and v are adjacent
761 vertices and w is the weight of the edge joining u and v.
762
763 `A` is a SciPy sparse array (in any format).
764
765 """
766 if A.format == "csr":
767 return _csr_gen_triples(A)
768 if A.format == "csc":
769 return _csc_gen_triples(A)
770 if A.format == "dok":
771 return _dok_gen_triples(A)
772 # If A is in any other format (including COO), convert it to COO format.
773 return _coo_gen_triples(A.tocoo())
774
775
776@nx._dispatchable(graphs=None, returns_graph=True)
777def from_scipy_sparse_array(
778 A, parallel_edges=False, create_using=None, edge_attribute="weight"
779):
780 """Creates a new graph from an adjacency matrix given as a SciPy sparse
781 array.
782
783 Parameters
784 ----------
785 A: scipy.sparse array
786 An adjacency matrix representation of a graph
787
788 parallel_edges : Boolean
789 If this is True, `create_using` is a multigraph, and `A` is an
790 integer matrix, then entry *(i, j)* in the matrix is interpreted as the
791 number of parallel edges joining vertices *i* and *j* in the graph.
792 If it is False, then the entries in the matrix are interpreted as
793 the weight of a single edge joining the vertices.
794
795 create_using : NetworkX graph constructor, optional (default=nx.Graph)
796 Graph type to create. If graph instance, then cleared before populated.
797
798 edge_attribute: string
799 Name of edge attribute to store matrix numeric value. The data will
800 have the same type as the matrix entry (int, float, (real,imag)).
801
802 Notes
803 -----
804 For directed graphs, explicitly mention create_using=nx.DiGraph,
805 and entry i,j of A corresponds to an edge from i to j.
806
807 If `create_using` is :class:`networkx.MultiGraph` or
808 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
809 entries of `A` are of type :class:`int`, then this function returns a
810 multigraph (constructed from `create_using`) with parallel edges.
811 In this case, `edge_attribute` will be ignored.
812
813 If `create_using` indicates an undirected multigraph, then only the edges
814 indicated by the upper triangle of the matrix `A` will be added to the
815 graph.
816
817 Examples
818 --------
819 >>> import scipy as sp
820 >>> A = sp.sparse.eye(2, 2, 1)
821 >>> G = nx.from_scipy_sparse_array(A)
822
823 If `create_using` indicates a multigraph and the matrix has only integer
824 entries and `parallel_edges` is False, then the entries will be treated
825 as weights for edges joining the nodes (without creating parallel edges):
826
827 >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])
828 >>> G = nx.from_scipy_sparse_array(A, create_using=nx.MultiGraph)
829 >>> G[1][1]
830 AtlasView({0: {'weight': 2}})
831
832 If `create_using` indicates a multigraph and the matrix has only integer
833 entries and `parallel_edges` is True, then the entries will be treated
834 as the number of parallel edges joining those two vertices:
835
836 >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])
837 >>> G = nx.from_scipy_sparse_array(
838 ... A, parallel_edges=True, create_using=nx.MultiGraph
839 ... )
840 >>> G[1][1]
841 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
842
843 """
844 G = nx.empty_graph(0, create_using)
845 n, m = A.shape
846 if n != m:
847 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
848 # Make sure we get even the isolated nodes of the graph.
849 G.add_nodes_from(range(n))
850 # Create an iterable over (u, v, w) triples and for each triple, add an
851 # edge from u to v with weight w.
852 triples = _generate_weighted_edges(A)
853 # If the entries in the adjacency matrix are integers, the graph is a
854 # multigraph, and parallel_edges is True, then create parallel edges, each
855 # with weight 1, for each entry in the adjacency matrix. Otherwise, create
856 # one edge for each positive entry in the adjacency matrix and set the
857 # weight of that edge to be the entry in the matrix.
858 if A.dtype.kind in ("i", "u") and G.is_multigraph() and parallel_edges:
859 chain = itertools.chain.from_iterable
860 # The following line is equivalent to:
861 #
862 # for (u, v) in edges:
863 # for d in range(A[u, v]):
864 # G.add_edge(u, v, weight=1)
865 #
866 triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples)
867 # If we are creating an undirected multigraph, only add the edges from the
868 # upper triangle of the matrix. Otherwise, add all the edges. This relies
869 # on the fact that the vertices created in the
870 # `_generated_weighted_edges()` function are actually the row/column
871 # indices for the matrix `A`.
872 #
873 # Without this check, we run into a problem where each edge is added twice
874 # when `G.add_weighted_edges_from()` is invoked below.
875 if G.is_multigraph() and not G.is_directed():
876 triples = ((u, v, d) for u, v, d in triples if u <= v)
877 G.add_weighted_edges_from(triples, weight=edge_attribute)
878 return G
879
880
881@nx._dispatchable(edge_attrs="weight") # edge attrs may also be obtained from `dtype`
882def to_numpy_array(
883 G,
884 nodelist=None,
885 dtype=None,
886 order=None,
887 multigraph_weight=sum,
888 weight="weight",
889 nonedge=0.0,
890):
891 """Returns the graph adjacency matrix as a NumPy array.
892
893 Parameters
894 ----------
895 G : graph
896 The NetworkX graph used to construct the NumPy array.
897
898 nodelist : list, optional
899 The rows and columns are ordered according to the nodes in `nodelist`.
900 If `nodelist` is ``None``, then the ordering is produced by ``G.nodes()``.
901
902 dtype : NumPy data type, optional
903 A NumPy data type used to initialize the array. If None, then the NumPy
904 default is used. The dtype can be structured if `weight=None`, in which
905 case the dtype field names are used to look up edge attributes. The
906 result is a structured array where each named field in the dtype
907 corresponds to the adjacency for that edge attribute. See examples for
908 details.
909
910 order : {'C', 'F'}, optional
911 Whether to store multidimensional data in C- or Fortran-contiguous
912 (row- or column-wise) order in memory. If None, then the NumPy default
913 is used.
914
915 multigraph_weight : callable, optional
916 An function that determines how weights in multigraphs are handled.
917 The function should accept a sequence of weights and return a single
918 value. The default is to sum the weights of the multiple edges.
919
920 weight : string or None optional (default = 'weight')
921 The edge attribute that holds the numerical value used for
922 the edge weight. If an edge does not have that attribute, then the
923 value 1 is used instead. `weight` must be ``None`` if a structured
924 dtype is used.
925
926 nonedge : array_like (default = 0.0)
927 The value used to represent non-edges in the adjacency matrix.
928 The array values corresponding to nonedges are typically set to zero.
929 However, this could be undesirable if there are array values
930 corresponding to actual edges that also have the value zero. If so,
931 one might prefer nonedges to have some other value, such as ``nan``.
932
933 Returns
934 -------
935 A : NumPy ndarray
936 Graph adjacency matrix
937
938 Raises
939 ------
940 NetworkXError
941 If `dtype` is a structured dtype and `G` is a multigraph
942 ValueError
943 If `dtype` is a structured dtype and `weight` is not `None`
944
945 See Also
946 --------
947 from_numpy_array
948
949 Notes
950 -----
951 For directed graphs, entry ``i, j`` corresponds to an edge from ``i`` to ``j``.
952
953 Entries in the adjacency matrix are given by the `weight` edge attribute.
954 When an edge does not have a weight attribute, the value of the entry is
955 set to the number 1. For multiple (parallel) edges, the values of the
956 entries are determined by the `multigraph_weight` parameter. The default is
957 to sum the weight attributes for each of the parallel edges.
958
959 When `nodelist` does not contain every node in `G`, the adjacency matrix is
960 built from the subgraph of `G` that is induced by the nodes in `nodelist`.
961
962 The convention used for self-loop edges in graphs is to assign the
963 diagonal array entry value to the weight attribute of the edge
964 (or the number 1 if the edge has no weight attribute). If the
965 alternate convention of doubling the edge weight is desired the
966 resulting NumPy array can be modified as follows:
967
968 >>> import numpy as np
969 >>> G = nx.Graph([(1, 1)])
970 >>> A = nx.to_numpy_array(G)
971 >>> A
972 array([[1.]])
973 >>> A[np.diag_indices_from(A)] *= 2
974 >>> A
975 array([[2.]])
976
977 Examples
978 --------
979 >>> G = nx.MultiDiGraph()
980 >>> G.add_edge(0, 1, weight=2)
981 0
982 >>> G.add_edge(1, 0)
983 0
984 >>> G.add_edge(2, 2, weight=3)
985 0
986 >>> G.add_edge(2, 2)
987 1
988 >>> nx.to_numpy_array(G, nodelist=[0, 1, 2])
989 array([[0., 2., 0.],
990 [1., 0., 0.],
991 [0., 0., 4.]])
992
993 When `nodelist` argument is used, nodes of `G` which do not appear in the `nodelist`
994 and their edges are not included in the adjacency matrix. Here is an example:
995
996 >>> G = nx.Graph()
997 >>> G.add_edge(3, 1)
998 >>> G.add_edge(2, 0)
999 >>> G.add_edge(2, 1)
1000 >>> G.add_edge(3, 0)
1001 >>> nx.to_numpy_array(G, nodelist=[1, 2, 3])
1002 array([[0., 1., 1.],
1003 [1., 0., 0.],
1004 [1., 0., 0.]])
1005
1006 This function can also be used to create adjacency matrices for multiple
1007 edge attributes with structured dtypes:
1008
1009 >>> G = nx.Graph()
1010 >>> G.add_edge(0, 1, weight=10)
1011 >>> G.add_edge(1, 2, cost=5)
1012 >>> G.add_edge(2, 3, weight=3, cost=-4.0)
1013 >>> dtype = np.dtype([("weight", int), ("cost", float)])
1014 >>> A = nx.to_numpy_array(G, dtype=dtype, weight=None)
1015 >>> A["weight"]
1016 array([[ 0, 10, 0, 0],
1017 [10, 0, 1, 0],
1018 [ 0, 1, 0, 3],
1019 [ 0, 0, 3, 0]])
1020 >>> A["cost"]
1021 array([[ 0., 1., 0., 0.],
1022 [ 1., 0., 5., 0.],
1023 [ 0., 5., 0., -4.],
1024 [ 0., 0., -4., 0.]])
1025
1026 As stated above, the argument "nonedge" is useful especially when there are
1027 actually edges with weight 0 in the graph. Setting a nonedge value different than 0,
1028 makes it much clearer to differentiate such 0-weighted edges and actual nonedge values.
1029
1030 >>> G = nx.Graph()
1031 >>> G.add_edge(3, 1, weight=2)
1032 >>> G.add_edge(2, 0, weight=0)
1033 >>> G.add_edge(2, 1, weight=0)
1034 >>> G.add_edge(3, 0, weight=1)
1035 >>> nx.to_numpy_array(G, nonedge=-1.0)
1036 array([[-1., 2., -1., 1.],
1037 [ 2., -1., 0., -1.],
1038 [-1., 0., -1., 0.],
1039 [ 1., -1., 0., -1.]])
1040 """
1041 import numpy as np
1042
1043 if nodelist is None:
1044 nodelist = list(G)
1045 nlen = len(nodelist)
1046
1047 # Input validation
1048 nodeset = set(nodelist)
1049 if nodeset - set(G):
1050 raise nx.NetworkXError(f"Nodes {nodeset - set(G)} in nodelist is not in G")
1051 if len(nodeset) < nlen:
1052 raise nx.NetworkXError("nodelist contains duplicates.")
1053
1054 A = np.full((nlen, nlen), fill_value=nonedge, dtype=dtype, order=order)
1055
1056 # Corner cases: empty nodelist or graph without any edges
1057 if nlen == 0 or G.number_of_edges() == 0:
1058 return A
1059
1060 # If dtype is structured and weight is None, use dtype field names as
1061 # edge attributes
1062 edge_attrs = None # Only single edge attribute by default
1063 if A.dtype.names:
1064 if weight is None:
1065 edge_attrs = dtype.names
1066 else:
1067 raise ValueError(
1068 "Specifying `weight` not supported for structured dtypes\n."
1069 "To create adjacency matrices from structured dtypes, use `weight=None`."
1070 )
1071
1072 # Map nodes to row/col in matrix
1073 idx = dict(zip(nodelist, range(nlen)))
1074 if len(nodelist) < len(G):
1075 G = G.subgraph(nodelist).copy()
1076
1077 # Collect all edge weights and reduce with `multigraph_weights`
1078 if G.is_multigraph():
1079 if edge_attrs:
1080 raise nx.NetworkXError(
1081 "Structured arrays are not supported for MultiGraphs"
1082 )
1083 d = defaultdict(list)
1084 for u, v, wt in G.edges(data=weight, default=1.0):
1085 d[(idx[u], idx[v])].append(wt)
1086 i, j = np.array(list(d.keys())).T # indices
1087 wts = [multigraph_weight(ws) for ws in d.values()] # reduced weights
1088 else:
1089 i, j, wts = [], [], []
1090
1091 # Special branch: multi-attr adjacency from structured dtypes
1092 if edge_attrs:
1093 # Extract edges with all data
1094 for u, v, data in G.edges(data=True):
1095 i.append(idx[u])
1096 j.append(idx[v])
1097 wts.append(data)
1098 # Map each attribute to the appropriate named field in the
1099 # structured dtype
1100 for attr in edge_attrs:
1101 attr_data = [wt.get(attr, 1.0) for wt in wts]
1102 A[attr][i, j] = attr_data
1103 if not G.is_directed():
1104 A[attr][j, i] = attr_data
1105 return A
1106
1107 for u, v, wt in G.edges(data=weight, default=1.0):
1108 i.append(idx[u])
1109 j.append(idx[v])
1110 wts.append(wt)
1111
1112 # Set array values with advanced indexing
1113 A[i, j] = wts
1114 if not G.is_directed():
1115 A[j, i] = wts
1116
1117 return A
1118
1119
1120@nx._dispatchable(graphs=None, returns_graph=True)
1121def from_numpy_array(
1122 A, parallel_edges=False, create_using=None, edge_attr="weight", *, nodelist=None
1123):
1124 """Returns a graph from a 2D NumPy array.
1125
1126 The 2D NumPy array is interpreted as an adjacency matrix for the graph.
1127
1128 Parameters
1129 ----------
1130 A : a 2D numpy.ndarray
1131 An adjacency matrix representation of a graph
1132
1133 parallel_edges : Boolean
1134 If this is True, `create_using` is a multigraph, and `A` is an
1135 integer array, then entry *(i, j)* in the array is interpreted as the
1136 number of parallel edges joining vertices *i* and *j* in the graph.
1137 If it is False, then the entries in the array are interpreted as
1138 the weight of a single edge joining the vertices.
1139
1140 create_using : NetworkX graph constructor, optional (default=nx.Graph)
1141 Graph type to create. If graph instance, then cleared before populated.
1142
1143 edge_attr : String, optional (default="weight")
1144 The attribute to which the array values are assigned on each edge. If
1145 it is None, edge attributes will not be assigned.
1146
1147 nodelist : sequence of nodes, optional
1148 A sequence of objects to use as the nodes in the graph. If provided, the
1149 list of nodes must be the same length as the dimensions of `A`. The
1150 default is `None`, in which case the nodes are drawn from ``range(n)``.
1151
1152 Notes
1153 -----
1154 For directed graphs, explicitly mention create_using=nx.DiGraph,
1155 and entry i,j of A corresponds to an edge from i to j.
1156
1157 If `create_using` is :class:`networkx.MultiGraph` or
1158 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
1159 entries of `A` are of type :class:`int`, then this function returns a
1160 multigraph (of the same type as `create_using`) with parallel edges.
1161
1162 If `create_using` indicates an undirected multigraph, then only the edges
1163 indicated by the upper triangle of the array `A` will be added to the
1164 graph.
1165
1166 If `edge_attr` is Falsy (False or None), edge attributes will not be
1167 assigned, and the array data will be treated like a binary mask of
1168 edge presence or absence. Otherwise, the attributes will be assigned
1169 as follows:
1170
1171 If the NumPy array has a single data type for each array entry it
1172 will be converted to an appropriate Python data type.
1173
1174 If the NumPy array has a user-specified compound data type the names
1175 of the data fields will be used as attribute keys in the resulting
1176 NetworkX graph.
1177
1178 See Also
1179 --------
1180 to_numpy_array
1181
1182 Examples
1183 --------
1184 Simple integer weights on edges:
1185
1186 >>> import numpy as np
1187 >>> A = np.array([[1, 1], [2, 1]])
1188 >>> G = nx.from_numpy_array(A)
1189 >>> G.edges(data=True)
1190 EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), (1, 1, {'weight': 1})])
1191
1192 If `create_using` indicates a multigraph and the array has only integer
1193 entries and `parallel_edges` is False, then the entries will be treated
1194 as weights for edges joining the nodes (without creating parallel edges):
1195
1196 >>> A = np.array([[1, 1], [1, 2]])
1197 >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph)
1198 >>> G[1][1]
1199 AtlasView({0: {'weight': 2}})
1200
1201 If `create_using` indicates a multigraph and the array has only integer
1202 entries and `parallel_edges` is True, then the entries will be treated
1203 as the number of parallel edges joining those two vertices:
1204
1205 >>> A = np.array([[1, 1], [1, 2]])
1206 >>> temp = nx.MultiGraph()
1207 >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp)
1208 >>> G[1][1]
1209 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
1210
1211 User defined compound data type on edges:
1212
1213 >>> dt = [("weight", float), ("cost", int)]
1214 >>> A = np.array([[(1.0, 2)]], dtype=dt)
1215 >>> G = nx.from_numpy_array(A)
1216 >>> G.edges()
1217 EdgeView([(0, 0)])
1218 >>> G[0][0]["cost"]
1219 2
1220 >>> G[0][0]["weight"]
1221 1.0
1222
1223 """
1224 kind_to_python_type = {
1225 "f": float,
1226 "i": int,
1227 "u": int,
1228 "b": bool,
1229 "c": complex,
1230 "S": str,
1231 "U": str,
1232 "V": "void",
1233 }
1234 G = nx.empty_graph(0, create_using)
1235 if A.ndim != 2:
1236 raise nx.NetworkXError(f"Input array must be 2D, not {A.ndim}")
1237 n, m = A.shape
1238 if n != m:
1239 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
1240 dt = A.dtype
1241 try:
1242 python_type = kind_to_python_type[dt.kind]
1243 except Exception as err:
1244 raise TypeError(f"Unknown numpy data type: {dt}") from err
1245 if _default_nodes := (nodelist is None):
1246 nodelist = range(n)
1247 else:
1248 if len(nodelist) != n:
1249 raise ValueError("nodelist must have the same length as A.shape[0]")
1250
1251 # Make sure we get even the isolated nodes of the graph.
1252 G.add_nodes_from(nodelist)
1253 # Get a list of all the entries in the array with nonzero entries. These
1254 # coordinates become edges in the graph. (convert to int from np.int64)
1255 edges = ((int(e[0]), int(e[1])) for e in zip(*A.nonzero()))
1256 # handle numpy constructed data type
1257 if python_type == "void":
1258 # Sort the fields by their offset, then by dtype, then by name.
1259 fields = sorted(
1260 (offset, dtype, name) for name, (dtype, offset) in A.dtype.fields.items()
1261 )
1262 triples = (
1263 (
1264 u,
1265 v,
1266 {}
1267 if edge_attr in [False, None]
1268 else {
1269 name: kind_to_python_type[dtype.kind](val)
1270 for (_, dtype, name), val in zip(fields, A[u, v])
1271 },
1272 )
1273 for u, v in edges
1274 )
1275 # If the entries in the adjacency matrix are integers, the graph is a
1276 # multigraph, and parallel_edges is True, then create parallel edges, each
1277 # with weight 1, for each entry in the adjacency matrix. Otherwise, create
1278 # one edge for each positive entry in the adjacency matrix and set the
1279 # weight of that edge to be the entry in the matrix.
1280 elif python_type is int and G.is_multigraph() and parallel_edges:
1281 chain = itertools.chain.from_iterable
1282 # The following line is equivalent to:
1283 #
1284 # for (u, v) in edges:
1285 # for d in range(A[u, v]):
1286 # G.add_edge(u, v, weight=1)
1287 #
1288 if edge_attr in [False, None]:
1289 triples = chain(((u, v, {}) for d in range(A[u, v])) for (u, v) in edges)
1290 else:
1291 triples = chain(
1292 ((u, v, {edge_attr: 1}) for d in range(A[u, v])) for (u, v) in edges
1293 )
1294 else: # basic data type
1295 if edge_attr in [False, None]:
1296 triples = ((u, v, {}) for u, v in edges)
1297 else:
1298 triples = ((u, v, {edge_attr: python_type(A[u, v])}) for u, v in edges)
1299 # If we are creating an undirected multigraph, only add the edges from the
1300 # upper triangle of the matrix. Otherwise, add all the edges. This relies
1301 # on the fact that the vertices created in the
1302 # `_generated_weighted_edges()` function are actually the row/column
1303 # indices for the matrix `A`.
1304 #
1305 # Without this check, we run into a problem where each edge is added twice
1306 # when `G.add_edges_from()` is invoked below.
1307 if G.is_multigraph() and not G.is_directed():
1308 triples = ((u, v, d) for u, v, d in triples if u <= v)
1309 # Remap nodes if user provided custom `nodelist`
1310 if not _default_nodes:
1311 idx_to_node = dict(enumerate(nodelist))
1312 triples = ((idx_to_node[u], idx_to_node[v], d) for u, v, d in triples)
1313 G.add_edges_from(triples)
1314 return G