Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/convert_matrix.py: 10%

248 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Functions to convert NetworkX graphs to and from common data containers 

2like numpy arrays, scipy sparse arrays, and pandas DataFrames. 

3 

4The preferred way of converting data to a NetworkX graph is through the 

5graph constructor. The constructor calls the `~networkx.convert.to_networkx_graph` 

6function which attempts to guess the input type and convert it automatically. 

7 

8Examples 

9-------- 

10Create a 10 node random graph from a numpy array 

11 

12>>> import numpy as np 

13>>> rng = np.random.default_rng() 

14>>> a = rng.integers(low=0, high=2, size=(10, 10)) 

15>>> DG = nx.from_numpy_array(a, create_using=nx.DiGraph) 

16 

17or equivalently: 

18 

19>>> DG = nx.DiGraph(a) 

20 

21which calls `from_numpy_array` internally based on the type of ``a``. 

22 

23See Also 

24-------- 

25nx_agraph, nx_pydot 

26""" 

27 

28import itertools 

29from collections import defaultdict 

30 

31import networkx as nx 

32from networkx.utils import not_implemented_for 

33 

34__all__ = [ 

35 "from_pandas_adjacency", 

36 "to_pandas_adjacency", 

37 "from_pandas_edgelist", 

38 "to_pandas_edgelist", 

39 "from_scipy_sparse_array", 

40 "to_scipy_sparse_array", 

41 "from_numpy_array", 

42 "to_numpy_array", 

43] 

44 

45 

46@nx._dispatch(edge_attrs="weight") 

47def to_pandas_adjacency( 

48 G, 

49 nodelist=None, 

50 dtype=None, 

51 order=None, 

52 multigraph_weight=sum, 

53 weight="weight", 

54 nonedge=0.0, 

55): 

56 """Returns the graph adjacency matrix as a Pandas DataFrame. 

57 

58 Parameters 

59 ---------- 

60 G : graph 

61 The NetworkX graph used to construct the Pandas DataFrame. 

62 

63 nodelist : list, optional 

64 The rows and columns are ordered according to the nodes in `nodelist`. 

65 If `nodelist` is None, then the ordering is produced by G.nodes(). 

66 

67 multigraph_weight : {sum, min, max}, optional 

68 An operator that determines how weights in multigraphs are handled. 

69 The default is to sum the weights of the multiple edges. 

70 

71 weight : string or None, optional 

72 The edge attribute that holds the numerical value used for 

73 the edge weight. If an edge does not have that attribute, then the 

74 value 1 is used instead. 

75 

76 nonedge : float, optional 

77 The matrix values corresponding to nonedges are typically set to zero. 

78 However, this could be undesirable if there are matrix values 

79 corresponding to actual edges that also have the value zero. If so, 

80 one might prefer nonedges to have some other value, such as nan. 

81 

82 Returns 

83 ------- 

84 df : Pandas DataFrame 

85 Graph adjacency matrix 

86 

87 Notes 

88 ----- 

89 For directed graphs, entry i,j corresponds to an edge from i to j. 

90 

91 The DataFrame entries are assigned to the weight edge attribute. When 

92 an edge does not have a weight attribute, the value of the entry is set to 

93 the number 1. For multiple (parallel) edges, the values of the entries 

94 are determined by the 'multigraph_weight' parameter. The default is to 

95 sum the weight attributes for each of the parallel edges. 

96 

97 When `nodelist` does not contain every node in `G`, the matrix is built 

98 from the subgraph of `G` that is induced by the nodes in `nodelist`. 

99 

100 The convention used for self-loop edges in graphs is to assign the 

101 diagonal matrix entry value to the weight attribute of the edge 

102 (or the number 1 if the edge has no weight attribute). If the 

103 alternate convention of doubling the edge weight is desired the 

104 resulting Pandas DataFrame can be modified as follows: 

105 

106 >>> import pandas as pd 

107 >>> pd.options.display.max_columns = 20 

108 >>> import numpy as np 

109 >>> G = nx.Graph([(1, 1)]) 

110 >>> df = nx.to_pandas_adjacency(G, dtype=int) 

111 >>> df 

112 1 

113 1 1 

114 >>> df.values[np.diag_indices_from(df)] *= 2 

115 >>> df 

116 1 

117 1 2 

118 

119 Examples 

120 -------- 

121 >>> G = nx.MultiDiGraph() 

122 >>> G.add_edge(0, 1, weight=2) 

123 0 

124 >>> G.add_edge(1, 0) 

125 0 

126 >>> G.add_edge(2, 2, weight=3) 

127 0 

128 >>> G.add_edge(2, 2) 

129 1 

130 >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int) 

131 0 1 2 

132 0 0 2 0 

133 1 1 0 0 

134 2 0 0 4 

135 

136 """ 

137 import pandas as pd 

138 

139 M = to_numpy_array( 

140 G, 

141 nodelist=nodelist, 

142 dtype=dtype, 

143 order=order, 

144 multigraph_weight=multigraph_weight, 

145 weight=weight, 

146 nonedge=nonedge, 

147 ) 

148 if nodelist is None: 

149 nodelist = list(G) 

150 return pd.DataFrame(data=M, index=nodelist, columns=nodelist) 

151 

152 

153@nx._dispatch(graphs=None) 

154def from_pandas_adjacency(df, create_using=None): 

155 r"""Returns a graph from Pandas DataFrame. 

156 

157 The Pandas DataFrame is interpreted as an adjacency matrix for the graph. 

158 

159 Parameters 

160 ---------- 

161 df : Pandas DataFrame 

162 An adjacency matrix representation of a graph 

163 

164 create_using : NetworkX graph constructor, optional (default=nx.Graph) 

165 Graph type to create. If graph instance, then cleared before populated. 

166 

167 Notes 

168 ----- 

169 For directed graphs, explicitly mention create_using=nx.DiGraph, 

170 and entry i,j of df corresponds to an edge from i to j. 

171 

172 If `df` has a single data type for each entry it will be converted to an 

173 appropriate Python data type. 

174 

175 If `df` has a user-specified compound data type the names 

176 of the data fields will be used as attribute keys in the resulting 

177 NetworkX graph. 

178 

179 See Also 

180 -------- 

181 to_pandas_adjacency 

182 

183 Examples 

184 -------- 

185 Simple integer weights on edges: 

186 

187 >>> import pandas as pd 

188 >>> pd.options.display.max_columns = 20 

189 >>> df = pd.DataFrame([[1, 1], [2, 1]]) 

190 >>> df 

191 0 1 

192 0 1 1 

193 1 2 1 

194 >>> G = nx.from_pandas_adjacency(df) 

195 >>> G.name = "Graph from pandas adjacency matrix" 

196 >>> print(G) 

197 Graph named 'Graph from pandas adjacency matrix' with 2 nodes and 3 edges 

198 """ 

199 

200 try: 

201 df = df[df.index] 

202 except Exception as err: 

203 missing = list(set(df.index).difference(set(df.columns))) 

204 msg = f"{missing} not in columns" 

205 raise nx.NetworkXError("Columns must match Indices.", msg) from err 

206 

207 A = df.values 

208 G = from_numpy_array(A, create_using=create_using) 

209 

210 nx.relabel.relabel_nodes(G, dict(enumerate(df.columns)), copy=False) 

211 return G 

212 

213 

214@nx._dispatch(preserve_edge_attrs=True) 

215def to_pandas_edgelist( 

216 G, 

217 source="source", 

218 target="target", 

219 nodelist=None, 

220 dtype=None, 

221 edge_key=None, 

222): 

223 """Returns the graph edge list as a Pandas DataFrame. 

224 

225 Parameters 

226 ---------- 

227 G : graph 

228 The NetworkX graph used to construct the Pandas DataFrame. 

229 

230 source : str or int, optional 

231 A valid column name (string or integer) for the source nodes (for the 

232 directed case). 

233 

234 target : str or int, optional 

235 A valid column name (string or integer) for the target nodes (for the 

236 directed case). 

237 

238 nodelist : list, optional 

239 Use only nodes specified in nodelist 

240 

241 dtype : dtype, default None 

242 Use to create the DataFrame. Data type to force. 

243 Only a single dtype is allowed. If None, infer. 

244 

245 edge_key : str or int or None, optional (default=None) 

246 A valid column name (string or integer) for the edge keys (for the 

247 multigraph case). If None, edge keys are not stored in the DataFrame. 

248 

249 Returns 

250 ------- 

251 df : Pandas DataFrame 

252 Graph edge list 

253 

254 Examples 

255 -------- 

256 >>> G = nx.Graph( 

257 ... [ 

258 ... ("A", "B", {"cost": 1, "weight": 7}), 

259 ... ("C", "E", {"cost": 9, "weight": 10}), 

260 ... ] 

261 ... ) 

262 >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"]) 

263 >>> df[["source", "target", "cost", "weight"]] 

264 source target cost weight 

265 0 A B 1 7 

266 1 C E 9 10 

267 

268 >>> G = nx.MultiGraph([('A', 'B', {'cost': 1}), ('A', 'B', {'cost': 9})]) 

269 >>> df = nx.to_pandas_edgelist(G, nodelist=['A', 'C'], edge_key='ekey') 

270 >>> df[['source', 'target', 'cost', 'ekey']] 

271 source target cost ekey 

272 0 A B 1 0 

273 1 A B 9 1 

274 

275 """ 

276 import pandas as pd 

277 

278 if nodelist is None: 

279 edgelist = G.edges(data=True) 

280 else: 

281 edgelist = G.edges(nodelist, data=True) 

282 source_nodes = [s for s, _, _ in edgelist] 

283 target_nodes = [t for _, t, _ in edgelist] 

284 

285 all_attrs = set().union(*(d.keys() for _, _, d in edgelist)) 

286 if source in all_attrs: 

287 raise nx.NetworkXError(f"Source name {source!r} is an edge attr name") 

288 if target in all_attrs: 

289 raise nx.NetworkXError(f"Target name {target!r} is an edge attr name") 

290 

291 nan = float("nan") 

292 edge_attr = {k: [d.get(k, nan) for _, _, d in edgelist] for k in all_attrs} 

293 

294 if G.is_multigraph() and edge_key is not None: 

295 if edge_key in all_attrs: 

296 raise nx.NetworkXError(f"Edge key name {edge_key!r} is an edge attr name") 

297 edge_keys = [k for _, _, k in G.edges(keys=True)] 

298 edgelistdict = {source: source_nodes, target: target_nodes, edge_key: edge_keys} 

299 else: 

300 edgelistdict = {source: source_nodes, target: target_nodes} 

301 

302 edgelistdict.update(edge_attr) 

303 return pd.DataFrame(edgelistdict, dtype=dtype) 

304 

305 

306@nx._dispatch(graphs=None) 

307def from_pandas_edgelist( 

308 df, 

309 source="source", 

310 target="target", 

311 edge_attr=None, 

312 create_using=None, 

313 edge_key=None, 

314): 

315 """Returns a graph from Pandas DataFrame containing an edge list. 

316 

317 The Pandas DataFrame should contain at least two columns of node names and 

318 zero or more columns of edge attributes. Each row will be processed as one 

319 edge instance. 

320 

321 Note: This function iterates over DataFrame.values, which is not 

322 guaranteed to retain the data type across columns in the row. This is only 

323 a problem if your row is entirely numeric and a mix of ints and floats. In 

324 that case, all values will be returned as floats. See the 

325 DataFrame.iterrows documentation for an example. 

326 

327 Parameters 

328 ---------- 

329 df : Pandas DataFrame 

330 An edge list representation of a graph 

331 

332 source : str or int 

333 A valid column name (string or integer) for the source nodes (for the 

334 directed case). 

335 

336 target : str or int 

337 A valid column name (string or integer) for the target nodes (for the 

338 directed case). 

339 

340 edge_attr : str or int, iterable, True, or None 

341 A valid column name (str or int) or iterable of column names that are 

342 used to retrieve items and add them to the graph as edge attributes. 

343 If `True`, all of the remaining columns will be added. 

344 If `None`, no edge attributes are added to the graph. 

345 

346 create_using : NetworkX graph constructor, optional (default=nx.Graph) 

347 Graph type to create. If graph instance, then cleared before populated. 

348 

349 edge_key : str or None, optional (default=None) 

350 A valid column name for the edge keys (for a MultiGraph). The values in 

351 this column are used for the edge keys when adding edges if create_using 

352 is a multigraph. 

353 

354 See Also 

355 -------- 

356 to_pandas_edgelist 

357 

358 Examples 

359 -------- 

360 Simple integer weights on edges: 

361 

362 >>> import pandas as pd 

363 >>> pd.options.display.max_columns = 20 

364 >>> import numpy as np 

365 >>> rng = np.random.RandomState(seed=5) 

366 >>> ints = rng.randint(1, 11, size=(3, 2)) 

367 >>> a = ["A", "B", "C"] 

368 >>> b = ["D", "A", "E"] 

369 >>> df = pd.DataFrame(ints, columns=["weight", "cost"]) 

370 >>> df[0] = a 

371 >>> df["b"] = b 

372 >>> df[["weight", "cost", 0, "b"]] 

373 weight cost 0 b 

374 0 4 7 A D 

375 1 7 1 B A 

376 2 10 9 C E 

377 >>> G = nx.from_pandas_edgelist(df, 0, "b", ["weight", "cost"]) 

378 >>> G["E"]["C"]["weight"] 

379 10 

380 >>> G["E"]["C"]["cost"] 

381 9 

382 >>> edges = pd.DataFrame( 

383 ... { 

384 ... "source": [0, 1, 2], 

385 ... "target": [2, 2, 3], 

386 ... "weight": [3, 4, 5], 

387 ... "color": ["red", "blue", "blue"], 

388 ... } 

389 ... ) 

390 >>> G = nx.from_pandas_edgelist(edges, edge_attr=True) 

391 >>> G[0][2]["color"] 

392 'red' 

393 

394 Build multigraph with custom keys: 

395 

396 >>> edges = pd.DataFrame( 

397 ... { 

398 ... "source": [0, 1, 2, 0], 

399 ... "target": [2, 2, 3, 2], 

400 ... "my_edge_key": ["A", "B", "C", "D"], 

401 ... "weight": [3, 4, 5, 6], 

402 ... "color": ["red", "blue", "blue", "blue"], 

403 ... } 

404 ... ) 

405 >>> G = nx.from_pandas_edgelist( 

406 ... edges, 

407 ... edge_key="my_edge_key", 

408 ... edge_attr=["weight", "color"], 

409 ... create_using=nx.MultiGraph(), 

410 ... ) 

411 >>> G[0][2] 

412 AtlasView({'A': {'weight': 3, 'color': 'red'}, 'D': {'weight': 6, 'color': 'blue'}}) 

413 

414 

415 """ 

416 g = nx.empty_graph(0, create_using) 

417 

418 if edge_attr is None: 

419 g.add_edges_from(zip(df[source], df[target])) 

420 return g 

421 

422 reserved_columns = [source, target] 

423 

424 # Additional columns requested 

425 attr_col_headings = [] 

426 attribute_data = [] 

427 if edge_attr is True: 

428 attr_col_headings = [c for c in df.columns if c not in reserved_columns] 

429 elif isinstance(edge_attr, (list, tuple)): 

430 attr_col_headings = edge_attr 

431 else: 

432 attr_col_headings = [edge_attr] 

433 if len(attr_col_headings) == 0: 

434 raise nx.NetworkXError( 

435 f"Invalid edge_attr argument: No columns found with name: {attr_col_headings}" 

436 ) 

437 

438 try: 

439 attribute_data = zip(*[df[col] for col in attr_col_headings]) 

440 except (KeyError, TypeError) as err: 

441 msg = f"Invalid edge_attr argument: {edge_attr}" 

442 raise nx.NetworkXError(msg) from err 

443 

444 if g.is_multigraph(): 

445 # => append the edge keys from the df to the bundled data 

446 if edge_key is not None: 

447 try: 

448 multigraph_edge_keys = df[edge_key] 

449 attribute_data = zip(attribute_data, multigraph_edge_keys) 

450 except (KeyError, TypeError) as err: 

451 msg = f"Invalid edge_key argument: {edge_key}" 

452 raise nx.NetworkXError(msg) from err 

453 

454 for s, t, attrs in zip(df[source], df[target], attribute_data): 

455 if edge_key is not None: 

456 attrs, multigraph_edge_key = attrs 

457 key = g.add_edge(s, t, key=multigraph_edge_key) 

458 else: 

459 key = g.add_edge(s, t) 

460 

461 g[s][t][key].update(zip(attr_col_headings, attrs)) 

462 else: 

463 for s, t, attrs in zip(df[source], df[target], attribute_data): 

464 g.add_edge(s, t) 

465 g[s][t].update(zip(attr_col_headings, attrs)) 

466 

467 return g 

468 

469 

470@nx._dispatch(edge_attrs="weight") 

471def to_scipy_sparse_array(G, nodelist=None, dtype=None, weight="weight", format="csr"): 

472 """Returns the graph adjacency matrix as a SciPy sparse array. 

473 

474 Parameters 

475 ---------- 

476 G : graph 

477 The NetworkX graph used to construct the sparse matrix. 

478 

479 nodelist : list, optional 

480 The rows and columns are ordered according to the nodes in `nodelist`. 

481 If `nodelist` is None, then the ordering is produced by G.nodes(). 

482 

483 dtype : NumPy data-type, optional 

484 A valid NumPy dtype used to initialize the array. If None, then the 

485 NumPy default is used. 

486 

487 weight : string or None optional (default='weight') 

488 The edge attribute that holds the numerical value used for 

489 the edge weight. If None then all edge weights are 1. 

490 

491 format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'} 

492 The type of the matrix to be returned (default 'csr'). For 

493 some algorithms different implementations of sparse matrices 

494 can perform better. See [1]_ for details. 

495 

496 Returns 

497 ------- 

498 A : SciPy sparse array 

499 Graph adjacency matrix. 

500 

501 Notes 

502 ----- 

503 For directed graphs, matrix entry i,j corresponds to an edge from i to j. 

504 

505 The matrix entries are populated using the edge attribute held in 

506 parameter weight. When an edge does not have that attribute, the 

507 value of the entry is 1. 

508 

509 For multiple edges the matrix values are the sums of the edge weights. 

510 

511 When `nodelist` does not contain every node in `G`, the adjacency matrix 

512 is built from the subgraph of `G` that is induced by the nodes in 

513 `nodelist`. 

514 

515 The convention used for self-loop edges in graphs is to assign the 

516 diagonal matrix entry value to the weight attribute of the edge 

517 (or the number 1 if the edge has no weight attribute). If the 

518 alternate convention of doubling the edge weight is desired the 

519 resulting SciPy sparse array can be modified as follows: 

520 

521 >>> G = nx.Graph([(1, 1)]) 

522 >>> A = nx.to_scipy_sparse_array(G) 

523 >>> print(A.todense()) 

524 [[1]] 

525 >>> A.setdiag(A.diagonal() * 2) 

526 >>> print(A.toarray()) 

527 [[2]] 

528 

529 Examples 

530 -------- 

531 >>> G = nx.MultiDiGraph() 

532 >>> G.add_edge(0, 1, weight=2) 

533 0 

534 >>> G.add_edge(1, 0) 

535 0 

536 >>> G.add_edge(2, 2, weight=3) 

537 0 

538 >>> G.add_edge(2, 2) 

539 1 

540 >>> S = nx.to_scipy_sparse_array(G, nodelist=[0, 1, 2]) 

541 >>> print(S.toarray()) 

542 [[0 2 0] 

543 [1 0 0] 

544 [0 0 4]] 

545 

546 References 

547 ---------- 

548 .. [1] Scipy Dev. References, "Sparse Matrices", 

549 https://docs.scipy.org/doc/scipy/reference/sparse.html 

550 """ 

551 import scipy as sp 

552 

553 if len(G) == 0: 

554 raise nx.NetworkXError("Graph has no nodes or edges") 

555 

556 if nodelist is None: 

557 nodelist = list(G) 

558 nlen = len(G) 

559 else: 

560 nlen = len(nodelist) 

561 if nlen == 0: 

562 raise nx.NetworkXError("nodelist has no nodes") 

563 nodeset = set(G.nbunch_iter(nodelist)) 

564 if nlen != len(nodeset): 

565 for n in nodelist: 

566 if n not in G: 

567 raise nx.NetworkXError(f"Node {n} in nodelist is not in G") 

568 raise nx.NetworkXError("nodelist contains duplicates.") 

569 if nlen < len(G): 

570 G = G.subgraph(nodelist) 

571 

572 index = dict(zip(nodelist, range(nlen))) 

573 coefficients = zip( 

574 *((index[u], index[v], wt) for u, v, wt in G.edges(data=weight, default=1)) 

575 ) 

576 try: 

577 row, col, data = coefficients 

578 except ValueError: 

579 # there is no edge in the subgraph 

580 row, col, data = [], [], [] 

581 

582 if G.is_directed(): 

583 A = sp.sparse.coo_array((data, (row, col)), shape=(nlen, nlen), dtype=dtype) 

584 else: 

585 # symmetrize matrix 

586 d = data + data 

587 r = row + col 

588 c = col + row 

589 # selfloop entries get double counted when symmetrizing 

590 # so we subtract the data on the diagonal 

591 selfloops = list(nx.selfloop_edges(G, data=weight, default=1)) 

592 if selfloops: 

593 diag_index, diag_data = zip(*((index[u], -wt) for u, v, wt in selfloops)) 

594 d += diag_data 

595 r += diag_index 

596 c += diag_index 

597 A = sp.sparse.coo_array((d, (r, c)), shape=(nlen, nlen), dtype=dtype) 

598 try: 

599 return A.asformat(format) 

600 except ValueError as err: 

601 raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from err 

602 

603 

604def _csr_gen_triples(A): 

605 """Converts a SciPy sparse array in **Compressed Sparse Row** format to 

606 an iterable of weighted edge triples. 

607 

608 """ 

609 nrows = A.shape[0] 

610 data, indices, indptr = A.data, A.indices, A.indptr 

611 for i in range(nrows): 

612 for j in range(indptr[i], indptr[i + 1]): 

613 yield i, int(indices[j]), data[j] 

614 

615 

616def _csc_gen_triples(A): 

617 """Converts a SciPy sparse array in **Compressed Sparse Column** format to 

618 an iterable of weighted edge triples. 

619 

620 """ 

621 ncols = A.shape[1] 

622 data, indices, indptr = A.data, A.indices, A.indptr 

623 for i in range(ncols): 

624 for j in range(indptr[i], indptr[i + 1]): 

625 yield int(indices[j]), i, data[j] 

626 

627 

628def _coo_gen_triples(A): 

629 """Converts a SciPy sparse array in **Coordinate** format to an iterable 

630 of weighted edge triples. 

631 

632 """ 

633 return ((int(i), int(j), d) for i, j, d in zip(A.row, A.col, A.data)) 

634 

635 

636def _dok_gen_triples(A): 

637 """Converts a SciPy sparse array in **Dictionary of Keys** format to an 

638 iterable of weighted edge triples. 

639 

640 """ 

641 for (r, c), v in A.items(): 

642 yield r, c, v 

643 

644 

645def _generate_weighted_edges(A): 

646 """Returns an iterable over (u, v, w) triples, where u and v are adjacent 

647 vertices and w is the weight of the edge joining u and v. 

648 

649 `A` is a SciPy sparse array (in any format). 

650 

651 """ 

652 if A.format == "csr": 

653 return _csr_gen_triples(A) 

654 if A.format == "csc": 

655 return _csc_gen_triples(A) 

656 if A.format == "dok": 

657 return _dok_gen_triples(A) 

658 # If A is in any other format (including COO), convert it to COO format. 

659 return _coo_gen_triples(A.tocoo()) 

660 

661 

662@nx._dispatch(graphs=None) 

663def from_scipy_sparse_array( 

664 A, parallel_edges=False, create_using=None, edge_attribute="weight" 

665): 

666 """Creates a new graph from an adjacency matrix given as a SciPy sparse 

667 array. 

668 

669 Parameters 

670 ---------- 

671 A: scipy.sparse array 

672 An adjacency matrix representation of a graph 

673 

674 parallel_edges : Boolean 

675 If this is True, `create_using` is a multigraph, and `A` is an 

676 integer matrix, then entry *(i, j)* in the matrix is interpreted as the 

677 number of parallel edges joining vertices *i* and *j* in the graph. 

678 If it is False, then the entries in the matrix are interpreted as 

679 the weight of a single edge joining the vertices. 

680 

681 create_using : NetworkX graph constructor, optional (default=nx.Graph) 

682 Graph type to create. If graph instance, then cleared before populated. 

683 

684 edge_attribute: string 

685 Name of edge attribute to store matrix numeric value. The data will 

686 have the same type as the matrix entry (int, float, (real,imag)). 

687 

688 Notes 

689 ----- 

690 For directed graphs, explicitly mention create_using=nx.DiGraph, 

691 and entry i,j of A corresponds to an edge from i to j. 

692 

693 If `create_using` is :class:`networkx.MultiGraph` or 

694 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the 

695 entries of `A` are of type :class:`int`, then this function returns a 

696 multigraph (constructed from `create_using`) with parallel edges. 

697 In this case, `edge_attribute` will be ignored. 

698 

699 If `create_using` indicates an undirected multigraph, then only the edges 

700 indicated by the upper triangle of the matrix `A` will be added to the 

701 graph. 

702 

703 Examples 

704 -------- 

705 >>> import scipy as sp 

706 >>> A = sp.sparse.eye(2, 2, 1) 

707 >>> G = nx.from_scipy_sparse_array(A) 

708 

709 If `create_using` indicates a multigraph and the matrix has only integer 

710 entries and `parallel_edges` is False, then the entries will be treated 

711 as weights for edges joining the nodes (without creating parallel edges): 

712 

713 >>> A = sp.sparse.csr_array([[1, 1], [1, 2]]) 

714 >>> G = nx.from_scipy_sparse_array(A, create_using=nx.MultiGraph) 

715 >>> G[1][1] 

716 AtlasView({0: {'weight': 2}}) 

717 

718 If `create_using` indicates a multigraph and the matrix has only integer 

719 entries and `parallel_edges` is True, then the entries will be treated 

720 as the number of parallel edges joining those two vertices: 

721 

722 >>> A = sp.sparse.csr_array([[1, 1], [1, 2]]) 

723 >>> G = nx.from_scipy_sparse_array( 

724 ... A, parallel_edges=True, create_using=nx.MultiGraph 

725 ... ) 

726 >>> G[1][1] 

727 AtlasView({0: {'weight': 1}, 1: {'weight': 1}}) 

728 

729 """ 

730 G = nx.empty_graph(0, create_using) 

731 n, m = A.shape 

732 if n != m: 

733 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}") 

734 # Make sure we get even the isolated nodes of the graph. 

735 G.add_nodes_from(range(n)) 

736 # Create an iterable over (u, v, w) triples and for each triple, add an 

737 # edge from u to v with weight w. 

738 triples = _generate_weighted_edges(A) 

739 # If the entries in the adjacency matrix are integers, the graph is a 

740 # multigraph, and parallel_edges is True, then create parallel edges, each 

741 # with weight 1, for each entry in the adjacency matrix. Otherwise, create 

742 # one edge for each positive entry in the adjacency matrix and set the 

743 # weight of that edge to be the entry in the matrix. 

744 if A.dtype.kind in ("i", "u") and G.is_multigraph() and parallel_edges: 

745 chain = itertools.chain.from_iterable 

746 # The following line is equivalent to: 

747 # 

748 # for (u, v) in edges: 

749 # for d in range(A[u, v]): 

750 # G.add_edge(u, v, weight=1) 

751 # 

752 triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples) 

753 # If we are creating an undirected multigraph, only add the edges from the 

754 # upper triangle of the matrix. Otherwise, add all the edges. This relies 

755 # on the fact that the vertices created in the 

756 # `_generated_weighted_edges()` function are actually the row/column 

757 # indices for the matrix `A`. 

758 # 

759 # Without this check, we run into a problem where each edge is added twice 

760 # when `G.add_weighted_edges_from()` is invoked below. 

761 if G.is_multigraph() and not G.is_directed(): 

762 triples = ((u, v, d) for u, v, d in triples if u <= v) 

763 G.add_weighted_edges_from(triples, weight=edge_attribute) 

764 return G 

765 

766 

767@nx._dispatch(edge_attrs="weight") # edge attrs may also be obtained from `dtype` 

768def to_numpy_array( 

769 G, 

770 nodelist=None, 

771 dtype=None, 

772 order=None, 

773 multigraph_weight=sum, 

774 weight="weight", 

775 nonedge=0.0, 

776): 

777 """Returns the graph adjacency matrix as a NumPy array. 

778 

779 Parameters 

780 ---------- 

781 G : graph 

782 The NetworkX graph used to construct the NumPy array. 

783 

784 nodelist : list, optional 

785 The rows and columns are ordered according to the nodes in `nodelist`. 

786 If `nodelist` is ``None``, then the ordering is produced by ``G.nodes()``. 

787 

788 dtype : NumPy data type, optional 

789 A NumPy data type used to initialize the array. If None, then the NumPy 

790 default is used. The dtype can be structured if `weight=None`, in which 

791 case the dtype field names are used to look up edge attributes. The 

792 result is a structured array where each named field in the dtype 

793 corresponds to the adjacency for that edge attribute. See examples for 

794 details. 

795 

796 order : {'C', 'F'}, optional 

797 Whether to store multidimensional data in C- or Fortran-contiguous 

798 (row- or column-wise) order in memory. If None, then the NumPy default 

799 is used. 

800 

801 multigraph_weight : callable, optional 

802 An function that determines how weights in multigraphs are handled. 

803 The function should accept a sequence of weights and return a single 

804 value. The default is to sum the weights of the multiple edges. 

805 

806 weight : string or None optional (default = 'weight') 

807 The edge attribute that holds the numerical value used for 

808 the edge weight. If an edge does not have that attribute, then the 

809 value 1 is used instead. `weight` must be ``None`` if a structured 

810 dtype is used. 

811 

812 nonedge : array_like (default = 0.0) 

813 The value used to represent non-edges in the adjacency matrix. 

814 The array values corresponding to nonedges are typically set to zero. 

815 However, this could be undesirable if there are array values 

816 corresponding to actual edges that also have the value zero. If so, 

817 one might prefer nonedges to have some other value, such as ``nan``. 

818 

819 Returns 

820 ------- 

821 A : NumPy ndarray 

822 Graph adjacency matrix 

823 

824 Raises 

825 ------ 

826 NetworkXError 

827 If `dtype` is a structured dtype and `G` is a multigraph 

828 ValueError 

829 If `dtype` is a structured dtype and `weight` is not `None` 

830 

831 See Also 

832 -------- 

833 from_numpy_array 

834 

835 Notes 

836 ----- 

837 For directed graphs, entry ``i, j`` corresponds to an edge from ``i`` to ``j``. 

838 

839 Entries in the adjacency matrix are given by the `weight` edge attribute. 

840 When an edge does not have a weight attribute, the value of the entry is 

841 set to the number 1. For multiple (parallel) edges, the values of the 

842 entries are determined by the `multigraph_weight` parameter. The default is 

843 to sum the weight attributes for each of the parallel edges. 

844 

845 When `nodelist` does not contain every node in `G`, the adjacency matrix is 

846 built from the subgraph of `G` that is induced by the nodes in `nodelist`. 

847 

848 The convention used for self-loop edges in graphs is to assign the 

849 diagonal array entry value to the weight attribute of the edge 

850 (or the number 1 if the edge has no weight attribute). If the 

851 alternate convention of doubling the edge weight is desired the 

852 resulting NumPy array can be modified as follows: 

853 

854 >>> import numpy as np 

855 >>> G = nx.Graph([(1, 1)]) 

856 >>> A = nx.to_numpy_array(G) 

857 >>> A 

858 array([[1.]]) 

859 >>> A[np.diag_indices_from(A)] *= 2 

860 >>> A 

861 array([[2.]]) 

862 

863 Examples 

864 -------- 

865 >>> G = nx.MultiDiGraph() 

866 >>> G.add_edge(0, 1, weight=2) 

867 0 

868 >>> G.add_edge(1, 0) 

869 0 

870 >>> G.add_edge(2, 2, weight=3) 

871 0 

872 >>> G.add_edge(2, 2) 

873 1 

874 >>> nx.to_numpy_array(G, nodelist=[0, 1, 2]) 

875 array([[0., 2., 0.], 

876 [1., 0., 0.], 

877 [0., 0., 4.]]) 

878 

879 When `nodelist` argument is used, nodes of `G` which do not appear in the `nodelist` 

880 and their edges are not included in the adjacency matrix. Here is an example: 

881 

882 >>> G = nx.Graph() 

883 >>> G.add_edge(3, 1) 

884 >>> G.add_edge(2, 0) 

885 >>> G.add_edge(2, 1) 

886 >>> G.add_edge(3, 0) 

887 >>> nx.to_numpy_array(G, nodelist=[1, 2, 3]) 

888 array([[0., 1., 1.], 

889 [1., 0., 0.], 

890 [1., 0., 0.]]) 

891 

892 This function can also be used to create adjacency matrices for multiple 

893 edge attributes with structured dtypes: 

894 

895 >>> G = nx.Graph() 

896 >>> G.add_edge(0, 1, weight=10) 

897 >>> G.add_edge(1, 2, cost=5) 

898 >>> G.add_edge(2, 3, weight=3, cost=-4.0) 

899 >>> dtype = np.dtype([("weight", int), ("cost", float)]) 

900 >>> A = nx.to_numpy_array(G, dtype=dtype, weight=None) 

901 >>> A["weight"] 

902 array([[ 0, 10, 0, 0], 

903 [10, 0, 1, 0], 

904 [ 0, 1, 0, 3], 

905 [ 0, 0, 3, 0]]) 

906 >>> A["cost"] 

907 array([[ 0., 1., 0., 0.], 

908 [ 1., 0., 5., 0.], 

909 [ 0., 5., 0., -4.], 

910 [ 0., 0., -4., 0.]]) 

911 

912 As stated above, the argument "nonedge" is useful especially when there are 

913 actually edges with weight 0 in the graph. Setting a nonedge value different than 0, 

914 makes it much clearer to differentiate such 0-weighted edges and actual nonedge values. 

915 

916 >>> G = nx.Graph() 

917 >>> G.add_edge(3, 1, weight=2) 

918 >>> G.add_edge(2, 0, weight=0) 

919 >>> G.add_edge(2, 1, weight=0) 

920 >>> G.add_edge(3, 0, weight=1) 

921 >>> nx.to_numpy_array(G, nonedge=-1.) 

922 array([[-1., 2., -1., 1.], 

923 [ 2., -1., 0., -1.], 

924 [-1., 0., -1., 0.], 

925 [ 1., -1., 0., -1.]]) 

926 """ 

927 import numpy as np 

928 

929 if nodelist is None: 

930 nodelist = list(G) 

931 nlen = len(nodelist) 

932 

933 # Input validation 

934 nodeset = set(nodelist) 

935 if nodeset - set(G): 

936 raise nx.NetworkXError(f"Nodes {nodeset - set(G)} in nodelist is not in G") 

937 if len(nodeset) < nlen: 

938 raise nx.NetworkXError("nodelist contains duplicates.") 

939 

940 A = np.full((nlen, nlen), fill_value=nonedge, dtype=dtype, order=order) 

941 

942 # Corner cases: empty nodelist or graph without any edges 

943 if nlen == 0 or G.number_of_edges() == 0: 

944 return A 

945 

946 # If dtype is structured and weight is None, use dtype field names as 

947 # edge attributes 

948 edge_attrs = None # Only single edge attribute by default 

949 if A.dtype.names: 

950 if weight is None: 

951 edge_attrs = dtype.names 

952 else: 

953 raise ValueError( 

954 "Specifying `weight` not supported for structured dtypes\n." 

955 "To create adjacency matrices from structured dtypes, use `weight=None`." 

956 ) 

957 

958 # Map nodes to row/col in matrix 

959 idx = dict(zip(nodelist, range(nlen))) 

960 if len(nodelist) < len(G): 

961 G = G.subgraph(nodelist).copy() 

962 

963 # Collect all edge weights and reduce with `multigraph_weights` 

964 if G.is_multigraph(): 

965 if edge_attrs: 

966 raise nx.NetworkXError( 

967 "Structured arrays are not supported for MultiGraphs" 

968 ) 

969 d = defaultdict(list) 

970 for u, v, wt in G.edges(data=weight, default=1.0): 

971 d[(idx[u], idx[v])].append(wt) 

972 i, j = np.array(list(d.keys())).T # indices 

973 wts = [multigraph_weight(ws) for ws in d.values()] # reduced weights 

974 else: 

975 i, j, wts = [], [], [] 

976 

977 # Special branch: multi-attr adjacency from structured dtypes 

978 if edge_attrs: 

979 # Extract edges with all data 

980 for u, v, data in G.edges(data=True): 

981 i.append(idx[u]) 

982 j.append(idx[v]) 

983 wts.append(data) 

984 # Map each attribute to the appropriate named field in the 

985 # structured dtype 

986 for attr in edge_attrs: 

987 attr_data = [wt.get(attr, 1.0) for wt in wts] 

988 A[attr][i, j] = attr_data 

989 if not G.is_directed(): 

990 A[attr][j, i] = attr_data 

991 return A 

992 

993 for u, v, wt in G.edges(data=weight, default=1.0): 

994 i.append(idx[u]) 

995 j.append(idx[v]) 

996 wts.append(wt) 

997 

998 # Set array values with advanced indexing 

999 A[i, j] = wts 

1000 if not G.is_directed(): 

1001 A[j, i] = wts 

1002 

1003 return A 

1004 

1005 

1006@nx._dispatch(graphs=None) 

1007def from_numpy_array(A, parallel_edges=False, create_using=None, edge_attr="weight"): 

1008 """Returns a graph from a 2D NumPy array. 

1009 

1010 The 2D NumPy array is interpreted as an adjacency matrix for the graph. 

1011 

1012 Parameters 

1013 ---------- 

1014 A : a 2D numpy.ndarray 

1015 An adjacency matrix representation of a graph 

1016 

1017 parallel_edges : Boolean 

1018 If this is True, `create_using` is a multigraph, and `A` is an 

1019 integer array, then entry *(i, j)* in the array is interpreted as the 

1020 number of parallel edges joining vertices *i* and *j* in the graph. 

1021 If it is False, then the entries in the array are interpreted as 

1022 the weight of a single edge joining the vertices. 

1023 

1024 create_using : NetworkX graph constructor, optional (default=nx.Graph) 

1025 Graph type to create. If graph instance, then cleared before populated. 

1026 

1027 edge_attr : String, optional (default="weight") 

1028 The attribute to which the array values are assigned on each edge. If 

1029 it is None, edge attributes will not be assigned. 

1030 

1031 Notes 

1032 ----- 

1033 For directed graphs, explicitly mention create_using=nx.DiGraph, 

1034 and entry i,j of A corresponds to an edge from i to j. 

1035 

1036 If `create_using` is :class:`networkx.MultiGraph` or 

1037 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the 

1038 entries of `A` are of type :class:`int`, then this function returns a 

1039 multigraph (of the same type as `create_using`) with parallel edges. 

1040 

1041 If `create_using` indicates an undirected multigraph, then only the edges 

1042 indicated by the upper triangle of the array `A` will be added to the 

1043 graph. 

1044 

1045 If `edge_attr` is Falsy (False or None), edge attributes will not be 

1046 assigned, and the array data will be treated like a binary mask of 

1047 edge presence or absence. Otherwise, the attributes will be assigned 

1048 as follows: 

1049 

1050 If the NumPy array has a single data type for each array entry it 

1051 will be converted to an appropriate Python data type. 

1052 

1053 If the NumPy array has a user-specified compound data type the names 

1054 of the data fields will be used as attribute keys in the resulting 

1055 NetworkX graph. 

1056 

1057 See Also 

1058 -------- 

1059 to_numpy_array 

1060 

1061 Examples 

1062 -------- 

1063 Simple integer weights on edges: 

1064 

1065 >>> import numpy as np 

1066 >>> A = np.array([[1, 1], [2, 1]]) 

1067 >>> G = nx.from_numpy_array(A) 

1068 >>> G.edges(data=True) 

1069 EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), (1, 1, {'weight': 1})]) 

1070 

1071 If `create_using` indicates a multigraph and the array has only integer 

1072 entries and `parallel_edges` is False, then the entries will be treated 

1073 as weights for edges joining the nodes (without creating parallel edges): 

1074 

1075 >>> A = np.array([[1, 1], [1, 2]]) 

1076 >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph) 

1077 >>> G[1][1] 

1078 AtlasView({0: {'weight': 2}}) 

1079 

1080 If `create_using` indicates a multigraph and the array has only integer 

1081 entries and `parallel_edges` is True, then the entries will be treated 

1082 as the number of parallel edges joining those two vertices: 

1083 

1084 >>> A = np.array([[1, 1], [1, 2]]) 

1085 >>> temp = nx.MultiGraph() 

1086 >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp) 

1087 >>> G[1][1] 

1088 AtlasView({0: {'weight': 1}, 1: {'weight': 1}}) 

1089 

1090 User defined compound data type on edges: 

1091 

1092 >>> dt = [("weight", float), ("cost", int)] 

1093 >>> A = np.array([[(1.0, 2)]], dtype=dt) 

1094 >>> G = nx.from_numpy_array(A) 

1095 >>> G.edges() 

1096 EdgeView([(0, 0)]) 

1097 >>> G[0][0]["cost"] 

1098 2 

1099 >>> G[0][0]["weight"] 

1100 1.0 

1101 

1102 """ 

1103 kind_to_python_type = { 

1104 "f": float, 

1105 "i": int, 

1106 "u": int, 

1107 "b": bool, 

1108 "c": complex, 

1109 "S": str, 

1110 "U": str, 

1111 "V": "void", 

1112 } 

1113 G = nx.empty_graph(0, create_using) 

1114 if A.ndim != 2: 

1115 raise nx.NetworkXError(f"Input array must be 2D, not {A.ndim}") 

1116 n, m = A.shape 

1117 if n != m: 

1118 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}") 

1119 dt = A.dtype 

1120 try: 

1121 python_type = kind_to_python_type[dt.kind] 

1122 except Exception as err: 

1123 raise TypeError(f"Unknown numpy data type: {dt}") from err 

1124 

1125 # Make sure we get even the isolated nodes of the graph. 

1126 G.add_nodes_from(range(n)) 

1127 # Get a list of all the entries in the array with nonzero entries. These 

1128 # coordinates become edges in the graph. (convert to int from np.int64) 

1129 edges = ((int(e[0]), int(e[1])) for e in zip(*A.nonzero())) 

1130 # handle numpy constructed data type 

1131 if python_type == "void": 

1132 # Sort the fields by their offset, then by dtype, then by name. 

1133 fields = sorted( 

1134 (offset, dtype, name) for name, (dtype, offset) in A.dtype.fields.items() 

1135 ) 

1136 triples = ( 

1137 ( 

1138 u, 

1139 v, 

1140 { 

1141 name: kind_to_python_type[dtype.kind](val) 

1142 for (_, dtype, name), val in zip(fields, A[u, v]) 

1143 } 

1144 if edge_attr 

1145 else {}, 

1146 ) 

1147 for u, v in edges 

1148 ) 

1149 # If the entries in the adjacency matrix are integers, the graph is a 

1150 # multigraph, and parallel_edges is True, then create parallel edges, each 

1151 # with weight 1, for each entry in the adjacency matrix. Otherwise, create 

1152 # one edge for each positive entry in the adjacency matrix and set the 

1153 # weight of that edge to be the entry in the matrix. 

1154 elif python_type is int and G.is_multigraph() and parallel_edges: 

1155 chain = itertools.chain.from_iterable 

1156 # The following line is equivalent to: 

1157 # 

1158 # for (u, v) in edges: 

1159 # for d in range(A[u, v]): 

1160 # G.add_edge(u, v, weight=1) 

1161 # 

1162 if edge_attr: 

1163 triples = chain( 

1164 ((u, v, {edge_attr: 1}) for d in range(A[u, v])) for (u, v) in edges 

1165 ) 

1166 else: 

1167 triples = chain(((u, v, {}) for d in range(A[u, v])) for (u, v) in edges) 

1168 else: # basic data type 

1169 if edge_attr: 

1170 triples = ((u, v, {edge_attr: python_type(A[u, v])}) for u, v in edges) 

1171 else: 

1172 triples = ((u, v, {}) for u, v in edges) 

1173 # If we are creating an undirected multigraph, only add the edges from the 

1174 # upper triangle of the matrix. Otherwise, add all the edges. This relies 

1175 # on the fact that the vertices created in the 

1176 # `_generated_weighted_edges()` function are actually the row/column 

1177 # indices for the matrix `A`. 

1178 # 

1179 # Without this check, we run into a problem where each edge is added twice 

1180 # when `G.add_edges_from()` is invoked below. 

1181 if G.is_multigraph() and not G.is_directed(): 

1182 triples = ((u, v, d) for u, v, d in triples if u <= v) 

1183 G.add_edges_from(triples) 

1184 return G