Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/convert

1"""Functions to convert NetworkX graphs to and from common data containers

2like numpy arrays, scipy sparse arrays, and pandas DataFrames.

4The preferred way of converting data to a NetworkX graph is through the

5graph constructor. The constructor calls the `~networkx.convert.to_networkx_graph`

6function which attempts to guess the input type and convert it automatically.

8Examples

9--------

10Create a 10 node random graph from a numpy array

12>>> import numpy as np

13>>> rng = np.random.default_rng()

14>>> a = rng.integers(low=0, high=2, size=(10, 10))

15>>> DG = nx.from_numpy_array(a, create_using=nx.DiGraph)

17or equivalently:

19>>> DG = nx.DiGraph(a)

21which calls `from_numpy_array` internally based on the type of ``a``.

23See Also

24--------

25nx_agraph, nx_pydot

26"""

28import itertools

29from collections import defaultdict

31import networkx as nx

33__all__ = [

34 "from_pandas_adjacency",

35 "to_pandas_adjacency",

36 "from_pandas_edgelist",

37 "to_pandas_edgelist",

38 "from_scipy_sparse_array",

39 "to_scipy_sparse_array",

40 "from_numpy_array",

41 "to_numpy_array",

42]

45@nx._dispatchable(edge_attrs="weight")

46def to_pandas_adjacency(

47 G,

48 nodelist=None,

49 dtype=None,

50 order=None,

51 multigraph_weight=sum,

52 weight="weight",

53 nonedge=0.0,

54):

55 """Returns the graph adjacency matrix as a Pandas DataFrame.

57 Parameters

58 ----------

59 G : graph

60 The NetworkX graph used to construct the Pandas DataFrame.

62 nodelist : list, optional

63 The rows and columns are ordered according to the nodes in `nodelist`.

64 If `nodelist` is None, then the ordering is produced by G.nodes().

66 multigraph_weight : {sum, min, max}, optional

67 An operator that determines how weights in multigraphs are handled.

68 The default is to sum the weights of the multiple edges.

70 weight : string or None, optional

71 The edge attribute that holds the numerical value used for

72 the edge weight. If an edge does not have that attribute, then the

73 value 1 is used instead.

75 nonedge : float, optional

76 The matrix values corresponding to nonedges are typically set to zero.

77 However, this could be undesirable if there are matrix values

78 corresponding to actual edges that also have the value zero. If so,

79 one might prefer nonedges to have some other value, such as nan.

81 Returns

82 -------

83 df : Pandas DataFrame

84 Graph adjacency matrix

86 Notes

87 -----

88 For directed graphs, entry i,j corresponds to an edge from i to j.

90 The DataFrame entries are assigned to the weight edge attribute. When

91 an edge does not have a weight attribute, the value of the entry is set to

92 the number 1. For multiple (parallel) edges, the values of the entries

93 are determined by the 'multigraph_weight' parameter. The default is to

94 sum the weight attributes for each of the parallel edges.

96 When `nodelist` does not contain every node in `G`, the matrix is built

97 from the subgraph of `G` that is induced by the nodes in `nodelist`.

99 The convention used for self-loop edges in graphs is to assign the

100 diagonal matrix entry value to the weight attribute of the edge

101 (or the number 1 if the edge has no weight attribute). If the

102 alternate convention of doubling the edge weight is desired the

103 resulting Pandas DataFrame can be modified as follows::

104

105 >>> import pandas as pd

106 >>> G = nx.Graph([(1, 1), (2, 2)])

107 >>> df = nx.to_pandas_adjacency(G)

108 >>> df

109 1 2

110 1 1.0 0.0

111 2 0.0 1.0

112 >>> diag_idx = list(range(len(df)))

113 >>> df.iloc[diag_idx, diag_idx] *= 2

114 >>> df

115 1 2

116 1 2.0 0.0

117 2 0.0 2.0

118

119 Examples

120 --------

121 >>> G = nx.MultiDiGraph()

122 >>> G.add_edge(0, 1, weight=2)

123 0

124 >>> G.add_edge(1, 0)

125 0

126 >>> G.add_edge(2, 2, weight=3)

127 0

128 >>> G.add_edge(2, 2)

129 1

130 >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int)

131 0 1 2

132 0 0 2 0

133 1 1 0 0

134 2 0 0 4

135

136 """

137 import pandas as pd

138

139 M = to_numpy_array(

140 G,

141 nodelist=nodelist,

142 dtype=dtype,

143 order=order,

144 multigraph_weight=multigraph_weight,

145 weight=weight,

146 nonedge=nonedge,

147 )

148 if nodelist is None:

149 nodelist = list(G)

150 return pd.DataFrame(data=M, index=nodelist, columns=nodelist)

151

152

153@nx._dispatchable(graphs=None, returns_graph=True)

154def from_pandas_adjacency(df, create_using=None):

155 r"""Returns a graph from Pandas DataFrame.

156

157 The Pandas DataFrame is interpreted as an adjacency matrix for the graph.

158

159 Parameters

160 ----------

161 df : Pandas DataFrame

162 An adjacency matrix representation of a graph

163

164 create_using : NetworkX graph constructor, optional (default=nx.Graph)

165 Graph type to create. If graph instance, then cleared before populated.

166

167 Notes

168 -----

169 For directed graphs, explicitly mention create_using=nx.DiGraph,

170 and entry i,j of df corresponds to an edge from i to j.

171

172 If `df` has a single data type for each entry it will be converted to an

173 appropriate Python data type.

174

175 If you have node attributes stored in a separate dataframe `df_nodes`,

176 you can load those attributes to the graph `G` using the following code::

177

178 df_nodes = pd.DataFrame({"node_id": [1, 2, 3], "attribute1": ["A", "B", "C"]})

179 G.add_nodes_from((n, dict(d)) for n, d in df_nodes.iterrows())

180

181 If `df` has a user-specified compound data type the names

182 of the data fields will be used as attribute keys in the resulting

183 NetworkX graph.

184

185 See Also

186 --------

187 to_pandas_adjacency

188

189 Examples

190 --------

191 Simple integer weights on edges:

192

193 >>> import pandas as pd

194 >>> pd.options.display.max_columns = 20

195 >>> df = pd.DataFrame([[1, 1], [2, 1]])

196 >>> df

197 0 1

198 0 1 1

199 1 2 1

200 >>> G = nx.from_pandas_adjacency(df)

201 >>> G.name = "Graph from pandas adjacency matrix"

202 >>> print(G)

203 Graph named 'Graph from pandas adjacency matrix' with 2 nodes and 3 edges

204 """

205

206 try:

207 df = df[df.index]

208 except Exception as err:

209 missing = list(set(df.index).difference(set(df.columns)))

210 msg = f"{missing} not in columns"

211 raise nx.NetworkXError("Columns must match Indices.", msg) from err

212

213 A = df.values

214 G = from_numpy_array(A, create_using=create_using, nodelist=df.columns)

215

216 return G

217

218

219@nx._dispatchable(preserve_edge_attrs=True)

220def to_pandas_edgelist(

221 G,

222 source="source",

223 target="target",

224 nodelist=None,

225 dtype=None,

226 edge_key=None,

227):

228 """Returns the graph edge list as a Pandas DataFrame.

229

230 Parameters

231 ----------

232 G : graph

233 The NetworkX graph used to construct the Pandas DataFrame.

234

235 source : str or int, optional

236 A valid column name (string or integer) for the source nodes (for the

237 directed case).

238

239 target : str or int, optional

240 A valid column name (string or integer) for the target nodes (for the

241 directed case).

242

243 nodelist : list, optional

244 Use only nodes specified in nodelist

245

246 dtype : dtype, default None

247 Use to create the DataFrame. Data type to force.

248 Only a single dtype is allowed. If None, infer.

249

250 edge_key : str or int or None, optional (default=None)

251 A valid column name (string or integer) for the edge keys (for the

252 multigraph case). If None, edge keys are not stored in the DataFrame.

253

254 Returns

255 -------

256 df : Pandas DataFrame

257 Graph edge list

258

259 Examples

260 --------

261 >>> G = nx.Graph(

262 ... [

263 ... ("A", "B", {"cost": 1, "weight": 7}),

264 ... ("C", "E", {"cost": 9, "weight": 10}),

265 ... ]

266 ... )

267 >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"])

268 >>> df[["source", "target", "cost", "weight"]]

269 source target cost weight

270 0 A B 1 7

271 1 C E 9 10

272

273 >>> G = nx.MultiGraph([("A", "B", {"cost": 1}), ("A", "B", {"cost": 9})])

274 >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"], edge_key="ekey")

275 >>> df[["source", "target", "cost", "ekey"]]

276 source target cost ekey

277 0 A B 1 0

278 1 A B 9 1

279

280 """

281 import pandas as pd

282

283 if nodelist is None:

284 edgelist = G.edges(data=True)

285 else:

286 edgelist = G.edges(nodelist, data=True)

287 source_nodes = [s for s, _, _ in edgelist]

288 target_nodes = [t for _, t, _ in edgelist]

289

290 all_attrs = set().union(*(d.keys() for _, _, d in edgelist))

291 if source in all_attrs:

292 raise nx.NetworkXError(f"Source name {source!r} is an edge attr name")

293 if target in all_attrs:

294 raise nx.NetworkXError(f"Target name {target!r} is an edge attr name")

295

296 nan = float("nan")

297 edge_attr = {k: [d.get(k, nan) for _, _, d in edgelist] for k in all_attrs}

298

299 if G.is_multigraph() and edge_key is not None:

300 if edge_key in all_attrs:

301 raise nx.NetworkXError(f"Edge key name {edge_key!r} is an edge attr name")

302 edge_keys = [k for _, _, k in G.edges(keys=True)]

303 edgelistdict = {source: source_nodes, target: target_nodes, edge_key: edge_keys}

304 else:

305 edgelistdict = {source: source_nodes, target: target_nodes}

306

307 edgelistdict.update(edge_attr)

308 return pd.DataFrame(edgelistdict, dtype=dtype)

309

310

311@nx._dispatchable(graphs=None, returns_graph=True)

312def from_pandas_edgelist(

313 df,

314 source="source",

315 target="target",

316 edge_attr=None,

317 create_using=None,

318 edge_key=None,

319):

320 """Returns a graph from Pandas DataFrame containing an edge list.

321

322 The Pandas DataFrame should contain at least two columns of node names and

323 zero or more columns of edge attributes. Each row will be processed as one

324 edge instance.

325

326 Note: This function iterates over DataFrame.values, which is not

327 guaranteed to retain the data type across columns in the row. This is only

328 a problem if your row is entirely numeric and a mix of ints and floats. In

329 that case, all values will be returned as floats. See the

330 DataFrame.iterrows documentation for an example.

331

332 Parameters

333 ----------

334 df : Pandas DataFrame

335 An edge list representation of a graph

336

337 source : str or int

338 A valid column name (string or integer) for the source nodes (for the

339 directed case).

340

341 target : str or int

342 A valid column name (string or integer) for the target nodes (for the

343 directed case).

344

345 edge_attr : str or int, iterable, True, or None

346 A valid column name (str or int) or iterable of column names that are

347 used to retrieve items and add them to the graph as edge attributes.

348 If `True`, all columns will be added except `source`, `target` and `edge_key`.

349 If `None`, no edge attributes are added to the graph.

350

351 create_using : NetworkX graph constructor, optional (default=nx.Graph)

352 Graph type to create. If graph instance, then cleared before populated.

353

354 edge_key : str or None, optional (default=None)

355 A valid column name for the edge keys (for a MultiGraph). The values in

356 this column are used for the edge keys when adding edges if create_using

357 is a multigraph.

358

359 Notes

360 -----

361 If you have node attributes stored in a separate dataframe `df_nodes`,

362 you can load those attributes to the graph `G` using the following code::

363

364 df_nodes = pd.DataFrame({"node_id": [1, 2, 3], "attribute1": ["A", "B", "C"]})

365 G.add_nodes_from((n, dict(d)) for n, d in df_nodes.iterrows())

366

367 See Also

368 --------

369 to_pandas_edgelist

370

371 Examples

372 --------

373 Simple integer weights on edges:

374

375 >>> import pandas as pd

376 >>> pd.options.display.max_columns = 20

377 >>> import numpy as np

378 >>> rng = np.random.RandomState(seed=5)

379 >>> ints = rng.randint(1, 11, size=(3, 2))

380 >>> a = ["A", "B", "C"]

381 >>> b = ["D", "A", "E"]

382 >>> df = pd.DataFrame(ints, columns=["weight", "cost"])

383 >>> df[0] = a

384 >>> df["b"] = b

385 >>> df[["weight", "cost", 0, "b"]]

386 weight cost 0 b

387 0 4 7 A D

388 1 7 1 B A

389 2 10 9 C E

390 >>> G = nx.from_pandas_edgelist(df, 0, "b", ["weight", "cost"])

391 >>> G["E"]["C"]["weight"]

392 10

393 >>> G["E"]["C"]["cost"]

394 9

395 >>> edges = pd.DataFrame(

396 ... {

397 ... "source": [0, 1, 2],

398 ... "target": [2, 2, 3],

399 ... "weight": [3, 4, 5],

400 ... "color": ["red", "blue", "blue"],

401 ... }

402 ... )

403 >>> G = nx.from_pandas_edgelist(edges, edge_attr=True)

404 >>> G[0][2]["color"]

405 'red'

406

407 Build multigraph with custom keys:

408

409 >>> edges = pd.DataFrame(

410 ... {

411 ... "source": [0, 1, 2, 0],

412 ... "target": [2, 2, 3, 2],

413 ... "my_edge_key": ["A", "B", "C", "D"],

414 ... "weight": [3, 4, 5, 6],

415 ... "color": ["red", "blue", "blue", "blue"],

416 ... }

417 ... )

418 >>> G = nx.from_pandas_edgelist(

419 ... edges,

420 ... edge_key="my_edge_key",

421 ... edge_attr=["weight", "color"],

422 ... create_using=nx.MultiGraph(),

423 ... )

424 >>> G[0][2]

425 AtlasView({'A': {'weight': 3, 'color': 'red'}, 'D': {'weight': 6, 'color': 'blue'}})

426

427

428 """

429 g = nx.empty_graph(0, create_using)

430

431 if edge_attr is None:

432 if g.is_multigraph() and edge_key is not None:

433 for u, v, k in zip(df[source], df[target], df[edge_key]):

434 g.add_edge(u, v, k)

435 else:

436 g.add_edges_from(zip(df[source], df[target]))

437 return g

438

439 reserved_columns = [source, target]

440 if g.is_multigraph() and edge_key is not None:

441 reserved_columns.append(edge_key)

442

443 # Additional columns requested

444 attr_col_headings = []

445 attribute_data = []

446 if edge_attr is True:

447 attr_col_headings = [c for c in df.columns if c not in reserved_columns]

448 elif isinstance(edge_attr, list | tuple):

449 attr_col_headings = edge_attr

450 else:

451 attr_col_headings = [edge_attr]

452 if len(attr_col_headings) == 0:

453 raise nx.NetworkXError(

454 f"Invalid edge_attr argument: No columns found with name: {attr_col_headings}"

455 )

456

457 try:

458 attribute_data = zip(*[df[col] for col in attr_col_headings])

459 except (KeyError, TypeError) as err:

460 msg = f"Invalid edge_attr argument: {edge_attr}"

461 raise nx.NetworkXError(msg) from err

462

463 if g.is_multigraph():

464 # => append the edge keys from the df to the bundled data

465 if edge_key is not None:

466 try:

467 multigraph_edge_keys = df[edge_key]

468 attribute_data = zip(attribute_data, multigraph_edge_keys)

469 except (KeyError, TypeError) as err:

470 msg = f"Invalid edge_key argument: {edge_key}"

471 raise nx.NetworkXError(msg) from err

472

473 for s, t, attrs in zip(df[source], df[target], attribute_data):

474 if edge_key is not None:

475 attrs, multigraph_edge_key = attrs

476 key = g.add_edge(s, t, key=multigraph_edge_key)

477 else:

478 key = g.add_edge(s, t)

479

480 g[s][t][key].update(zip(attr_col_headings, attrs))

481 else:

482 for s, t, attrs in zip(df[source], df[target], attribute_data):

483 g.add_edge(s, t)

484 g[s][t].update(zip(attr_col_headings, attrs))

485

486 return g

487

488

489@nx._dispatchable(edge_attrs="weight")

490def to_scipy_sparse_array(G, nodelist=None, dtype=None, weight="weight", format="csr"):

491 """Returns the graph adjacency matrix as a SciPy sparse array.

492

493 Parameters

494 ----------

495 G : graph

496 The NetworkX graph used to construct the sparse array.

497

498 nodelist : list, optional

499 The rows and columns are ordered according to the nodes in `nodelist`.

500 If `nodelist` is None, then the ordering is produced by ``G.nodes()``.

501

502 dtype : NumPy data-type, optional

503 A valid NumPy dtype used to initialize the array. If None, then the

504 NumPy default is used.

505

506 weight : string or None, optional (default='weight')

507 The edge attribute that holds the numerical value used for

508 the edge weight. If None then all edge weights are 1.

509

510 format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}

511 The format of the sparse array to be returned (default 'csr'). For

512 some algorithms different implementations of sparse arrays

513 can perform better. See [1]_ for details.

514

515 Returns

516 -------

517 A : SciPy sparse array

518 Graph adjacency matrix.

519

520 Notes

521 -----

522 For directed graphs, matrix entry ``i, j`` corresponds to an edge from

523 ``i`` to ``j``.

524

525 The values of the adjacency matrix are populated using the edge attribute held in

526 parameter `weight`. When an edge does not have that attribute, the

527 value of the entry is 1.

528

529 For multiple edges the matrix values are the sums of the edge weights.

530

531 When `nodelist` does not contain every node in `G`, the adjacency matrix

532 is built from the subgraph of `G` that is induced by the nodes in

533 `nodelist`.

534

535 The convention used for self-loop edges in graphs is to assign the

536 diagonal matrix entry value to the weight attribute of the edge

537 (or the number 1 if the edge has no weight attribute). If the

538 alternate convention of doubling the edge weight is desired the

539 resulting array can be modified as follows::

540

541 >>> G = nx.Graph([(1, 1)])

542 >>> A = nx.to_scipy_sparse_array(G)

543 >>> A.toarray()

544 array([[1]])

545 >>> A.setdiag(A.diagonal() * 2)

546 >>> A.toarray()

547 array([[2]])

548

549 Examples

550 --------

551

552 Basic usage:

553

554 >>> G = nx.path_graph(4)

555 >>> A = nx.to_scipy_sparse_array(G)

556 >>> A # doctest: +SKIP

557 <Compressed Sparse Row sparse array of dtype 'int64'

558 with 6 stored elements and shape (4, 4)>

559

560 >>> A.toarray()

561 array([[0, 1, 0, 0],

562 [1, 0, 1, 0],

563 [0, 1, 0, 1],

564 [0, 0, 1, 0]])

565

566 .. note:: The `toarray` method is used in these examples to better visualize

567 the adjacency matrix. For a dense representation of the adjaceny matrix,

568 use `to_numpy_array` instead.

569

570 Directed graphs:

571

572 >>> G = nx.DiGraph([(0, 1), (1, 2), (2, 3)])

573 >>> nx.to_scipy_sparse_array(G).toarray()

574 array([[0, 1, 0, 0],

575 [0, 0, 1, 0],

576 [0, 0, 0, 1],

577 [0, 0, 0, 0]])

578

579 >>> H = G.reverse()

580 >>> H.edges

581 OutEdgeView([(1, 0), (2, 1), (3, 2)])

582 >>> nx.to_scipy_sparse_array(H).toarray()

583 array([[0, 0, 0, 0],

584 [1, 0, 0, 0],

585 [0, 1, 0, 0],

586 [0, 0, 1, 0]])

587

588 By default, the order of the rows/columns of the adjacency matrix is determined

589 by the ordering of the nodes in `G`:

590

591 >>> G = nx.Graph()

592 >>> G.add_nodes_from([3, 5, 0, 1])

593 >>> G.add_edges_from([(1, 3), (1, 5)])

594 >>> nx.to_scipy_sparse_array(G).toarray()

595 array([[0, 0, 0, 1],

596 [0, 0, 0, 1],

597 [0, 0, 0, 0],

598 [1, 1, 0, 0]])

599

600 The ordering of the rows can be changed with `nodelist`:

601

602 >>> ordered = [0, 1, 3, 5]

603 >>> nx.to_scipy_sparse_array(G, nodelist=ordered).toarray()

604 array([[0, 0, 0, 0],

605 [0, 0, 1, 1],

606 [0, 1, 0, 0],

607 [0, 1, 0, 0]])

608

609 If `nodelist` contains a subset of the nodes in `G`, the adjacency matrix

610 for the node-induced subgraph is produced:

611

612 >>> nx.to_scipy_sparse_array(G, nodelist=[1, 3, 5]).toarray()

613 array([[0, 1, 1],

614 [1, 0, 0],

615 [1, 0, 0]])

616

617 The values of the adjacency matrix are drawn from the edge attribute

618 specified by the `weight` parameter:

619

620 >>> G = nx.path_graph(4)

621 >>> nx.set_edge_attributes(

622 ... G, values={(0, 1): 1, (1, 2): 10, (2, 3): 2}, name="weight"

623 ... )

624 >>> nx.set_edge_attributes(

625 ... G, values={(0, 1): 50, (1, 2): 35, (2, 3): 10}, name="capacity"

626 ... )

627 >>> nx.to_scipy_sparse_array(G).toarray() # Default weight="weight"

628 array([[ 0, 1, 0, 0],

629 [ 1, 0, 10, 0],

630 [ 0, 10, 0, 2],

631 [ 0, 0, 2, 0]])

632 >>> nx.to_scipy_sparse_array(G, weight="capacity").toarray()

633 array([[ 0, 50, 0, 0],

634 [50, 0, 35, 0],

635 [ 0, 35, 0, 10],

636 [ 0, 0, 10, 0]])

637

638 Any edges that don't have a `weight` attribute default to 1:

639

640 >>> G[1][2].pop("capacity")

641 35

642 >>> nx.to_scipy_sparse_array(G, weight="capacity").toarray()

643 array([[ 0, 50, 0, 0],

644 [50, 0, 1, 0],

645 [ 0, 1, 0, 10],

646 [ 0, 0, 10, 0]])

647

648 When `G` is a multigraph, the values in the adjacency matrix are given by

649 the sum of the `weight` edge attribute over each edge key:

650

651 >>> G = nx.MultiDiGraph([(0, 1), (0, 1), (0, 1), (2, 0)])

652 >>> nx.to_scipy_sparse_array(G).toarray()

653 array([[0, 3, 0],

654 [0, 0, 0],

655 [1, 0, 0]])

656

657 References

658 ----------

659 .. [1] Scipy Dev. References, "Sparse Arrays",

660 https://docs.scipy.org/doc/scipy/reference/sparse.html

661 """

662 import scipy as sp

663

664 if len(G) == 0:

665 raise nx.NetworkXError("Graph has no nodes or edges")

666

667 if nodelist is None:

668 nodelist = list(G)

669 nlen = len(G)

670 else:

671 nlen = len(nodelist)

672 if nlen == 0:

673 raise nx.NetworkXError("nodelist has no nodes")

674 nodeset = set(G.nbunch_iter(nodelist))

675 if nlen != len(nodeset):

676 for n in nodelist:

677 if n not in G:

678 raise nx.NetworkXError(f"Node {n} in nodelist is not in G")

679 raise nx.NetworkXError("nodelist contains duplicates.")

680 if nlen < len(G):

681 G = G.subgraph(nodelist)

682

683 index = dict(zip(nodelist, range(nlen)))

684 coefficients = zip(

685 *((index[u], index[v], wt) for u, v, wt in G.edges(data=weight, default=1))

686 )

687 try:

688 row, col, data = coefficients

689 except ValueError:

690 # there is no edge in the subgraph

691 row, col, data = [], [], []

692

693 if G.is_directed():

694 A = sp.sparse.coo_array((data, (row, col)), shape=(nlen, nlen), dtype=dtype)

695 else:

696 # symmetrize matrix

697 d = data + data

698 r = row + col

699 c = col + row

700 # selfloop entries get double counted when symmetrizing

701 # so we subtract the data on the diagonal

702 selfloops = list(nx.selfloop_edges(G, data=weight, default=1))

703 if selfloops:

704 diag_index, diag_data = zip(*((index[u], -wt) for u, v, wt in selfloops))

705 d += diag_data

706 r += diag_index

707 c += diag_index

708 A = sp.sparse.coo_array((d, (r, c)), shape=(nlen, nlen), dtype=dtype)

709 try:

710 return A.asformat(format)

711 except ValueError as err:

712 raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from err

713

714

715def _csr_gen_triples(A):

716 """Converts a SciPy sparse array in **Compressed Sparse Row** format to

717 an iterable of weighted edge triples.

718

719 """

720 nrows = A.shape[0]

721 indptr, dst_indices, data = A.indptr, A.indices, A.data

722 import numpy as np

723

724 src_indices = np.repeat(np.arange(nrows), np.diff(indptr))

725 return zip(src_indices.tolist(), dst_indices.tolist(), A.data.tolist())

726

727

728def _csc_gen_triples(A):

729 """Converts a SciPy sparse array in **Compressed Sparse Column** format to

730 an iterable of weighted edge triples.

731

732 """

733 ncols = A.shape[1]

734 indptr, src_indices, data = A.indptr, A.indices, A.data

735 import numpy as np

736

737 dst_indices = np.repeat(np.arange(ncols), np.diff(indptr))

738 return zip(src_indices.tolist(), dst_indices.tolist(), A.data.tolist())

739

740

741def _coo_gen_triples(A):

742 """Converts a SciPy sparse array in **Coordinate** format to an iterable

743 of weighted edge triples.

744

745 """

746 return zip(A.row.tolist(), A.col.tolist(), A.data.tolist())

747

748

749def _dok_gen_triples(A):

750 """Converts a SciPy sparse array in **Dictionary of Keys** format to an

751 iterable of weighted edge triples.

752

753 """

754 for (r, c), v in A.items():

755 # Use `v.item()` to convert a NumPy scalar to the appropriate Python scalar

756 yield int(r), int(c), v.item()

757

758

759def _generate_weighted_edges(A):

760 """Returns an iterable over (u, v, w) triples, where u and v are adjacent

761 vertices and w is the weight of the edge joining u and v.

762

763 `A` is a SciPy sparse array (in any format).

764

765 """

766 if A.format == "csr":

767 return _csr_gen_triples(A)

768 if A.format == "csc":

769 return _csc_gen_triples(A)

770 if A.format == "dok":

771 return _dok_gen_triples(A)

772 # If A is in any other format (including COO), convert it to COO format.

773 return _coo_gen_triples(A.tocoo())

774

775

776@nx._dispatchable(graphs=None, returns_graph=True)

777def from_scipy_sparse_array(

778 A, parallel_edges=False, create_using=None, edge_attribute="weight"

779):

780 """Creates a new graph from an adjacency matrix given as a SciPy sparse

781 array.

782

783 Parameters

784 ----------

785 A: scipy.sparse array

786 An adjacency matrix representation of a graph

787

788 parallel_edges : Boolean

789 If this is True, `create_using` is a multigraph, and `A` is an

790 integer matrix, then entry *(i, j)* in the matrix is interpreted as the

791 number of parallel edges joining vertices *i* and *j* in the graph.

792 If it is False, then the entries in the matrix are interpreted as

793 the weight of a single edge joining the vertices.

794

795 create_using : NetworkX graph constructor, optional (default=nx.Graph)

796 Graph type to create. If graph instance, then cleared before populated.

797

798 edge_attribute: string

799 Name of edge attribute to store matrix numeric value. The data will

800 have the same type as the matrix entry (int, float, (real,imag)).

801

802 Notes

803 -----

804 For directed graphs, explicitly mention create_using=nx.DiGraph,

805 and entry i,j of A corresponds to an edge from i to j.

806

807 If `create_using` is :class:`networkx.MultiGraph` or

808 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the

809 entries of `A` are of type :class:`int`, then this function returns a

810 multigraph (constructed from `create_using`) with parallel edges.

811 In this case, `edge_attribute` will be ignored.

812

813 If `create_using` indicates an undirected multigraph, then only the edges

814 indicated by the upper triangle of the matrix `A` will be added to the

815 graph.

816

817 Examples

818 --------

819 >>> import scipy as sp

820 >>> A = sp.sparse.eye(2, 2, 1)

821 >>> G = nx.from_scipy_sparse_array(A)

822

823 If `create_using` indicates a multigraph and the matrix has only integer

824 entries and `parallel_edges` is False, then the entries will be treated

825 as weights for edges joining the nodes (without creating parallel edges):

826

827 >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])

828 >>> G = nx.from_scipy_sparse_array(A, create_using=nx.MultiGraph)

829 >>> G[1][1]

830 AtlasView({0: {'weight': 2}})

831

832 If `create_using` indicates a multigraph and the matrix has only integer

833 entries and `parallel_edges` is True, then the entries will be treated

834 as the number of parallel edges joining those two vertices:

835

836 >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])

837 >>> G = nx.from_scipy_sparse_array(

838 ... A, parallel_edges=True, create_using=nx.MultiGraph

839 ... )

840 >>> G[1][1]

841 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})

842

843 """

844 G = nx.empty_graph(0, create_using)

845 n, m = A.shape

846 if n != m:

847 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")

848 # Make sure we get even the isolated nodes of the graph.

849 G.add_nodes_from(range(n))

850 # Create an iterable over (u, v, w) triples and for each triple, add an

851 # edge from u to v with weight w.

852 triples = _generate_weighted_edges(A)

853 # If the entries in the adjacency matrix are integers, the graph is a

854 # multigraph, and parallel_edges is True, then create parallel edges, each

855 # with weight 1, for each entry in the adjacency matrix. Otherwise, create

856 # one edge for each positive entry in the adjacency matrix and set the

857 # weight of that edge to be the entry in the matrix.

858 if A.dtype.kind in ("i", "u") and G.is_multigraph() and parallel_edges:

859 chain = itertools.chain.from_iterable

860 # The following line is equivalent to:

861 #

862 # for (u, v) in edges:

863 # for d in range(A[u, v]):

864 # G.add_edge(u, v, weight=1)

865 #

866 triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples)

867 # If we are creating an undirected multigraph, only add the edges from the

868 # upper triangle of the matrix. Otherwise, add all the edges. This relies

869 # on the fact that the vertices created in the

870 # `_generated_weighted_edges()` function are actually the row/column

871 # indices for the matrix `A`.

872 #

873 # Without this check, we run into a problem where each edge is added twice

874 # when `G.add_weighted_edges_from()` is invoked below.

875 if G.is_multigraph() and not G.is_directed():

876 triples = ((u, v, d) for u, v, d in triples if u <= v)

877 G.add_weighted_edges_from(triples, weight=edge_attribute)

878 return G

879

880

881@nx._dispatchable(edge_attrs="weight") # edge attrs may also be obtained from `dtype`

882def to_numpy_array(

883 G,

884 nodelist=None,

885 dtype=None,

886 order=None,

887 multigraph_weight=sum,

888 weight="weight",

889 nonedge=0.0,

890):

891 """Returns the graph adjacency matrix as a NumPy array.

892

893 Parameters

894 ----------

895 G : graph

896 The NetworkX graph used to construct the NumPy array.

897

898 nodelist : list, optional

899 The rows and columns are ordered according to the nodes in `nodelist`.

900 If `nodelist` is ``None``, then the ordering is produced by ``G.nodes()``.

901

902 dtype : NumPy data type, optional

903 A NumPy data type used to initialize the array. If None, then the NumPy

904 default is used. The dtype can be structured if `weight=None`, in which

905 case the dtype field names are used to look up edge attributes. The

906 result is a structured array where each named field in the dtype

907 corresponds to the adjacency for that edge attribute. See examples for

908 details.

909

910 order : {'C', 'F'}, optional

911 Whether to store multidimensional data in C- or Fortran-contiguous

912 (row- or column-wise) order in memory. If None, then the NumPy default

913 is used.

914

915 multigraph_weight : callable, optional

916 An function that determines how weights in multigraphs are handled.

917 The function should accept a sequence of weights and return a single

918 value. The default is to sum the weights of the multiple edges.

919

920 weight : string or None optional (default = 'weight')

921 The edge attribute that holds the numerical value used for

922 the edge weight. If an edge does not have that attribute, then the

923 value 1 is used instead. `weight` must be ``None`` if a structured

924 dtype is used.

925

926 nonedge : array_like (default = 0.0)

927 The value used to represent non-edges in the adjacency matrix.

928 The array values corresponding to nonedges are typically set to zero.

929 However, this could be undesirable if there are array values

930 corresponding to actual edges that also have the value zero. If so,

931 one might prefer nonedges to have some other value, such as ``nan``.

932

933 Returns

934 -------

935 A : NumPy ndarray

936 Graph adjacency matrix

937

938 Raises

939 ------

940 NetworkXError

941 If `dtype` is a structured dtype and `G` is a multigraph

942 ValueError

943 If `dtype` is a structured dtype and `weight` is not `None`

944

945 See Also

946 --------

947 from_numpy_array

948

949 Notes

950 -----

951 For directed graphs, entry ``i, j`` corresponds to an edge from ``i`` to ``j``.

952

953 Entries in the adjacency matrix are given by the `weight` edge attribute.

954 When an edge does not have a weight attribute, the value of the entry is

955 set to the number 1. For multiple (parallel) edges, the values of the

956 entries are determined by the `multigraph_weight` parameter. The default is

957 to sum the weight attributes for each of the parallel edges.

958

959 When `nodelist` does not contain every node in `G`, the adjacency matrix is

960 built from the subgraph of `G` that is induced by the nodes in `nodelist`.

961

962 The convention used for self-loop edges in graphs is to assign the

963 diagonal array entry value to the weight attribute of the edge

964 (or the number 1 if the edge has no weight attribute). If the

965 alternate convention of doubling the edge weight is desired the

966 resulting NumPy array can be modified as follows:

967

968 >>> import numpy as np

969 >>> G = nx.Graph([(1, 1)])

970 >>> A = nx.to_numpy_array(G)

971 >>> A

972 array([[1.]])

973 >>> A[np.diag_indices_from(A)] *= 2

974 >>> A

975 array([[2.]])

976

977 Examples

978 --------

979 >>> G = nx.MultiDiGraph()

980 >>> G.add_edge(0, 1, weight=2)

981 0

982 >>> G.add_edge(1, 0)

983 0

984 >>> G.add_edge(2, 2, weight=3)

985 0

986 >>> G.add_edge(2, 2)

987 1

988 >>> nx.to_numpy_array(G, nodelist=[0, 1, 2])

989 array([[0., 2., 0.],

990 [1., 0., 0.],

991 [0., 0., 4.]])

992

993 When `nodelist` argument is used, nodes of `G` which do not appear in the `nodelist`

994 and their edges are not included in the adjacency matrix. Here is an example:

995

996 >>> G = nx.Graph()

997 >>> G.add_edge(3, 1)

998 >>> G.add_edge(2, 0)

999 >>> G.add_edge(2, 1)

1000 >>> G.add_edge(3, 0)

1001 >>> nx.to_numpy_array(G, nodelist=[1, 2, 3])

1002 array([[0., 1., 1.],

1003 [1., 0., 0.],

1004 [1., 0., 0.]])

1005

1006 This function can also be used to create adjacency matrices for multiple

1007 edge attributes with structured dtypes:

1008

1009 >>> G = nx.Graph()

1010 >>> G.add_edge(0, 1, weight=10)

1011 >>> G.add_edge(1, 2, cost=5)

1012 >>> G.add_edge(2, 3, weight=3, cost=-4.0)

1013 >>> dtype = np.dtype([("weight", int), ("cost", float)])

1014 >>> A = nx.to_numpy_array(G, dtype=dtype, weight=None)

1015 >>> A["weight"]

1016 array([[ 0, 10, 0, 0],

1017 [10, 0, 1, 0],

1018 [ 0, 1, 0, 3],

1019 [ 0, 0, 3, 0]])

1020 >>> A["cost"]

1021 array([[ 0., 1., 0., 0.],

1022 [ 1., 0., 5., 0.],

1023 [ 0., 5., 0., -4.],

1024 [ 0., 0., -4., 0.]])

1025

1026 As stated above, the argument "nonedge" is useful especially when there are

1027 actually edges with weight 0 in the graph. Setting a nonedge value different than 0,

1028 makes it much clearer to differentiate such 0-weighted edges and actual nonedge values.

1029

1030 >>> G = nx.Graph()

1031 >>> G.add_edge(3, 1, weight=2)

1032 >>> G.add_edge(2, 0, weight=0)

1033 >>> G.add_edge(2, 1, weight=0)

1034 >>> G.add_edge(3, 0, weight=1)

1035 >>> nx.to_numpy_array(G, nonedge=-1.0)

1036 array([[-1., 2., -1., 1.],

1037 [ 2., -1., 0., -1.],

1038 [-1., 0., -1., 0.],

1039 [ 1., -1., 0., -1.]])

1040 """

1041 import numpy as np

1042

1043 if nodelist is None:

1044 nodelist = list(G)

1045 nlen = len(nodelist)

1046

1047 # Input validation

1048 nodeset = set(nodelist)

1049 if nodeset - set(G):

1050 raise nx.NetworkXError(f"Nodes {nodeset - set(G)} in nodelist is not in G")

1051 if len(nodeset) < nlen:

1052 raise nx.NetworkXError("nodelist contains duplicates.")

1053

1054 A = np.full((nlen, nlen), fill_value=nonedge, dtype=dtype, order=order)

1055

1056 # Corner cases: empty nodelist or graph without any edges

1057 if nlen == 0 or G.number_of_edges() == 0:

1058 return A

1059

1060 # If dtype is structured and weight is None, use dtype field names as

1061 # edge attributes

1062 edge_attrs = None # Only single edge attribute by default

1063 if A.dtype.names:

1064 if weight is None:

1065 edge_attrs = dtype.names

1066 else:

1067 raise ValueError(

1068 "Specifying `weight` not supported for structured dtypes\n."

1069 "To create adjacency matrices from structured dtypes, use `weight=None`."

1070 )

1071

1072 # Map nodes to row/col in matrix

1073 idx = dict(zip(nodelist, range(nlen)))

1074 if len(nodelist) < len(G):

1075 G = G.subgraph(nodelist).copy()

1076

1077 # Collect all edge weights and reduce with `multigraph_weights`

1078 if G.is_multigraph():

1079 if edge_attrs:

1080 raise nx.NetworkXError(

1081 "Structured arrays are not supported for MultiGraphs"

1082 )

1083 d = defaultdict(list)

1084 for u, v, wt in G.edges(data=weight, default=1.0):

1085 d[(idx[u], idx[v])].append(wt)

1086 i, j = np.array(list(d.keys())).T # indices

1087 wts = [multigraph_weight(ws) for ws in d.values()] # reduced weights

1088 else:

1089 i, j, wts = [], [], []

1090

1091 # Special branch: multi-attr adjacency from structured dtypes

1092 if edge_attrs:

1093 # Extract edges with all data

1094 for u, v, data in G.edges(data=True):

1095 i.append(idx[u])

1096 j.append(idx[v])

1097 wts.append(data)

1098 # Map each attribute to the appropriate named field in the

1099 # structured dtype

1100 for attr in edge_attrs:

1101 attr_data = [wt.get(attr, 1.0) for wt in wts]

1102 A[attr][i, j] = attr_data

1103 if not G.is_directed():

1104 A[attr][j, i] = attr_data

1105 return A

1106

1107 for u, v, wt in G.edges(data=weight, default=1.0):

1108 i.append(idx[u])

1109 j.append(idx[v])

1110 wts.append(wt)

1111

1112 # Set array values with advanced indexing

1113 A[i, j] = wts

1114 if not G.is_directed():

1115 A[j, i] = wts

1116

1117 return A

1118

1119

1120@nx._dispatchable(graphs=None, returns_graph=True)

1121def from_numpy_array(

1122 A, parallel_edges=False, create_using=None, edge_attr="weight", *, nodelist=None

1123):

1124 """Returns a graph from a 2D NumPy array.

1125

1126 The 2D NumPy array is interpreted as an adjacency matrix for the graph.

1127

1128 Parameters

1129 ----------

1130 A : a 2D numpy.ndarray

1131 An adjacency matrix representation of a graph

1132

1133 parallel_edges : Boolean

1134 If this is True, `create_using` is a multigraph, and `A` is an

1135 integer array, then entry *(i, j)* in the array is interpreted as the

1136 number of parallel edges joining vertices *i* and *j* in the graph.

1137 If it is False, then the entries in the array are interpreted as

1138 the weight of a single edge joining the vertices.

1139

1140 create_using : NetworkX graph constructor, optional (default=nx.Graph)

1141 Graph type to create. If graph instance, then cleared before populated.

1142

1143 edge_attr : String, optional (default="weight")

1144 The attribute to which the array values are assigned on each edge. If

1145 it is None, edge attributes will not be assigned.

1146

1147 nodelist : sequence of nodes, optional

1148 A sequence of objects to use as the nodes in the graph. If provided, the

1149 list of nodes must be the same length as the dimensions of `A`. The

1150 default is `None`, in which case the nodes are drawn from ``range(n)``.

1151

1152 Notes

1153 -----

1154 For directed graphs, explicitly mention create_using=nx.DiGraph,

1155 and entry i,j of A corresponds to an edge from i to j.

1156

1157 If `create_using` is :class:`networkx.MultiGraph` or

1158 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the

1159 entries of `A` are of type :class:`int`, then this function returns a

1160 multigraph (of the same type as `create_using`) with parallel edges.

1161

1162 If `create_using` indicates an undirected multigraph, then only the edges

1163 indicated by the upper triangle of the array `A` will be added to the

1164 graph.

1165

1166 If `edge_attr` is Falsy (False or None), edge attributes will not be

1167 assigned, and the array data will be treated like a binary mask of

1168 edge presence or absence. Otherwise, the attributes will be assigned

1169 as follows:

1170

1171 If the NumPy array has a single data type for each array entry it

1172 will be converted to an appropriate Python data type.

1173

1174 If the NumPy array has a user-specified compound data type the names

1175 of the data fields will be used as attribute keys in the resulting

1176 NetworkX graph.

1177

1178 See Also

1179 --------

1180 to_numpy_array

1181

1182 Examples

1183 --------

1184 Simple integer weights on edges:

1185

1186 >>> import numpy as np

1187 >>> A = np.array([[1, 1], [2, 1]])

1188 >>> G = nx.from_numpy_array(A)

1189 >>> G.edges(data=True)

1190 EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), (1, 1, {'weight': 1})])

1191

1192 If `create_using` indicates a multigraph and the array has only integer

1193 entries and `parallel_edges` is False, then the entries will be treated

1194 as weights for edges joining the nodes (without creating parallel edges):

1195

1196 >>> A = np.array([[1, 1], [1, 2]])

1197 >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph)

1198 >>> G[1][1]

1199 AtlasView({0: {'weight': 2}})

1200

1201 If `create_using` indicates a multigraph and the array has only integer

1202 entries and `parallel_edges` is True, then the entries will be treated

1203 as the number of parallel edges joining those two vertices:

1204

1205 >>> A = np.array([[1, 1], [1, 2]])

1206 >>> temp = nx.MultiGraph()

1207 >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp)

1208 >>> G[1][1]

1209 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})

1210

1211 User defined compound data type on edges:

1212

1213 >>> dt = [("weight", float), ("cost", int)]

1214 >>> A = np.array([[(1.0, 2)]], dtype=dt)

1215 >>> G = nx.from_numpy_array(A)

1216 >>> G.edges()

1217 EdgeView([(0, 0)])

1218 >>> G[0][0]["cost"]

1219 2

1220 >>> G[0][0]["weight"]

1221 1.0

1222

1223 """

1224 kind_to_python_type = {

1225 "f": float,

1226 "i": int,

1227 "u": int,

1228 "b": bool,

1229 "c": complex,

1230 "S": str,

1231 "U": str,

1232 "V": "void",

1233 }

1234 G = nx.empty_graph(0, create_using)

1235 if A.ndim != 2:

1236 raise nx.NetworkXError(f"Input array must be 2D, not {A.ndim}")

1237 n, m = A.shape

1238 if n != m:

1239 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")

1240 dt = A.dtype

1241 try:

1242 python_type = kind_to_python_type[dt.kind]

1243 except Exception as err:

1244 raise TypeError(f"Unknown numpy data type: {dt}") from err

1245 if _default_nodes := (nodelist is None):

1246 nodelist = range(n)

1247 else:

1248 if len(nodelist) != n:

1249 raise ValueError("nodelist must have the same length as A.shape[0]")

1250

1251 # Make sure we get even the isolated nodes of the graph.

1252 G.add_nodes_from(nodelist)

1253 # Get a list of all the entries in the array with nonzero entries. These

1254 # coordinates become edges in the graph. (convert to int from np.int64)

1255 edges = ((int(e[0]), int(e[1])) for e in zip(*A.nonzero()))

1256 # handle numpy constructed data type

1257 if python_type == "void":

1258 # Sort the fields by their offset, then by dtype, then by name.

1259 fields = sorted(

1260 (offset, dtype, name) for name, (dtype, offset) in A.dtype.fields.items()

1261 )

1262 triples = (

1263 (

1264 u,

1265 v,

1266 {}

1267 if edge_attr in [False, None]

1268 else {

1269 name: kind_to_python_type[dtype.kind](val)

1270 for (_, dtype, name), val in zip(fields, A[u, v])

1271 },

1272 )

1273 for u, v in edges

1274 )

1275 # If the entries in the adjacency matrix are integers, the graph is a

1276 # multigraph, and parallel_edges is True, then create parallel edges, each

1277 # with weight 1, for each entry in the adjacency matrix. Otherwise, create

1278 # one edge for each positive entry in the adjacency matrix and set the

1279 # weight of that edge to be the entry in the matrix.

1280 elif python_type is int and G.is_multigraph() and parallel_edges:

1281 chain = itertools.chain.from_iterable

1282 # The following line is equivalent to:

1283 #

1284 # for (u, v) in edges:

1285 # for d in range(A[u, v]):

1286 # G.add_edge(u, v, weight=1)

1287 #

1288 if edge_attr in [False, None]:

1289 triples = chain(((u, v, {}) for d in range(A[u, v])) for (u, v) in edges)

1290 else:

1291 triples = chain(

1292 ((u, v, {edge_attr: 1}) for d in range(A[u, v])) for (u, v) in edges

1293 )

1294 else: # basic data type

1295 if edge_attr in [False, None]:

1296 triples = ((u, v, {}) for u, v in edges)

1297 else:

1298 triples = ((u, v, {edge_attr: python_type(A[u, v])}) for u, v in edges)

1299 # If we are creating an undirected multigraph, only add the edges from the

1300 # upper triangle of the matrix. Otherwise, add all the edges. This relies

1301 # on the fact that the vertices created in the

1302 # `_generated_weighted_edges()` function are actually the row/column

1303 # indices for the matrix `A`.

1304 #

1305 # Without this check, we run into a problem where each edge is added twice

1306 # when `G.add_edges_from()` is invoked below.

1307 if G.is_multigraph() and not G.is_directed():

1308 triples = ((u, v, d) for u, v, d in triples if u <= v)

1309 # Remap nodes if user provided custom `nodelist`

1310 if not _default_nodes:

1311 idx_to_node = dict(enumerate(nodelist))

1312 triples = ((idx_to_node[u], idx_to_node[v], d) for u, v, d in triples)

1313 G.add_edges_from(triples)

1314 return G

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/convert_matrix.py: 10%

259 statements