1"""
2Functions for constructing matrix-like objects from graph attributes.
3"""
4
5import networkx as nx
6
7__all__ = ["attr_matrix", "attr_sparse_matrix"]
8
9
10def _node_value(G, node_attr):
11 """Returns a function that returns a value from G.nodes[u].
12
13 We return a function expecting a node as its sole argument. Then, in the
14 simplest scenario, the returned function will return G.nodes[u][node_attr].
15 However, we also handle the case when `node_attr` is None (returns the node)
16 or when `node_attr` is a function itself.
17
18 Parameters
19 ----------
20 G : graph
21 A NetworkX graph
22
23 node_attr : {None, str, callable}
24 Specification of how the value of the node attribute should be obtained
25 from the node attribute dictionary.
26
27 Returns
28 -------
29 value : function
30 A function expecting a node as its sole argument. The function will
31 returns a value from G.nodes[u] that depends on `edge_attr`.
32
33 """
34 if node_attr is None:
35
36 def value(u):
37 return u
38
39 elif not callable(node_attr):
40 # assume it is a key for the node attribute dictionary
41 def value(u):
42 return G.nodes[u][node_attr]
43
44 else:
45 # Advanced: Allow users to specify something else.
46 #
47 # For example,
48 # node_attr = lambda u: G.nodes[u].get('size', .5) * 3
49 #
50 value = node_attr
51
52 return value
53
54
55def _edge_value(G, edge_attr):
56 """Returns a function that returns a value from G[u][v].
57
58 Suppose there exists an edge between u and v. Then we return a function
59 expecting u and v as arguments. For Graph and DiGraph, G[u][v] is
60 the edge attribute dictionary, and the function (essentially) returns
61 G[u][v][edge_attr]. However, we also handle cases when `edge_attr` is None
62 and when it is a function itself. For MultiGraph and MultiDiGraph, G[u][v]
63 is a dictionary of all edges between u and v. In this case, the returned
64 function sums the value of `edge_attr` for every edge between u and v.
65
66 Parameters
67 ----------
68 G : graph
69 A NetworkX graph
70
71 edge_attr : {None, str, callable}
72 Specification of how the value of the edge attribute should be obtained
73 from the edge attribute dictionary, G[u][v]. For multigraphs, G[u][v]
74 is a dictionary of all the edges between u and v. This allows for
75 special treatment of multiedges.
76
77 Returns
78 -------
79 value : function
80 A function expecting two nodes as parameters. The nodes should
81 represent the from- and to- node of an edge. The function will
82 return a value from G[u][v] that depends on `edge_attr`.
83
84 """
85
86 if edge_attr is None:
87 # topological count of edges
88
89 if G.is_multigraph():
90
91 def value(u, v):
92 return len(G[u][v])
93
94 else:
95
96 def value(u, v):
97 return 1
98
99 elif not callable(edge_attr):
100 # assume it is a key for the edge attribute dictionary
101
102 if edge_attr == "weight":
103 # provide a default value
104 if G.is_multigraph():
105
106 def value(u, v):
107 return sum(d.get(edge_attr, 1) for d in G[u][v].values())
108
109 else:
110
111 def value(u, v):
112 return G[u][v].get(edge_attr, 1)
113
114 else:
115 # otherwise, the edge attribute MUST exist for each edge
116 if G.is_multigraph():
117
118 def value(u, v):
119 return sum(d[edge_attr] for d in G[u][v].values())
120
121 else:
122
123 def value(u, v):
124 return G[u][v][edge_attr]
125
126 else:
127 # Advanced: Allow users to specify something else.
128 #
129 # Alternative default value:
130 # edge_attr = lambda u,v: G[u][v].get('thickness', .5)
131 #
132 # Function on an attribute:
133 # edge_attr = lambda u,v: abs(G[u][v]['weight'])
134 #
135 # Handle Multi(Di)Graphs differently:
136 # edge_attr = lambda u,v: numpy.prod([d['size'] for d in G[u][v].values()])
137 #
138 # Ignore multiple edges
139 # edge_attr = lambda u,v: 1 if len(G[u][v]) else 0
140 #
141 value = edge_attr
142
143 return value
144
145
146@nx._dispatchable(edge_attrs={"edge_attr": None}, node_attrs="node_attr")
147def attr_matrix(
148 G,
149 edge_attr=None,
150 node_attr=None,
151 normalized=False,
152 rc_order=None,
153 dtype=None,
154 order=None,
155):
156 """Returns the attribute matrix using attributes from `G` as a numpy array.
157
158 If only `G` is passed in, then the adjacency matrix is constructed.
159
160 Let A be a discrete set of values for the node attribute `node_attr`. Then
161 the elements of A represent the rows and columns of the constructed matrix.
162 Now, iterate through every edge e=(u,v) in `G` and consider the value
163 of the edge attribute `edge_attr`. If ua and va are the values of the
164 node attribute `node_attr` for u and v, respectively, then the value of
165 the edge attribute is added to the matrix element at (ua, va).
166
167 Parameters
168 ----------
169 G : graph
170 The NetworkX graph used to construct the attribute matrix.
171
172 edge_attr : str, optional (default: number of edges for each matrix element)
173 Each element of the matrix represents a running total of the
174 specified edge attribute for edges whose node attributes correspond
175 to the rows/cols of the matrix. The attribute must be present for
176 all edges in the graph. If no attribute is specified, then we
177 just count the number of edges whose node attributes correspond
178 to the matrix element.
179
180 node_attr : str, optional (default: use nodes of the graph)
181 Each row and column in the matrix represents a particular value
182 of the node attribute. The attribute must be present for all nodes
183 in the graph. Note, the values of this attribute should be reliably
184 hashable. So, float values are not recommended. If no attribute is
185 specified, then the rows and columns will be the nodes of the graph.
186
187 normalized : bool, optional (default: False)
188 If True, then each row is normalized by the summation of its values.
189
190 rc_order : list, optional (default: order of nodes in G)
191 A list of the node attribute values. This list specifies the ordering
192 of rows and columns of the array. If no ordering is provided, then
193 the ordering will be the same as the node order in `G`.
194 When `rc_order` is `None`, the function returns a 2-tuple ``(matrix, ordering)``
195
196 Other Parameters
197 ----------------
198 dtype : NumPy data-type, optional
199 A valid NumPy dtype used to initialize the array. Keep in mind certain
200 dtypes can yield unexpected results if the array is to be normalized.
201 The parameter is passed to numpy.zeros(). If unspecified, the NumPy
202 default is used.
203
204 order : {'C', 'F'}, optional
205 Whether to store multidimensional data in C- or Fortran-contiguous
206 (row- or column-wise) order in memory. This parameter is passed to
207 numpy.zeros(). If unspecified, the NumPy default is used.
208
209 Returns
210 -------
211 M : 2D NumPy ndarray
212 The attribute matrix.
213
214 ordering : list
215 If `rc_order` was specified, then only the attribute matrix is returned.
216 However, if `rc_order` was None, then the ordering used to construct
217 the matrix is returned as well.
218
219 Examples
220 --------
221 Construct an adjacency matrix:
222
223 >>> G = nx.Graph()
224 >>> G.add_edge(0, 1, thickness=1, weight=3)
225 >>> G.add_edge(0, 2, thickness=2)
226 >>> G.add_edge(1, 2, thickness=3)
227 >>> nx.attr_matrix(G, rc_order=[0, 1, 2])
228 array([[0., 1., 1.],
229 [1., 0., 1.],
230 [1., 1., 0.]])
231
232 Alternatively, we can obtain the matrix describing edge thickness.
233
234 >>> nx.attr_matrix(G, edge_attr="thickness", rc_order=[0, 1, 2])
235 array([[0., 1., 2.],
236 [1., 0., 3.],
237 [2., 3., 0.]])
238
239 We can also color the nodes and ask for the probability distribution over
240 all edges (u,v) describing:
241
242 Pr(v has color Y | u has color X)
243
244 >>> G.nodes[0]["color"] = "red"
245 >>> G.nodes[1]["color"] = "red"
246 >>> G.nodes[2]["color"] = "blue"
247 >>> rc = ["red", "blue"]
248 >>> nx.attr_matrix(G, node_attr="color", normalized=True, rc_order=rc)
249 array([[0.33333333, 0.66666667],
250 [1. , 0. ]])
251
252 For example, the above tells us that for all edges (u,v):
253
254 Pr( v is red | u is red) = 1/3
255 Pr( v is blue | u is red) = 2/3
256
257 Pr( v is red | u is blue) = 1
258 Pr( v is blue | u is blue) = 0
259
260 Finally, we can obtain the total weights listed by the node colors.
261
262 >>> nx.attr_matrix(G, edge_attr="weight", node_attr="color", rc_order=rc)
263 array([[3., 2.],
264 [2., 0.]])
265
266 Thus, the total weight over all edges (u,v) with u and v having colors:
267
268 (red, red) is 3 # the sole contribution is from edge (0,1)
269 (red, blue) is 2 # contributions from edges (0,2) and (1,2)
270 (blue, red) is 2 # same as (red, blue) since graph is undirected
271 (blue, blue) is 0 # there are no edges with blue endpoints
272
273 """
274 import numpy as np
275
276 edge_value = _edge_value(G, edge_attr)
277 node_value = _node_value(G, node_attr)
278
279 if rc_order is None:
280 ordering = list({node_value(n) for n in G})
281 else:
282 ordering = rc_order
283
284 N = len(ordering)
285 undirected = not G.is_directed()
286 index = dict(zip(ordering, range(N)))
287 M = np.zeros((N, N), dtype=dtype, order=order)
288
289 seen = set()
290 for u, nbrdict in G.adjacency():
291 for v in nbrdict:
292 # Obtain the node attribute values.
293 i, j = index[node_value(u)], index[node_value(v)]
294 if v not in seen:
295 M[i, j] += edge_value(u, v)
296 if undirected:
297 M[j, i] = M[i, j]
298
299 if undirected:
300 seen.add(u)
301
302 if normalized:
303 M /= M.sum(axis=1).reshape((N, 1))
304
305 if rc_order is None:
306 return M, ordering
307 else:
308 return M
309
310
311@nx._dispatchable(edge_attrs={"edge_attr": None}, node_attrs="node_attr")
312def attr_sparse_matrix(
313 G, edge_attr=None, node_attr=None, normalized=False, rc_order=None, dtype=None
314):
315 """Returns a SciPy sparse array using attributes from G.
316
317 If only `G` is passed in, then the adjacency matrix is constructed.
318
319 Let A be a discrete set of values for the node attribute `node_attr`. Then
320 the elements of A represent the rows and columns of the constructed matrix.
321 Now, iterate through every edge e=(u,v) in `G` and consider the value
322 of the edge attribute `edge_attr`. If ua and va are the values of the
323 node attribute `node_attr` for u and v, respectively, then the value of
324 the edge attribute is added to the matrix element at (ua, va).
325
326 Parameters
327 ----------
328 G : graph
329 The NetworkX graph used to construct the NumPy matrix.
330
331 edge_attr : str, optional (default: number of edges for each matrix element)
332 Each element of the matrix represents a running total of the
333 specified edge attribute for edges whose node attributes correspond
334 to the rows/cols of the matrix. The attribute must be present for
335 all edges in the graph. If no attribute is specified, then we
336 just count the number of edges whose node attributes correspond
337 to the matrix element.
338
339 node_attr : str, optional (default: use nodes of the graph)
340 Each row and column in the matrix represents a particular value
341 of the node attribute. The attribute must be present for all nodes
342 in the graph. Note, the values of this attribute should be reliably
343 hashable. So, float values are not recommended. If no attribute is
344 specified, then the rows and columns will be the nodes of the graph.
345
346 normalized : bool, optional (default: False)
347 If True, then each row is normalized by the summation of its values.
348
349 rc_order : list, optional (default: order of nodes in G)
350 A list of the node attribute values. This list specifies the ordering
351 of rows and columns of the array and the return value. If no ordering
352 is provided, then the ordering will be that of nodes in `G`.
353
354 Other Parameters
355 ----------------
356 dtype : NumPy data-type, optional
357 A valid NumPy dtype used to initialize the array. Keep in mind certain
358 dtypes can yield unexpected results if the array is to be normalized.
359 The parameter is passed to numpy.zeros(). If unspecified, the NumPy
360 default is used.
361
362 Returns
363 -------
364 M : SciPy sparse array
365 The attribute matrix.
366
367 ordering : list
368 If `rc_order` was specified, then only the matrix is returned.
369 However, if `rc_order` was None, then the ordering used to construct
370 the matrix is returned as well.
371
372 Examples
373 --------
374 Construct an adjacency matrix:
375
376 >>> G = nx.Graph()
377 >>> G.add_edge(0, 1, thickness=1, weight=3)
378 >>> G.add_edge(0, 2, thickness=2)
379 >>> G.add_edge(1, 2, thickness=3)
380 >>> M = nx.attr_sparse_matrix(G, rc_order=[0, 1, 2])
381 >>> M.toarray()
382 array([[0., 1., 1.],
383 [1., 0., 1.],
384 [1., 1., 0.]])
385
386 Alternatively, we can obtain the matrix describing edge thickness.
387
388 >>> M = nx.attr_sparse_matrix(G, edge_attr="thickness", rc_order=[0, 1, 2])
389 >>> M.toarray()
390 array([[0., 1., 2.],
391 [1., 0., 3.],
392 [2., 3., 0.]])
393
394 We can also color the nodes and ask for the probability distribution over
395 all edges (u,v) describing:
396
397 Pr(v has color Y | u has color X)
398
399 >>> G.nodes[0]["color"] = "red"
400 >>> G.nodes[1]["color"] = "red"
401 >>> G.nodes[2]["color"] = "blue"
402 >>> rc = ["red", "blue"]
403 >>> M = nx.attr_sparse_matrix(G, node_attr="color", normalized=True, rc_order=rc)
404 >>> M.toarray()
405 array([[0.33333333, 0.66666667],
406 [1. , 0. ]])
407
408 For example, the above tells us that for all edges (u,v):
409
410 Pr( v is red | u is red) = 1/3
411 Pr( v is blue | u is red) = 2/3
412
413 Pr( v is red | u is blue) = 1
414 Pr( v is blue | u is blue) = 0
415
416 Finally, we can obtain the total weights listed by the node colors.
417
418 >>> M = nx.attr_sparse_matrix(G, edge_attr="weight", node_attr="color", rc_order=rc)
419 >>> M.toarray()
420 array([[3., 2.],
421 [2., 0.]])
422
423 Thus, the total weight over all edges (u,v) with u and v having colors:
424
425 (red, red) is 3 # the sole contribution is from edge (0,1)
426 (red, blue) is 2 # contributions from edges (0,2) and (1,2)
427 (blue, red) is 2 # same as (red, blue) since graph is undirected
428 (blue, blue) is 0 # there are no edges with blue endpoints
429
430 """
431 import numpy as np
432 import scipy as sp
433
434 edge_value = _edge_value(G, edge_attr)
435 node_value = _node_value(G, node_attr)
436
437 if rc_order is None:
438 ordering = list({node_value(n) for n in G})
439 else:
440 ordering = rc_order
441
442 N = len(ordering)
443 undirected = not G.is_directed()
444 index = dict(zip(ordering, range(N)))
445 M = sp.sparse.lil_array((N, N), dtype=dtype)
446
447 seen = set()
448 for u, nbrdict in G.adjacency():
449 for v in nbrdict:
450 # Obtain the node attribute values.
451 i, j = index[node_value(u)], index[node_value(v)]
452 if v not in seen:
453 M[i, j] += edge_value(u, v)
454 if undirected:
455 M[j, i] = M[i, j]
456
457 if undirected:
458 seen.add(u)
459
460 if normalized:
461 M *= 1 / M.sum(axis=1)[:, np.newaxis] # in-place mult preserves sparse
462
463 if rc_order is None:
464 return M, ordering
465 else:
466 return M