Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/graph_hashing.py: 23%
48 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""
2Functions for hashing graphs to strings.
3Isomorphic graphs should be assigned identical hashes.
4For now, only Weisfeiler-Lehman hashing is implemented.
5"""
7from collections import Counter, defaultdict
8from hashlib import blake2b
10import networkx as nx
12__all__ = ["weisfeiler_lehman_graph_hash", "weisfeiler_lehman_subgraph_hashes"]
15def _hash_label(label, digest_size):
16 return blake2b(label.encode("ascii"), digest_size=digest_size).hexdigest()
19def _init_node_labels(G, edge_attr, node_attr):
20 if node_attr:
21 return {u: str(dd[node_attr]) for u, dd in G.nodes(data=True)}
22 elif edge_attr:
23 return {u: "" for u in G}
24 else:
25 return {u: str(deg) for u, deg in G.degree()}
28def _neighborhood_aggregate(G, node, node_labels, edge_attr=None):
29 """
30 Compute new labels for given node by aggregating
31 the labels of each node's neighbors.
32 """
33 label_list = []
34 for nbr in G.neighbors(node):
35 prefix = "" if edge_attr is None else str(G[node][nbr][edge_attr])
36 label_list.append(prefix + node_labels[nbr])
37 return node_labels[node] + "".join(sorted(label_list))
40@nx._dispatch(edge_attrs={"edge_attr": None}, node_attrs="node_attr")
41def weisfeiler_lehman_graph_hash(
42 G, edge_attr=None, node_attr=None, iterations=3, digest_size=16
43):
44 """Return Weisfeiler Lehman (WL) graph hash.
46 The function iteratively aggregates and hashes neighbourhoods of each node.
47 After each node's neighbors are hashed to obtain updated node labels,
48 a hashed histogram of resulting labels is returned as the final hash.
50 Hashes are identical for isomorphic graphs and strong guarantees that
51 non-isomorphic graphs will get different hashes. See [1]_ for details.
53 If no node or edge attributes are provided, the degree of each node
54 is used as its initial label.
55 Otherwise, node and/or edge labels are used to compute the hash.
57 Parameters
58 ----------
59 G: graph
60 The graph to be hashed.
61 Can have node and/or edge attributes. Can also have no attributes.
62 edge_attr: string, default=None
63 The key in edge attribute dictionary to be used for hashing.
64 If None, edge labels are ignored.
65 node_attr: string, default=None
66 The key in node attribute dictionary to be used for hashing.
67 If None, and no edge_attr given, use the degrees of the nodes as labels.
68 iterations: int, default=3
69 Number of neighbor aggregations to perform.
70 Should be larger for larger graphs.
71 digest_size: int, default=16
72 Size (in bits) of blake2b hash digest to use for hashing node labels.
74 Returns
75 -------
76 h : string
77 Hexadecimal string corresponding to hash of the input graph.
79 Examples
80 --------
81 Two graphs with edge attributes that are isomorphic, except for
82 differences in the edge labels.
84 >>> G1 = nx.Graph()
85 >>> G1.add_edges_from(
86 ... [
87 ... (1, 2, {"label": "A"}),
88 ... (2, 3, {"label": "A"}),
89 ... (3, 1, {"label": "A"}),
90 ... (1, 4, {"label": "B"}),
91 ... ]
92 ... )
93 >>> G2 = nx.Graph()
94 >>> G2.add_edges_from(
95 ... [
96 ... (5, 6, {"label": "B"}),
97 ... (6, 7, {"label": "A"}),
98 ... (7, 5, {"label": "A"}),
99 ... (7, 8, {"label": "A"}),
100 ... ]
101 ... )
103 Omitting the `edge_attr` option, results in identical hashes.
105 >>> nx.weisfeiler_lehman_graph_hash(G1)
106 '7bc4dde9a09d0b94c5097b219891d81a'
107 >>> nx.weisfeiler_lehman_graph_hash(G2)
108 '7bc4dde9a09d0b94c5097b219891d81a'
110 With edge labels, the graphs are no longer assigned
111 the same hash digest.
113 >>> nx.weisfeiler_lehman_graph_hash(G1, edge_attr="label")
114 'c653d85538bcf041d88c011f4f905f10'
115 >>> nx.weisfeiler_lehman_graph_hash(G2, edge_attr="label")
116 '3dcd84af1ca855d0eff3c978d88e7ec7'
118 Notes
119 -----
120 To return the WL hashes of each subgraph of a graph, use
121 `weisfeiler_lehman_subgraph_hashes`
123 Similarity between hashes does not imply similarity between graphs.
125 References
126 ----------
127 .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
128 Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
129 Graph Kernels. Journal of Machine Learning Research. 2011.
130 http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf
132 See also
133 --------
134 weisfeiler_lehman_subgraph_hashes
135 """
137 def weisfeiler_lehman_step(G, labels, edge_attr=None):
138 """
139 Apply neighborhood aggregation to each node
140 in the graph.
141 Computes a dictionary with labels for each node.
142 """
143 new_labels = {}
144 for node in G.nodes():
145 label = _neighborhood_aggregate(G, node, labels, edge_attr=edge_attr)
146 new_labels[node] = _hash_label(label, digest_size)
147 return new_labels
149 # set initial node labels
150 node_labels = _init_node_labels(G, edge_attr, node_attr)
152 subgraph_hash_counts = []
153 for _ in range(iterations):
154 node_labels = weisfeiler_lehman_step(G, node_labels, edge_attr=edge_attr)
155 counter = Counter(node_labels.values())
156 # sort the counter, extend total counts
157 subgraph_hash_counts.extend(sorted(counter.items(), key=lambda x: x[0]))
159 # hash the final counter
160 return _hash_label(str(tuple(subgraph_hash_counts)), digest_size)
163@nx._dispatch(edge_attrs={"edge_attr": None}, node_attrs="node_attr")
164def weisfeiler_lehman_subgraph_hashes(
165 G, edge_attr=None, node_attr=None, iterations=3, digest_size=16
166):
167 """
168 Return a dictionary of subgraph hashes by node.
170 Dictionary keys are nodes in `G`, and values are a list of hashes.
171 Each hash corresponds to a subgraph rooted at a given node u in `G`.
172 Lists of subgraph hashes are sorted in increasing order of depth from
173 their root node, with the hash at index i corresponding to a subgraph
174 of nodes at most i edges distance from u. Thus, each list will contain
175 ``iterations + 1`` elements - a hash for a subgraph at each depth, and
176 additionally a hash of the initial node label (or equivalently a
177 subgraph of depth 0)
179 The function iteratively aggregates and hashes neighbourhoods of each node.
180 This is achieved for each step by replacing for each node its label from
181 the previous iteration with its hashed 1-hop neighborhood aggregate.
182 The new node label is then appended to a list of node labels for each
183 node.
185 To aggregate neighborhoods at each step for a node $n$, all labels of
186 nodes adjacent to $n$ are concatenated. If the `edge_attr` parameter is set,
187 labels for each neighboring node are prefixed with the value of this attribute
188 along the connecting edge from this neighbor to node $n$. The resulting string
189 is then hashed to compress this information into a fixed digest size.
191 Thus, at the $i$-th iteration, nodes within $i$ hops influence any given
192 hashed node label. We can therefore say that at depth $i$ for node $n$
193 we have a hash for a subgraph induced by the $2i$-hop neighborhood of $n$.
195 The output can be used to to create general Weisfeiler-Lehman graph kernels,
196 or generate features for graphs or nodes - for example to generate 'words' in
197 a graph as seen in the 'graph2vec' algorithm.
198 See [1]_ & [2]_ respectively for details.
200 Hashes are identical for isomorphic subgraphs and there exist strong
201 guarantees that non-isomorphic graphs will get different hashes.
202 See [1]_ for details.
204 If no node or edge attributes are provided, the degree of each node
205 is used as its initial label.
206 Otherwise, node and/or edge labels are used to compute the hash.
208 Parameters
209 ----------
210 G: graph
211 The graph to be hashed.
212 Can have node and/or edge attributes. Can also have no attributes.
213 edge_attr: string, default=None
214 The key in edge attribute dictionary to be used for hashing.
215 If None, edge labels are ignored.
216 node_attr: string, default=None
217 The key in node attribute dictionary to be used for hashing.
218 If None, and no edge_attr given, use the degrees of the nodes as labels.
219 iterations: int, default=3
220 Number of neighbor aggregations to perform.
221 Should be larger for larger graphs.
222 digest_size: int, default=16
223 Size (in bits) of blake2b hash digest to use for hashing node labels.
224 The default size is 16 bits
226 Returns
227 -------
228 node_subgraph_hashes : dict
229 A dictionary with each key given by a node in G, and each value given
230 by the subgraph hashes in order of depth from the key node.
232 Examples
233 --------
234 Finding similar nodes in different graphs:
236 >>> G1 = nx.Graph()
237 >>> G1.add_edges_from([
238 ... (1, 2), (2, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 7)
239 ... ])
240 >>> G2 = nx.Graph()
241 >>> G2.add_edges_from([
242 ... (1, 3), (2, 3), (1, 6), (1, 5), (4, 6)
243 ... ])
244 >>> g1_hashes = nx.weisfeiler_lehman_subgraph_hashes(G1, iterations=3, digest_size=8)
245 >>> g2_hashes = nx.weisfeiler_lehman_subgraph_hashes(G2, iterations=3, digest_size=8)
247 Even though G1 and G2 are not isomorphic (they have different numbers of edges),
248 the hash sequence of depth 3 for node 1 in G1 and node 5 in G2 are similar:
250 >>> g1_hashes[1]
251 ['a93b64973cfc8897', 'db1b43ae35a1878f', '57872a7d2059c1c0']
252 >>> g2_hashes[5]
253 ['a93b64973cfc8897', 'db1b43ae35a1878f', '1716d2a4012fa4bc']
255 The first 2 WL subgraph hashes match. From this we can conclude that it's very
256 likely the neighborhood of 4 hops around these nodes are isomorphic: each
257 iteration aggregates 1-hop neighbourhoods meaning hashes at depth $n$ are influenced
258 by every node within $2n$ hops.
260 However the neighborhood of 6 hops is no longer isomorphic since their 3rd hash does
261 not match.
263 These nodes may be candidates to be classified together since their local topology
264 is similar.
266 Notes
267 -----
268 To hash the full graph when subgraph hashes are not needed, use
269 `weisfeiler_lehman_graph_hash` for efficiency.
271 Similarity between hashes does not imply similarity between graphs.
273 References
274 ----------
275 .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
276 Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
277 Graph Kernels. Journal of Machine Learning Research. 2011.
278 http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf
279 .. [2] Annamalai Narayanan, Mahinthan Chandramohan, Rajasekar Venkatesan,
280 Lihui Chen, Yang Liu and Shantanu Jaiswa. graph2vec: Learning
281 Distributed Representations of Graphs. arXiv. 2017
282 https://arxiv.org/pdf/1707.05005.pdf
284 See also
285 --------
286 weisfeiler_lehman_graph_hash
287 """
289 def weisfeiler_lehman_step(G, labels, node_subgraph_hashes, edge_attr=None):
290 """
291 Apply neighborhood aggregation to each node
292 in the graph.
293 Computes a dictionary with labels for each node.
294 Appends the new hashed label to the dictionary of subgraph hashes
295 originating from and indexed by each node in G
296 """
297 new_labels = {}
298 for node in G.nodes():
299 label = _neighborhood_aggregate(G, node, labels, edge_attr=edge_attr)
300 hashed_label = _hash_label(label, digest_size)
301 new_labels[node] = hashed_label
302 node_subgraph_hashes[node].append(hashed_label)
303 return new_labels
305 node_labels = _init_node_labels(G, edge_attr, node_attr)
307 node_subgraph_hashes = defaultdict(list)
308 for _ in range(iterations):
309 node_labels = weisfeiler_lehman_step(
310 G, node_labels, node_subgraph_hashes, edge_attr
311 )
313 return dict(node_subgraph_hashes)