# Coverage report: /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/graph_hashing.py
# 23% of 48 statements covered (coverage.py v7.3.2, created at 2023-10-20 07:00 +0000)

1""" 

2Functions for hashing graphs to strings. 

3Isomorphic graphs should be assigned identical hashes. 

4For now, only Weisfeiler-Lehman hashing is implemented. 

5""" 

6 

7from collections import Counter, defaultdict 

8from hashlib import blake2b 

9 

10import networkx as nx 

11 

12__all__ = ["weisfeiler_lehman_graph_hash", "weisfeiler_lehman_subgraph_hashes"] 

13 

14 

def _hash_label(label, digest_size):
    return blake2b(label.encode("ascii"), digest_size=digest_size).hexdigest()


def _init_node_labels(G, edge_attr, node_attr):
    if node_attr:
        return {u: str(dd[node_attr]) for u, dd in G.nodes(data=True)}
    elif edge_attr:
        return {u: "" for u in G}
    else:
        return {u: str(deg) for u, deg in G.degree()}


def _neighborhood_aggregate(G, node, node_labels, edge_attr=None):
    """
    Compute a new label for the given node by aggregating
    the labels of the node's neighbors.
    """
    label_list = []
    for nbr in G.neighbors(node):
        prefix = "" if edge_attr is None else str(G[node][nbr][edge_attr])
        label_list.append(prefix + node_labels[nbr])
    return node_labels[node] + "".join(sorted(label_list))

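
# Editorial sketch (not part of networkx): the hypothetical helper below shows
# how the two private helpers above combine on a small path graph.
def _demo_neighborhood_aggregate():  # pragma: no cover - example only
    G = nx.path_graph(3)  # edges 0-1 and 1-2
    labels = _init_node_labels(G, None, None)
    # With no attributes, degrees become the initial labels.
    assert labels == {0: "1", 1: "2", 2: "1"}
    # Node 1's own label "2" is followed by its sorted neighbour labels "1", "1".
    assert _neighborhood_aggregate(G, 1, labels) == "211"
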

@nx._dispatch(edge_attrs={"edge_attr": None}, node_attrs="node_attr")
def weisfeiler_lehman_graph_hash(
    G, edge_attr=None, node_attr=None, iterations=3, digest_size=16
):
    """Return Weisfeiler Lehman (WL) graph hash.

    The function iteratively aggregates and hashes neighbourhoods of each node.
    After each node's neighbors are hashed to obtain updated node labels,
    a hashed histogram of the resulting labels is returned as the final hash.

    Hashes are identical for isomorphic graphs and there exist strong guarantees
    that non-isomorphic graphs will get different hashes. See [1]_ for details.

    If no node or edge attributes are provided, the degree of each node
    is used as its initial label.
    Otherwise, node and/or edge labels are used to compute the hash.


    Parameters
    ----------
    G: graph
        The graph to be hashed.
        Can have node and/or edge attributes. Can also have no attributes.
    edge_attr: string, default=None
        The key in edge attribute dictionary to be used for hashing.
        If None, edge labels are ignored.
    node_attr: string, default=None
        The key in node attribute dictionary to be used for hashing.
        If None, and no edge_attr given, use the degrees of the nodes as labels.
    iterations: int, default=3
        Number of neighbor aggregations to perform.
        Should be larger for larger graphs.
    digest_size: int, default=16
        Size (in bytes) of blake2b hash digest to use for hashing node labels.


    Returns
    -------
    h : string
        Hexadecimal string corresponding to hash of the input graph.

    Examples
    --------
    Two graphs with edge attributes that are isomorphic, except for
    differences in the edge labels.

    >>> G1 = nx.Graph()
    >>> G1.add_edges_from(
    ...     [
    ...         (1, 2, {"label": "A"}),
    ...         (2, 3, {"label": "A"}),
    ...         (3, 1, {"label": "A"}),
    ...         (1, 4, {"label": "B"}),
    ...     ]
    ... )
    >>> G2 = nx.Graph()
    >>> G2.add_edges_from(
    ...     [
    ...         (5, 6, {"label": "B"}),
    ...         (6, 7, {"label": "A"}),
    ...         (7, 5, {"label": "A"}),
    ...         (7, 8, {"label": "A"}),
    ...     ]
    ... )


    Omitting the `edge_attr` option results in identical hashes.

    >>> nx.weisfeiler_lehman_graph_hash(G1)
    '7bc4dde9a09d0b94c5097b219891d81a'
    >>> nx.weisfeiler_lehman_graph_hash(G2)
    '7bc4dde9a09d0b94c5097b219891d81a'

    With edge labels, the graphs are no longer assigned
    the same hash digest.

    >>> nx.weisfeiler_lehman_graph_hash(G1, edge_attr="label")
    'c653d85538bcf041d88c011f4f905f10'
    >>> nx.weisfeiler_lehman_graph_hash(G2, edge_attr="label")
    '3dcd84af1ca855d0eff3c978d88e7ec7'
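
    As a further illustration (``G1_relabelled`` and ``h1`` are names introduced
    here, not part of the original example), relabelling ``G1`` gives an
    isomorphic graph and therefore the same digest:

    >>> G1_relabelled = nx.relabel_nodes(G1, {1: "a", 2: "b", 3: "c", 4: "d"})
    >>> h1 = nx.weisfeiler_lehman_graph_hash(G1)
    >>> h1 == nx.weisfeiler_lehman_graph_hash(G1_relabelled)
    True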


    Notes
    -----
    To return the WL hashes of each subgraph of a graph, use
    `weisfeiler_lehman_subgraph_hashes`.

    Similarity between hashes does not imply similarity between graphs.

    References
    ----------
    .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
       Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
       Graph Kernels. Journal of Machine Learning Research. 2011.
       http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf

    See also
    --------
    weisfeiler_lehman_subgraph_hashes
    """


    def weisfeiler_lehman_step(G, labels, edge_attr=None):
        """
        Apply neighborhood aggregation to each node
        in the graph.
        Computes a dictionary with labels for each node.
        """
        new_labels = {}
        for node in G.nodes():
            label = _neighborhood_aggregate(G, node, labels, edge_attr=edge_attr)
            new_labels[node] = _hash_label(label, digest_size)
        return new_labels

    # set initial node labels
    node_labels = _init_node_labels(G, edge_attr, node_attr)

    subgraph_hash_counts = []
    for _ in range(iterations):
        node_labels = weisfeiler_lehman_step(G, node_labels, edge_attr=edge_attr)
        counter = Counter(node_labels.values())
        # sort the counter, extend total counts
        subgraph_hash_counts.extend(sorted(counter.items(), key=lambda x: x[0]))

    # hash the final counter
    return _hash_label(str(tuple(subgraph_hash_counts)), digest_size)


@nx._dispatch(edge_attrs={"edge_attr": None}, node_attrs="node_attr")
def weisfeiler_lehman_subgraph_hashes(
    G, edge_attr=None, node_attr=None, iterations=3, digest_size=16
):
    """
    Return a dictionary of subgraph hashes by node.

    Dictionary keys are nodes in `G`, and values are a list of hashes.
    Each hash corresponds to a subgraph rooted at a given node u in `G`.
    Lists of subgraph hashes are sorted in increasing order of depth from
    their root node, with the hash at index i corresponding to a subgraph
    of nodes at most i + 1 edges distant from u. Thus, each list contains
    ``iterations`` elements - one hash per aggregation step; the hash of the
    initial node label (the subgraph of depth 0) is not included.

    The function iteratively aggregates and hashes neighbourhoods of each node.
    This is achieved for each step by replacing for each node its label from
    the previous iteration with its hashed 1-hop neighborhood aggregate.
    The new node label is then appended to a list of node labels for each
    node.

    To aggregate neighborhoods at each step for a node $n$, all labels of
    nodes adjacent to $n$ are concatenated. If the `edge_attr` parameter is set,
    labels for each neighboring node are prefixed with the value of this attribute
    along the connecting edge from this neighbor to node $n$. The resulting string
    is then hashed to compress this information into a fixed digest size.

    Thus, at the $i$-th iteration, nodes within $i$ hops influence any given
    hashed node label. We can therefore say that at depth $i$ for node $n$
    we have a hash for a subgraph induced by the $2i$-hop neighborhood of $n$.

    The output can be used to create general Weisfeiler-Lehman graph kernels,
    or generate features for graphs or nodes - for example to generate 'words' in
    a graph as seen in the 'graph2vec' algorithm.
    See [1]_ & [2]_ respectively for details.

    Hashes are identical for isomorphic subgraphs and there exist strong
    guarantees that non-isomorphic graphs will get different hashes.
    See [1]_ for details.

    If no node or edge attributes are provided, the degree of each node
    is used as its initial label.
    Otherwise, node and/or edge labels are used to compute the hash.


    Parameters
    ----------
    G: graph
        The graph to be hashed.
        Can have node and/or edge attributes. Can also have no attributes.
    edge_attr: string, default=None
        The key in edge attribute dictionary to be used for hashing.
        If None, edge labels are ignored.
    node_attr: string, default=None
        The key in node attribute dictionary to be used for hashing.
        If None, and no edge_attr given, use the degrees of the nodes as labels.
    iterations: int, default=3
        Number of neighbor aggregations to perform.
        Should be larger for larger graphs.
    digest_size: int, default=16
        Size (in bytes) of blake2b hash digest to use for hashing node labels.
        The default size is 16 bytes.


    Returns
    -------
    node_subgraph_hashes : dict
        A dictionary with each key given by a node in G, and each value given
        by the subgraph hashes in order of depth from the key node.

    Examples
    --------
    Finding similar nodes in different graphs:

    >>> G1 = nx.Graph()
    >>> G1.add_edges_from([
    ...     (1, 2), (2, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 7)
    ... ])
    >>> G2 = nx.Graph()
    >>> G2.add_edges_from([
    ...     (1, 3), (2, 3), (1, 6), (1, 5), (4, 6)
    ... ])
    >>> g1_hashes = nx.weisfeiler_lehman_subgraph_hashes(G1, iterations=3, digest_size=8)
    >>> g2_hashes = nx.weisfeiler_lehman_subgraph_hashes(G2, iterations=3, digest_size=8)


    Even though G1 and G2 are not isomorphic (they have different numbers of edges),
    the depth-3 hash sequences for node 1 in G1 and node 5 in G2 are similar:

    >>> g1_hashes[1]
    ['a93b64973cfc8897', 'db1b43ae35a1878f', '57872a7d2059c1c0']
    >>> g2_hashes[5]
    ['a93b64973cfc8897', 'db1b43ae35a1878f', '1716d2a4012fa4bc']
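
    Each list holds one hash per aggregation step - here three, since
    ``iterations=3`` was requested above (a check added here for illustration):

    >>> len(g1_hashes[1])
    3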


    The first 2 WL subgraph hashes match. From this we can conclude that it is
    very likely that the 4-hop neighborhoods around these nodes are isomorphic:
    each iteration aggregates 1-hop neighbourhoods, meaning hashes at depth $n$
    are influenced by every node within $2n$ hops.

    However, the 6-hop neighborhoods are no longer isomorphic, since the third
    hashes do not match.

    These nodes may be candidates to be classified together since their local
    topology is similar.


    Notes
    -----
    To hash the full graph when subgraph hashes are not needed, use
    `weisfeiler_lehman_graph_hash` for efficiency.

    Similarity between hashes does not imply similarity between graphs.

    References
    ----------
    .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
       Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
       Graph Kernels. Journal of Machine Learning Research. 2011.
       http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf
    .. [2] Annamalai Narayanan, Mahinthan Chandramohan, Rajasekar Venkatesan,
       Lihui Chen, Yang Liu and Shantanu Jaiswal. graph2vec: Learning
       Distributed Representations of Graphs. arXiv. 2017.
       https://arxiv.org/pdf/1707.05005.pdf

    See also
    --------
    weisfeiler_lehman_graph_hash
    """


    def weisfeiler_lehman_step(G, labels, node_subgraph_hashes, edge_attr=None):
        """
        Apply neighborhood aggregation to each node
        in the graph.
        Computes a dictionary with labels for each node.
        Appends the new hashed label to the dictionary of subgraph hashes
        originating from and indexed by each node in G.
        """
        new_labels = {}
        for node in G.nodes():
            label = _neighborhood_aggregate(G, node, labels, edge_attr=edge_attr)
            hashed_label = _hash_label(label, digest_size)
            new_labels[node] = hashed_label
            node_subgraph_hashes[node].append(hashed_label)
        return new_labels

    node_labels = _init_node_labels(G, edge_attr, node_attr)

    node_subgraph_hashes = defaultdict(list)
    for _ in range(iterations):
        node_labels = weisfeiler_lehman_step(
            G, node_labels, node_subgraph_hashes, edge_attr
        )

    return dict(node_subgraph_hashes)
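

# Editorial sketch (not part of networkx): the two public functions are related -
# the graph hash is a blake2b digest of the sorted per-iteration histograms of
# the per-node subgraph hashes. The hypothetical helper below rebuilds the
# result of ``weisfeiler_lehman_graph_hash`` from the output of
# ``weisfeiler_lehman_subgraph_hashes`` to make that relationship concrete.
def _demo_rebuild_graph_hash_from_subgraph_hashes():  # pragma: no cover - example only
    iterations, digest_size = 3, 16
    G = nx.path_graph(4)
    per_node = weisfeiler_lehman_subgraph_hashes(
        G, iterations=iterations, digest_size=digest_size
    )
    hash_counts = []
    for i in range(iterations):
        # Histogram of all node labels produced at iteration i, sorted by label.
        counter = Counter(hashes[i] for hashes in per_node.values())
        hash_counts.extend(sorted(counter.items(), key=lambda x: x[0]))
    rebuilt = _hash_label(str(tuple(hash_counts)), digest_size)
    assert rebuilt == weisfeiler_lehman_graph_hash(
        G, iterations=iterations, digest_size=digest_size
    )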