Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/node_classification.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

61 statements  

1"""This module provides the functions for node classification problem. 

2 

3The functions in this module are not imported 

4into the top level `networkx` namespace. 

5You can access these functions by importing 

6the `networkx.algorithms.node_classification` modules, 

7then accessing the functions as attributes of `node_classification`. 

8For example: 

9 

10 >>> from networkx.algorithms import node_classification 

11 >>> G = nx.path_graph(4) 

12 >>> G.edges() 

13 EdgeView([(0, 1), (1, 2), (2, 3)]) 

14 >>> G.nodes[0]["label"] = "A" 

15 >>> G.nodes[3]["label"] = "B" 

16 >>> node_classification.harmonic_function(G) 

17 ['A', 'A', 'B', 'B'] 

18 

19References 

20---------- 

21Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August). 

22Semi-supervised learning using gaussian fields and harmonic functions. 

23In ICML (Vol. 3, pp. 912-919). 

24""" 

25 

26import networkx as nx 

27 

28__all__ = ["harmonic_function", "local_and_global_consistency"] 

29 

30 

31@nx.utils.not_implemented_for("directed") 

32@nx._dispatchable(node_attrs="label_name") 

33def harmonic_function(G, max_iter=30, label_name="label"): 

34 """Node classification by Harmonic function 

35 

36 Function for computing Harmonic function algorithm by Zhu et al. 

37 

38 Parameters 

39 ---------- 

40 G : NetworkX Graph 

41 max_iter : int 

42 maximum number of iterations allowed 

43 label_name : string 

44 name of target labels to predict 

45 

46 Returns 

47 ------- 

48 predicted : list 

49 List of length ``len(G)`` with the predicted labels for each node. 

50 

51 Raises 

52 ------ 

53 NetworkXError 

54 If no nodes in `G` have attribute `label_name`. 

55 

56 Examples 

57 -------- 

58 >>> from networkx.algorithms import node_classification 

59 >>> G = nx.path_graph(4) 

60 >>> G.nodes[0]["label"] = "A" 

61 >>> G.nodes[3]["label"] = "B" 

62 >>> G.nodes(data=True) 

63 NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}}) 

64 >>> G.edges() 

65 EdgeView([(0, 1), (1, 2), (2, 3)]) 

66 >>> predicted = node_classification.harmonic_function(G) 

67 >>> predicted 

68 ['A', 'A', 'B', 'B'] 

69 

70 References 

71 ---------- 

72 Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August). 

73 Semi-supervised learning using gaussian fields and harmonic functions. 

74 In ICML (Vol. 3, pp. 912-919). 

75 """ 

76 import numpy as np 

77 import scipy as sp 

78 

79 X = nx.to_scipy_sparse_array(G) # adjacency matrix 

80 labels, label_dict = _get_label_info(G, label_name) 

81 

82 if labels.shape[0] == 0: 

83 raise nx.NetworkXError( 

84 f"No node on the input graph is labeled by '{label_name}'." 

85 ) 

86 

87 n_samples = X.shape[0] 

88 n_classes = label_dict.shape[0] 

89 F = np.zeros((n_samples, n_classes)) 

90 

91 # Build propagation matrix 

92 degrees = X.sum(axis=0) 

93 degrees[degrees == 0] = 1 # Avoid division by 0 

94 D = sp.sparse.dia_array((1.0 / degrees, 0), shape=(n_samples, n_samples)).tocsr() 

95 P = (D @ X).tolil() 

96 P[labels[:, 0]] = 0 # labels[:, 0] indicates IDs of labeled nodes 

97 # Build base matrix 

98 B = np.zeros((n_samples, n_classes)) 

99 B[labels[:, 0], labels[:, 1]] = 1 

100 

101 for _ in range(max_iter): 

102 F = (P @ F) + B 

103 

104 return label_dict[np.argmax(F, axis=1)].tolist() 

105 

106 

107@nx.utils.not_implemented_for("directed") 

108@nx._dispatchable(node_attrs="label_name") 

109def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"): 

110 """Node classification by Local and Global Consistency 

111 

112 Function for computing Local and global consistency algorithm by Zhou et al. 

113 

114 Parameters 

115 ---------- 

116 G : NetworkX Graph 

117 alpha : float 

118 Clamping factor 

119 max_iter : int 

120 Maximum number of iterations allowed 

121 label_name : string 

122 Name of target labels to predict 

123 

124 Returns 

125 ------- 

126 predicted : list 

127 List of length ``len(G)`` with the predicted labels for each node. 

128 

129 Raises 

130 ------ 

131 NetworkXError 

132 If no nodes in `G` have attribute `label_name`. 

133 

134 Examples 

135 -------- 

136 >>> from networkx.algorithms import node_classification 

137 >>> G = nx.path_graph(4) 

138 >>> G.nodes[0]["label"] = "A" 

139 >>> G.nodes[3]["label"] = "B" 

140 >>> G.nodes(data=True) 

141 NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}}) 

142 >>> G.edges() 

143 EdgeView([(0, 1), (1, 2), (2, 3)]) 

144 >>> predicted = node_classification.local_and_global_consistency(G) 

145 >>> predicted 

146 ['A', 'A', 'B', 'B'] 

147 

148 References 

149 ---------- 

150 Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004). 

151 Learning with local and global consistency. 

152 Advances in neural information processing systems, 16(16), 321-328. 

153 """ 

154 import numpy as np 

155 import scipy as sp 

156 

157 X = nx.to_scipy_sparse_array(G) # adjacency matrix 

158 labels, label_dict = _get_label_info(G, label_name) 

159 

160 if labels.shape[0] == 0: 

161 raise nx.NetworkXError( 

162 f"No node on the input graph is labeled by '{label_name}'." 

163 ) 

164 

165 n_samples = X.shape[0] 

166 n_classes = label_dict.shape[0] 

167 F = np.zeros((n_samples, n_classes)) 

168 

169 # Build propagation matrix 

170 degrees = X.sum(axis=0) 

171 degrees[degrees == 0] = 1 # Avoid division by 0 

172 D2 = sp.sparse.dia_array( 

173 (1.0 / np.sqrt(degrees), 0), shape=(n_samples, n_samples) 

174 ).tocsr() 

175 P = alpha * ((D2 @ X) @ D2) 

176 # Build base matrix 

177 B = np.zeros((n_samples, n_classes)) 

178 B[labels[:, 0], labels[:, 1]] = 1 - alpha 

179 

180 for _ in range(max_iter): 

181 F = (P @ F) + B 

182 

183 return label_dict[np.argmax(F, axis=1)].tolist() 

184 

185 

186def _get_label_info(G, label_name): 

187 """Get and return information of labels from the input graph 

188 

189 Parameters 

190 ---------- 

191 G : Network X graph 

192 label_name : string 

193 Name of the target label 

194 

195 Returns 

196 ------- 

197 labels : numpy array, shape = [n_labeled_samples, 2] 

198 Array of pairs of labeled node ID and label ID 

199 label_dict : numpy array, shape = [n_classes] 

200 Array of labels 

201 i-th element contains the label corresponding label ID `i` 

202 """ 

203 import numpy as np 

204 

205 labels = [] 

206 label_to_id = {} 

207 lid = 0 

208 for i, n in enumerate(G.nodes(data=True)): 

209 if label_name in n[1]: 

210 label = n[1][label_name] 

211 if label not in label_to_id: 

212 label_to_id[label] = lid 

213 lid += 1 

214 labels.append([i, label_to_id[label]]) 

215 labels = np.array(labels) 

216 label_dict = np.array( 

217 [label for label, _ in sorted(label_to_id.items(), key=lambda x: x[1])] 

218 ) 

219 return (labels, label_dict)