Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/node_classification.py: 15%
60 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1""" This module provides the functions for node classification problem.
3The functions in this module are not imported
4into the top level `networkx` namespace.
5You can access these functions by importing
6the `networkx.algorithms.node_classification` modules,
7then accessing the functions as attributes of `node_classification`.
8For example:
10 >>> from networkx.algorithms import node_classification
11 >>> G = nx.path_graph(4)
12 >>> G.edges()
13 EdgeView([(0, 1), (1, 2), (2, 3)])
14 >>> G.nodes[0]["label"] = "A"
15 >>> G.nodes[3]["label"] = "B"
16 >>> node_classification.harmonic_function(G)
17 ['A', 'A', 'B', 'B']
19References
20----------
21Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
22Semi-supervised learning using gaussian fields and harmonic functions.
23In ICML (Vol. 3, pp. 912-919).
24"""
25import networkx as nx
27__all__ = ["harmonic_function", "local_and_global_consistency"]
30@nx.utils.not_implemented_for("directed")
31@nx._dispatch(node_attrs="label_name")
32def harmonic_function(G, max_iter=30, label_name="label"):
33 """Node classification by Harmonic function
35 Function for computing Harmonic function algorithm by Zhu et al.
37 Parameters
38 ----------
39 G : NetworkX Graph
40 max_iter : int
41 maximum number of iterations allowed
42 label_name : string
43 name of target labels to predict
45 Returns
46 -------
47 predicted : list
48 List of length ``len(G)`` with the predicted labels for each node.
50 Raises
51 ------
52 NetworkXError
53 If no nodes in `G` have attribute `label_name`.
55 Examples
56 --------
57 >>> from networkx.algorithms import node_classification
58 >>> G = nx.path_graph(4)
59 >>> G.nodes[0]["label"] = "A"
60 >>> G.nodes[3]["label"] = "B"
61 >>> G.nodes(data=True)
62 NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
63 >>> G.edges()
64 EdgeView([(0, 1), (1, 2), (2, 3)])
65 >>> predicted = node_classification.harmonic_function(G)
66 >>> predicted
67 ['A', 'A', 'B', 'B']
69 References
70 ----------
71 Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
72 Semi-supervised learning using gaussian fields and harmonic functions.
73 In ICML (Vol. 3, pp. 912-919).
74 """
75 import numpy as np
76 import scipy as sp
78 X = nx.to_scipy_sparse_array(G) # adjacency matrix
79 labels, label_dict = _get_label_info(G, label_name)
81 if labels.shape[0] == 0:
82 raise nx.NetworkXError(
83 f"No node on the input graph is labeled by '{label_name}'."
84 )
86 n_samples = X.shape[0]
87 n_classes = label_dict.shape[0]
88 F = np.zeros((n_samples, n_classes))
90 # Build propagation matrix
91 degrees = X.sum(axis=0)
92 degrees[degrees == 0] = 1 # Avoid division by 0
93 # TODO: csr_array
94 D = sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0))
95 P = (D @ X).tolil()
96 P[labels[:, 0]] = 0 # labels[:, 0] indicates IDs of labeled nodes
97 # Build base matrix
98 B = np.zeros((n_samples, n_classes))
99 B[labels[:, 0], labels[:, 1]] = 1
101 for _ in range(max_iter):
102 F = (P @ F) + B
104 return label_dict[np.argmax(F, axis=1)].tolist()
107@nx.utils.not_implemented_for("directed")
108@nx._dispatch(node_attrs="label_name")
109def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
110 """Node classification by Local and Global Consistency
112 Function for computing Local and global consistency algorithm by Zhou et al.
114 Parameters
115 ----------
116 G : NetworkX Graph
117 alpha : float
118 Clamping factor
119 max_iter : int
120 Maximum number of iterations allowed
121 label_name : string
122 Name of target labels to predict
124 Returns
125 -------
126 predicted : list
127 List of length ``len(G)`` with the predicted labels for each node.
129 Raises
130 ------
131 NetworkXError
132 If no nodes in `G` have attribute `label_name`.
134 Examples
135 --------
136 >>> from networkx.algorithms import node_classification
137 >>> G = nx.path_graph(4)
138 >>> G.nodes[0]["label"] = "A"
139 >>> G.nodes[3]["label"] = "B"
140 >>> G.nodes(data=True)
141 NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
142 >>> G.edges()
143 EdgeView([(0, 1), (1, 2), (2, 3)])
144 >>> predicted = node_classification.local_and_global_consistency(G)
145 >>> predicted
146 ['A', 'A', 'B', 'B']
148 References
149 ----------
150 Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
151 Learning with local and global consistency.
152 Advances in neural information processing systems, 16(16), 321-328.
153 """
154 import numpy as np
155 import scipy as sp
157 X = nx.to_scipy_sparse_array(G) # adjacency matrix
158 labels, label_dict = _get_label_info(G, label_name)
160 if labels.shape[0] == 0:
161 raise nx.NetworkXError(
162 f"No node on the input graph is labeled by '{label_name}'."
163 )
165 n_samples = X.shape[0]
166 n_classes = label_dict.shape[0]
167 F = np.zeros((n_samples, n_classes))
169 # Build propagation matrix
170 degrees = X.sum(axis=0)
171 degrees[degrees == 0] = 1 # Avoid division by 0
172 # TODO: csr_array
173 D2 = np.sqrt(sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0)))
174 P = alpha * ((D2 @ X) @ D2)
175 # Build base matrix
176 B = np.zeros((n_samples, n_classes))
177 B[labels[:, 0], labels[:, 1]] = 1 - alpha
179 for _ in range(max_iter):
180 F = (P @ F) + B
182 return label_dict[np.argmax(F, axis=1)].tolist()
185def _get_label_info(G, label_name):
186 """Get and return information of labels from the input graph
188 Parameters
189 ----------
190 G : Network X graph
191 label_name : string
192 Name of the target label
194 Returns
195 -------
196 labels : numpy array, shape = [n_labeled_samples, 2]
197 Array of pairs of labeled node ID and label ID
198 label_dict : numpy array, shape = [n_classes]
199 Array of labels
200 i-th element contains the label corresponding label ID `i`
201 """
202 import numpy as np
204 labels = []
205 label_to_id = {}
206 lid = 0
207 for i, n in enumerate(G.nodes(data=True)):
208 if label_name in n[1]:
209 label = n[1][label_name]
210 if label not in label_to_id:
211 label_to_id[label] = lid
212 lid += 1
213 labels.append([i, label_to_id[label]])
214 labels = np.array(labels)
215 label_dict = np.array(
216 [label for label, _ in sorted(label_to_id.items(), key=lambda x: x[1])]
217 )
218 return (labels, label_dict)