1import itertools
2
3import networkx as nx
4
5__all__ = ["birank"]
6
7
8@nx._dispatchable(edge_attrs="weight")
9def birank(
10 G,
11 nodes,
12 *,
13 alpha=None,
14 beta=None,
15 top_personalization=None,
16 bottom_personalization=None,
17 max_iter=100,
18 tol=1.0e-6,
19 weight="weight",
20):
21 r"""Compute the BiRank score for nodes in a bipartite network.
22
23 Given the bipartite sets $U$ and $P$, the BiRank algorithm seeks to satisfy
24 the following recursive relationships between the scores of nodes $j \in P$
25 and $i \in U$:
26
27 .. math::
28
29 p_j = \alpha \sum_{i \in U} \frac{w_{ij}}{\sqrt{d_i}\sqrt{d_j}} u_i
30 + (1 - \alpha) p_j^0
31
32 u_i = \beta \sum_{j \in P} \frac{w_{ij}}{\sqrt{d_i}\sqrt{d_j}} p_j
33 + (1 - \beta) u_i^0
34
35 where
36
37 * $p_j$ and $u_i$ are the BiRank scores of nodes $j \in P$ and $i \in U$.
38 * $w_{ij}$ is the weight of the edge between nodes $i \in U$ and $j \in P$
39 (With a value of 0 if no edge exists).
40 * $d_i$ and $d_j$ are the weighted degrees of nodes $i \in U$ and $j \in P$,
41 respectively.
42 * $p_j^0$ and $u_i^0$ are personalization values that can encode a priori
43 weights for the nodes $j \in P$ and $i \in U$, respectively. Akin to the
44 personalization vector used by PageRank.
45 * $\alpha$ and $\beta$ are damping hyperparameters applying to nodes in $P$
46 and $U$ respectively. They can take values in the interval $[0, 1]$, and
47 are analogous to those used by PageRank.
48
49 Below are two use cases for this algorithm.
50
51 1. Personalized Recommendation System
52 Given a bipartite graph representing users and items, BiRank can be used
53 as a collaborative filtering algorithm to recommend items to users.
54 Previous ratings are encoded as edge weights, and the specific ratings
55 of an individual user on a set of items is used as the personalization
56 vector over items. See the example below for an implementation of this
57 on a toy dataset provided in [1]_.
58
59 2. Popularity Prediction
60 Given a bipartite graph representing user interactions with items, e.g.
61 commits to a GitHub repository, BiRank can be used to predict the
62 popularity of a given item. Edge weights should encode the strength of
63 the interaction signal. This could be a raw count, or weighted by a time
64 decay function like that specified in Eq. (15) of [1]_. The
65 personalization vectors can be used to encode existing popularity
66 signals, for example, the monthly download count of a repository's
67 package.
68
69 Parameters
70 ----------
71 G : graph
72 A bipartite network
73
74 nodes : iterable of nodes
75 Container with all nodes belonging to the first bipartite node set
76 ('top'). The nodes in this set use the hyperparameter `alpha`, and the
77 personalization dictionary `top_personalization`. The nodes in the second
78 bipartite node set ('bottom') are automatically determined by taking the
79 complement of 'top' with respect to the graph `G`.
80
81 alpha : float, optional (default=0.80 if top_personalization not empty, else 1)
82 Damping factor for the 'top' nodes. Must be in the interval $[0, 1]$.
83 Larger alpha and beta generally reduce the effect of the personalizations
84 and increase the number of iterations before convergence. Choice of value
85 is largely dependent on use case, and experimentation is recommended.
86
87 beta : float, optional (default=0.80 if bottom_personalization not empty, else 1)
88 Damping factor for the 'bottom' nodes. Must be in the interval $[0, 1]$.
89 Larger alpha and beta generally reduce the effect of the personalizations
90 and increase the number of iterations before convergence. Choice of value
91 is largely dependent on use case, and experimentation is recommended.
92
93 top_personalization : dict, optional (default=None)
94 Dictionary keyed by nodes in 'top' to that node's personalization value.
95 Unspecified nodes in 'top' will be assigned a personalization value of 0.
96 Personalization values are used to encode a priori weights for a given node,
97 and should be non-negative.
98
99 bottom_personalization : dict, optional (default=None)
100 Dictionary keyed by nodes in 'bottom' to that node's personalization value.
101 Unspecified nodes in 'bottom' will be assigned a personalization value of 0.
102 Personalization values are used to encode a priori weights for a given node,
103 and should be non-negative.
104
105 max_iter : int, optional (default=100)
106 Maximum number of iterations in power method eigenvalue solver.
107
108 tol : float, optional (default=1.0e-6)
109 Error tolerance used to check convergence in power method solver. The
110 iteration will stop after a tolerance of both ``len(top) * tol`` and
111 ``len(bottom) * tol`` is reached for nodes in 'top' and 'bottom'
112 respectively.
113
114 weight : string or None, optional (default='weight')
115 Edge data key to use as weight.
116
117 Returns
118 -------
119 birank : dictionary
120 Dictionary keyed by node to that node's BiRank score.
121
122 Raises
123 ------
124 NetworkXAlgorithmError
125 If the parameters `alpha` or `beta` are not in the interval [0, 1],
126 if either of the bipartite sets are empty, or if negative values are
127 provided in the personalization dictionaries.
128
129 PowerIterationFailedConvergence
130 If the algorithm fails to converge to the specified tolerance
131 within the specified number of iterations of the power iteration
132 method.
133
134 Examples
135 --------
136 Construct a bipartite graph with user-item ratings and use BiRank to
137 recommend items to a user (user 1). The example below uses the `rating`
138 edge attribute as the weight of the edges. The `top_personalization` vector
139 is used to encode the user's previous ratings on items.
140
141 Creation of graph, bipartite sets for the example.
142
143 >>> elist = [
144 ... ("u1", "p1", 5),
145 ... ("u2", "p1", 5),
146 ... ("u2", "p2", 4),
147 ... ("u3", "p1", 3),
148 ... ("u3", "p3", 2),
149 ... ]
150 >>> G = nx.Graph()
151 >>> G.add_weighted_edges_from(elist, weight="rating")
152 >>> product_nodes = ("p1", "p2", "p3")
153 >>> user = "u1"
154
155 First, we create a personalization vector for the user based on on their
156 ratings of past items. In this case they have only rated one item (p1, with
157 a rating of 5) in the past.
158
159 >>> user_personalization = {
160 ... product: rating
161 ... for _, product, rating in G.edges(nbunch=user, data="rating")
162 ... }
163 >>> user_personalization
164 {'p1': 5}
165
166 Calculate the BiRank score of all nodes in the graph, filter for the items
167 that the user has not rated yet, and sort the results by score.
168
169 >>> user_birank_results = nx.bipartite.birank(
170 ... G, product_nodes, top_personalization=user_personalization, weight="rating"
171 ... )
172 >>> user_birank_results = filter(
173 ... lambda item: item[0][0] == "p" and user not in G.neighbors(item[0]),
174 ... user_birank_results.items(),
175 ... )
176 >>> user_birank_results = sorted(
177 ... user_birank_results, key=lambda item: item[1], reverse=True
178 ... )
179 >>> user_recommendations = {
180 ... product: round(score, 5) for product, score in user_birank_results
181 ... }
182 >>> user_recommendations
183 {'p2': 1.44818, 'p3': 1.04811}
184
185 We find that user 1 should be recommended item p2 over item p3. This is due
186 to the fact that user 2 rated also rated p1 highly, while user 3 did not.
187 Thus user 2's tastes are inferred to be similar to user 1's, and carry more
188 weight in the recommendation.
189
190 See Also
191 --------
192 :func:`~networkx.algorithms.link_analysis.pagerank_alg.pagerank`
193 :func:`~networkx.algorithms.link_analysis.hits_alg.hits`
194 :func:`~networkx.algorithms.bipartite.centrality.betweenness_centrality`
195 :func:`~networkx.algorithms.bipartite.basic.sets`
196 :func:`~networkx.algorithms.bipartite.basic.is_bipartite`
197
198 Notes
199 -----
200 The `nodes` input parameter must contain all nodes in one bipartite
201 node set, but the dictionary returned contains all nodes from both
202 bipartite node sets. See :mod:`bipartite documentation
203 <networkx.algorithms.bipartite>` for further details on how
204 bipartite graphs are handled in NetworkX.
205
206 In the case a personalization dictionary is not provided for top (bottom)
207 `alpha` (`beta`) will default to 1. This is because a damping factor
208 without a non-zero entry in the personalization vector will lead to the
209 algorithm converging to the zero vector.
210
211 References
212 ----------
213 .. [1] Xiangnan He, Ming Gao, Min-Yen Kan, and Dingxian Wang. 2017.
214 BiRank: Towards Ranking on Bipartite Graphs. IEEE Trans. on Knowl.
215 and Data Eng. 29, 1 (January 2017), 57–71.
216 https://arxiv.org/pdf/1708.04396
217
218 """
219 import numpy as np
220 import scipy as sp
221
222 # Initialize the sets of top and bottom nodes
223 top = set(nodes)
224 bottom = set(G) - top
225 top_count = len(top)
226 bottom_count = len(bottom)
227
228 if top_count == 0 or bottom_count == 0:
229 raise nx.NetworkXAlgorithmError(
230 "The BiRank algorithm requires a bipartite graph with at least one"
231 "node in each set."
232 )
233
234 # Clean the personalization dictionaries
235 top_personalization = _clean_personalization_dict(top_personalization)
236 bottom_personalization = _clean_personalization_dict(bottom_personalization)
237
238 # Set default values for alpha and beta if not provided
239 if alpha is None:
240 alpha = 0.8 if top_personalization else 1
241 if beta is None:
242 beta = 0.8 if bottom_personalization else 1
243
244 if alpha < 0 or alpha > 1:
245 raise nx.NetworkXAlgorithmError("alpha must be in the interval [0, 1]")
246 if beta < 0 or beta > 1:
247 raise nx.NetworkXAlgorithmError("beta must be in the interval [0, 1]")
248
249 # Initialize query vectors
250 p0 = np.array([top_personalization.get(n, 0) for n in top], dtype=float)
251 u0 = np.array([bottom_personalization.get(n, 0) for n in bottom], dtype=float)
252
253 # Construct degree normalized biadjacency matrix `S` and its transpose
254 W = nx.bipartite.biadjacency_matrix(G, bottom, top, weight=weight, dtype=float)
255 p_degrees = W.sum(axis=0, dtype=float)
256 # Handle case where the node is disconnected - avoids warning
257 p_degrees[p_degrees == 0] = 1.0
258 D_p = sp.sparse.dia_array(
259 ([1.0 / np.sqrt(p_degrees)], [0]),
260 shape=(top_count, top_count),
261 dtype=float,
262 )
263 u_degrees = W.sum(axis=1, dtype=float)
264 u_degrees[u_degrees == 0] = 1.0
265 D_u = sp.sparse.dia_array(
266 ([1.0 / np.sqrt(u_degrees)], [0]),
267 shape=(bottom_count, bottom_count),
268 dtype=float,
269 )
270 S = D_u.tocsr() @ W @ D_p.tocsr()
271 S_T = S.T
272
273 # Initialize birank vectors for iteration
274 p = np.ones(top_count, dtype=float) / top_count
275 u = beta * (S @ p) + (1 - beta) * u0
276
277 # Iterate until convergence
278 for _ in range(max_iter):
279 p_last = p
280 u_last = u
281 p = alpha * (S_T @ u) + (1 - alpha) * p0
282 u = beta * (S @ p) + (1 - beta) * u0
283
284 # Continue iterating if the error (absolute if less than 1, relative otherwise)
285 # is above the tolerance threshold for either p or u
286 err_u = np.absolute((u_last - u) / np.maximum(1.0, u_last)).sum()
287 if err_u >= len(u) * tol:
288 continue
289 err_p = np.absolute((p_last - p) / np.maximum(1.0, p_last)).sum()
290 if err_p >= len(p) * tol:
291 continue
292
293 # Handle edge case where if both alpha and beta are 1, scale is
294 # indeterminate, so normalization is required to return consistent results
295 if alpha == 1 and beta == 1:
296 p = p / np.linalg.norm(p, 1)
297 u = u / np.linalg.norm(u, 1)
298
299 # If both error thresholds pass, return a single dictionary mapping
300 # nodes to their scores
301 return dict(
302 zip(itertools.chain(top, bottom), map(float, itertools.chain(p, u)))
303 )
304
305 # If we reach this point, we have not converged
306 raise nx.PowerIterationFailedConvergence(max_iter)
307
308
309def _clean_personalization_dict(personalization):
310 """Filter out zero values from the personalization dictionary,
311 handle case where None is passed, ensure values are non-negative."""
312 if personalization is None:
313 return {}
314 if any(value < 0 for value in personalization.values()):
315 raise nx.NetworkXAlgorithmError("Personalization values must be non-negative.")
316 return {node: value for node, value in personalization.items() if value != 0}