1"""Generators for classes of graphs used in studying social networks."""
2
3import itertools
4import math
5
6import networkx as nx
7from networkx.utils import py_random_state
8
9__all__ = [
10 "caveman_graph",
11 "connected_caveman_graph",
12 "relaxed_caveman_graph",
13 "random_partition_graph",
14 "planted_partition_graph",
15 "gaussian_random_partition_graph",
16 "ring_of_cliques",
17 "windmill_graph",
18 "stochastic_block_model",
19 "LFR_benchmark_graph",
20]
21
22
23@nx._dispatchable(graphs=None, returns_graph=True)
24def caveman_graph(l, k):
25 """Returns a caveman graph of `l` cliques of size `k`.
26
27 Parameters
28 ----------
29 l : int
30 Number of cliques
31 k : int
32 Size of cliques
33
34 Returns
35 -------
36 G : NetworkX Graph
37 caveman graph
38
39 Notes
40 -----
41 This returns an undirected graph, it can be converted to a directed
42 graph using :func:`nx.to_directed`, or a multigraph using
43 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
44 described in [1]_ and it is unclear which of the directed
45 generalizations is most useful.
46
47 Examples
48 --------
49 >>> G = nx.caveman_graph(3, 3)
50
51 See also
52 --------
53
54 connected_caveman_graph
55
56 References
57 ----------
58 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
59 Amer. J. Soc. 105, 493-527, 1999.
60 """
61 # l disjoint cliques of size k
62 G = nx.empty_graph(l * k)
63 if k > 1:
64 for start in range(0, l * k, k):
65 edges = itertools.combinations(range(start, start + k), 2)
66 G.add_edges_from(edges)
67 return G
68
69
70@nx._dispatchable(graphs=None, returns_graph=True)
71def connected_caveman_graph(l, k):
72 """Returns a connected caveman graph of `l` cliques of size `k`.
73
74 The connected caveman graph is formed by creating `n` cliques of size
75 `k`, then a single edge in each clique is rewired to a node in an
76 adjacent clique.
77
78 Parameters
79 ----------
80 l : int
81 number of cliques
82 k : int
83 size of cliques (k at least 2 or NetworkXError is raised)
84
85 Returns
86 -------
87 G : NetworkX Graph
88 connected caveman graph
89
90 Raises
91 ------
92 NetworkXError
93 If the size of cliques `k` is smaller than 2.
94
95 Notes
96 -----
97 This returns an undirected graph, it can be converted to a directed
98 graph using :func:`nx.to_directed`, or a multigraph using
99 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
100 described in [1]_ and it is unclear which of the directed
101 generalizations is most useful.
102
103 Examples
104 --------
105 >>> G = nx.connected_caveman_graph(3, 3)
106
107 References
108 ----------
109 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
110 Amer. J. Soc. 105, 493-527, 1999.
111 """
112 if k < 2:
113 raise nx.NetworkXError(
114 "The size of cliques in a connected caveman graph must be at least 2."
115 )
116
117 G = nx.caveman_graph(l, k)
118 for start in range(0, l * k, k):
119 G.remove_edge(start, start + 1)
120 G.add_edge(start, (start - 1) % (l * k))
121 return G
122
123
124@py_random_state(3)
125@nx._dispatchable(graphs=None, returns_graph=True)
126def relaxed_caveman_graph(l, k, p, seed=None):
127 """Returns a relaxed caveman graph.
128
129 A relaxed caveman graph starts with `l` cliques of size `k`. Edges are
130 then randomly rewired with probability `p` to link different cliques.
131
132 Parameters
133 ----------
134 l : int
135 Number of groups
136 k : int
137 Size of cliques
138 p : float
139 Probability of rewiring each edge.
140 seed : integer, random_state, or None (default)
141 Indicator of random number generation state.
142 See :ref:`Randomness<randomness>`.
143
144 Returns
145 -------
146 G : NetworkX Graph
147 Relaxed Caveman Graph
148
149 Raises
150 ------
151 NetworkXError
152 If p is not in [0,1]
153
154 Examples
155 --------
156 >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42)
157
158 References
159 ----------
160 .. [1] Santo Fortunato, Community Detection in Graphs,
161 Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174.
162 https://arxiv.org/abs/0906.0612
163 """
164 G = nx.caveman_graph(l, k)
165 nodes = list(G)
166 for u, v in G.edges():
167 if seed.random() < p: # rewire the edge
168 x = seed.choice(nodes)
169 if G.has_edge(u, x):
170 continue
171 G.remove_edge(u, v)
172 G.add_edge(u, x)
173 return G
174
175
176@py_random_state(3)
177@nx._dispatchable(graphs=None, returns_graph=True)
178def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False):
179 """Returns the random partition graph with a partition of sizes.
180
181 A partition graph is a graph of communities with sizes defined by
182 s in sizes. Nodes in the same group are connected with probability
183 p_in and nodes of different groups are connected with probability
184 p_out.
185
186 Parameters
187 ----------
188 sizes : list of ints
189 Sizes of groups
190 p_in : float
191 probability of edges with in groups
192 p_out : float
193 probability of edges between groups
194 directed : boolean optional, default=False
195 Whether to create a directed graph
196 seed : integer, random_state, or None (default)
197 Indicator of random number generation state.
198 See :ref:`Randomness<randomness>`.
199
200 Returns
201 -------
202 G : NetworkX Graph or DiGraph
203 random partition graph of size sum(gs)
204
205 Raises
206 ------
207 NetworkXError
208 If p_in or p_out is not in [0,1]
209
210 Examples
211 --------
212 >>> G = nx.random_partition_graph([10, 10, 10], 0.25, 0.01)
213 >>> len(G)
214 30
215 >>> partition = G.graph["partition"]
216 >>> len(partition)
217 3
218
219 Notes
220 -----
221 This is a generalization of the planted-l-partition described in
222 [1]_. It allows for the creation of groups of any size.
223
224 The partition is store as a graph attribute 'partition'.
225
226 References
227 ----------
228 .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports
229 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
230 """
231 # Use geometric method for O(n+m) complexity algorithm
232 # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation'))
233 if not 0.0 <= p_in <= 1.0:
234 raise nx.NetworkXError("p_in must be in [0,1]")
235 if not 0.0 <= p_out <= 1.0:
236 raise nx.NetworkXError("p_out must be in [0,1]")
237
238 # create connection matrix
239 num_blocks = len(sizes)
240 p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)]
241 for r in range(num_blocks):
242 p[r][r] = p_in
243
244 return stochastic_block_model(
245 sizes,
246 p,
247 nodelist=None,
248 seed=seed,
249 directed=directed,
250 selfloops=False,
251 sparse=True,
252 )
253
254
255@py_random_state(4)
256@nx._dispatchable(graphs=None, returns_graph=True)
257def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False):
258 """Returns the planted l-partition graph.
259
260 This model partitions a graph with n=l*k vertices in
261 l groups with k vertices each. Vertices of the same
262 group are linked with a probability p_in, and vertices
263 of different groups are linked with probability p_out.
264
265 Parameters
266 ----------
267 l : int
268 Number of groups
269 k : int
270 Number of vertices in each group
271 p_in : float
272 probability of connecting vertices within a group
273 p_out : float
274 probability of connected vertices between groups
275 seed : integer, random_state, or None (default)
276 Indicator of random number generation state.
277 See :ref:`Randomness<randomness>`.
278 directed : bool,optional (default=False)
279 If True return a directed graph
280
281 Returns
282 -------
283 G : NetworkX Graph or DiGraph
284 planted l-partition graph
285
286 Raises
287 ------
288 NetworkXError
289 If `p_in`, `p_out` are not in `[0, 1]`
290
291 Examples
292 --------
293 >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42)
294
295 See Also
296 --------
297 random_partition_model
298
299 References
300 ----------
301 .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning
302 on the planted partition model,
303 Random Struct. Algor. 18 (2001) 116-140.
304
305 .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports
306 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
307 """
308 return random_partition_graph([k] * l, p_in, p_out, seed=seed, directed=directed)
309
310
311@py_random_state(6)
312@nx._dispatchable(graphs=None, returns_graph=True)
313def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, seed=None):
314 """Generate a Gaussian random partition graph.
315
316 A Gaussian random partition graph is created by creating k partitions
317 each with a size drawn from a normal distribution with mean s and variance
318 s/v. Nodes are connected within clusters with probability p_in and
319 between clusters with probability p_out[1]
320
321 Parameters
322 ----------
323 n : int
324 Number of nodes in the graph
325 s : float
326 Mean cluster size
327 v : float
328 Shape parameter. The variance of cluster size distribution is s/v.
329 p_in : float
330 Probability of intra cluster connection.
331 p_out : float
332 Probability of inter cluster connection.
333 directed : boolean, optional default=False
334 Whether to create a directed graph or not
335 seed : integer, random_state, or None (default)
336 Indicator of random number generation state.
337 See :ref:`Randomness<randomness>`.
338
339 Returns
340 -------
341 G : NetworkX Graph or DiGraph
342 gaussian random partition graph
343
344 Raises
345 ------
346 NetworkXError
347 If s is > n
348 If p_in or p_out is not in [0,1]
349
350 Notes
351 -----
352 Note the number of partitions is dependent on s,v and n, and that the
353 last partition may be considerably smaller, as it is sized to simply
354 fill out the nodes [1]
355
356 See Also
357 --------
358 random_partition_graph
359
360 Examples
361 --------
362 >>> G = nx.gaussian_random_partition_graph(100, 10, 10, 0.25, 0.1)
363 >>> len(G)
364 100
365
366 References
367 ----------
368 .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner,
369 Experiments on Graph Clustering Algorithms,
370 In the proceedings of the 11th Europ. Symp. Algorithms, 2003.
371 """
372 if s > n:
373 raise nx.NetworkXError("s must be <= n")
374 assigned = 0
375 sizes = []
376 while True:
377 size = int(seed.gauss(s, s / v + 0.5))
378 if size < 1: # how to handle 0 or negative sizes?
379 continue
380 if assigned + size >= n:
381 sizes.append(n - assigned)
382 break
383 assigned += size
384 sizes.append(size)
385 return random_partition_graph(sizes, p_in, p_out, seed=seed, directed=directed)
386
387
388@nx._dispatchable(graphs=None, returns_graph=True)
389def ring_of_cliques(num_cliques, clique_size):
390 """Defines a "ring of cliques" graph.
391
392 A ring of cliques graph is consisting of cliques, connected through single
393 links. Each clique is a complete graph.
394
395 Parameters
396 ----------
397 num_cliques : int
398 Number of cliques
399 clique_size : int
400 Size of cliques
401
402 Returns
403 -------
404 G : NetworkX Graph
405 ring of cliques graph
406
407 Raises
408 ------
409 NetworkXError
410 If the number of cliques is lower than 2 or
411 if the size of cliques is smaller than 2.
412
413 Examples
414 --------
415 >>> G = nx.ring_of_cliques(8, 4)
416
417 See Also
418 --------
419 connected_caveman_graph
420
421 Notes
422 -----
423 The `connected_caveman_graph` graph removes a link from each clique to
424 connect it with the next clique. Instead, the `ring_of_cliques` graph
425 simply adds the link without removing any link from the cliques.
426 """
427 if num_cliques < 2:
428 raise nx.NetworkXError("A ring of cliques must have at least two cliques")
429 if clique_size < 2:
430 raise nx.NetworkXError("The cliques must have at least two nodes")
431
432 G = nx.Graph()
433 for i in range(num_cliques):
434 edges = itertools.combinations(
435 range(i * clique_size, i * clique_size + clique_size), 2
436 )
437 G.add_edges_from(edges)
438 G.add_edge(
439 i * clique_size + 1, (i + 1) * clique_size % (num_cliques * clique_size)
440 )
441 return G
442
443
444@nx._dispatchable(graphs=None, returns_graph=True)
445def windmill_graph(n, k):
446 """Generate a windmill graph.
447 A windmill graph is a graph of `n` cliques each of size `k` that are all
448 joined at one node.
449 It can be thought of as taking a disjoint union of `n` cliques of size `k`,
450 selecting one point from each, and contracting all of the selected points.
451 Alternatively, one could generate `n` cliques of size `k-1` and one node
452 that is connected to all other nodes in the graph.
453
454 Parameters
455 ----------
456 n : int
457 Number of cliques
458 k : int
459 Size of cliques
460
461 Returns
462 -------
463 G : NetworkX Graph
464 windmill graph with n cliques of size k
465
466 Raises
467 ------
468 NetworkXError
469 If the number of cliques is less than two
470 If the size of the cliques are less than two
471
472 Examples
473 --------
474 >>> G = nx.windmill_graph(4, 5)
475
476 Notes
477 -----
478 The node labeled `0` will be the node connected to all other nodes.
479 Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters
480 are in the opposite order as the parameters of this method.
481 """
482 if n < 2:
483 msg = "A windmill graph must have at least two cliques"
484 raise nx.NetworkXError(msg)
485 if k < 2:
486 raise nx.NetworkXError("The cliques must have at least two nodes")
487
488 G = nx.disjoint_union_all(
489 itertools.chain(
490 [nx.complete_graph(k)], (nx.complete_graph(k - 1) for _ in range(n - 1))
491 )
492 )
493 G.add_edges_from((0, i) for i in range(k, G.number_of_nodes()))
494 return G
495
496
497@py_random_state(3)
498@nx._dispatchable(graphs=None, returns_graph=True)
499def stochastic_block_model(
500 sizes, p, nodelist=None, seed=None, directed=False, selfloops=False, sparse=True
501):
502 """Returns a stochastic block model graph.
503
504 This model partitions the nodes in blocks of arbitrary sizes, and places
505 edges between pairs of nodes independently, with a probability that depends
506 on the blocks.
507
508 Parameters
509 ----------
510 sizes : list of ints
511 Sizes of blocks
512 p : list of list of floats
513 Element (r,s) gives the density of edges going from the nodes
514 of group r to nodes of group s.
515 p must match the number of groups (len(sizes) == len(p)),
516 and it must be symmetric if the graph is undirected.
517 nodelist : list, optional
518 The block tags are assigned according to the node identifiers
519 in nodelist. If nodelist is None, then the ordering is the
520 range [0,sum(sizes)-1].
521 seed : integer, random_state, or None (default)
522 Indicator of random number generation state.
523 See :ref:`Randomness<randomness>`.
524 directed : boolean optional, default=False
525 Whether to create a directed graph or not.
526 selfloops : boolean optional, default=False
527 Whether to include self-loops or not.
528 sparse: boolean optional, default=True
529 Use the sparse heuristic to speed up the generator.
530
531 Returns
532 -------
533 g : NetworkX Graph or DiGraph
534 Stochastic block model graph of size sum(sizes)
535
536 Raises
537 ------
538 NetworkXError
539 If probabilities are not in [0,1].
540 If the probability matrix is not square (directed case).
541 If the probability matrix is not symmetric (undirected case).
542 If the sizes list does not match nodelist or the probability matrix.
543 If nodelist contains duplicate.
544
545 Examples
546 --------
547 >>> sizes = [75, 75, 300]
548 >>> probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]]
549 >>> g = nx.stochastic_block_model(sizes, probs, seed=0)
550 >>> len(g)
551 450
552 >>> H = nx.quotient_graph(g, g.graph["partition"], relabel=True)
553 >>> for v in H.nodes(data=True):
554 ... print(round(v[1]["density"], 3))
555 0.245
556 0.348
557 0.405
558 >>> for v in H.edges(data=True):
559 ... print(round(1.0 * v[2]["weight"] / (sizes[v[0]] * sizes[v[1]]), 3))
560 0.051
561 0.022
562 0.07
563
564 See Also
565 --------
566 random_partition_graph
567 planted_partition_graph
568 gaussian_random_partition_graph
569 gnp_random_graph
570
571 References
572 ----------
573 .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S.,
574 "Stochastic blockmodels: First steps",
575 Social networks, 5(2), 109-137, 1983.
576 """
577 # Check if dimensions match
578 if len(sizes) != len(p):
579 raise nx.NetworkXException("'sizes' and 'p' do not match.")
580 # Check for probability symmetry (undirected) and shape (directed)
581 for row in p:
582 if len(p) != len(row):
583 raise nx.NetworkXException("'p' must be a square matrix.")
584 if not directed:
585 p_transpose = [list(i) for i in zip(*p)]
586 for i in zip(p, p_transpose):
587 for j in zip(i[0], i[1]):
588 if abs(j[0] - j[1]) > 1e-08:
589 raise nx.NetworkXException("'p' must be symmetric.")
590 # Check for probability range
591 for row in p:
592 for prob in row:
593 if prob < 0 or prob > 1:
594 raise nx.NetworkXException("Entries of 'p' not in [0,1].")
595 # Check for nodelist consistency
596 if nodelist is not None:
597 if len(nodelist) != sum(sizes):
598 raise nx.NetworkXException("'nodelist' and 'sizes' do not match.")
599 if len(nodelist) != len(set(nodelist)):
600 raise nx.NetworkXException("nodelist contains duplicate.")
601 else:
602 nodelist = range(sum(sizes))
603
604 # Setup the graph conditionally to the directed switch.
605 block_range = range(len(sizes))
606 if directed:
607 g = nx.DiGraph()
608 block_iter = itertools.product(block_range, block_range)
609 else:
610 g = nx.Graph()
611 block_iter = itertools.combinations_with_replacement(block_range, 2)
612 # Split nodelist in a partition (list of sets).
613 size_cumsum = [sum(sizes[0:x]) for x in range(len(sizes) + 1)]
614 g.graph["partition"] = [
615 set(nodelist[size_cumsum[x] : size_cumsum[x + 1]])
616 for x in range(len(size_cumsum) - 1)
617 ]
618 # Setup nodes and graph name
619 for block_id, nodes in enumerate(g.graph["partition"]):
620 for node in nodes:
621 g.add_node(node, block=block_id)
622
623 g.name = "stochastic_block_model"
624
625 # Test for edge existence
626 parts = g.graph["partition"]
627 for i, j in block_iter:
628 if i == j:
629 if directed:
630 if selfloops:
631 edges = itertools.product(parts[i], parts[i])
632 else:
633 edges = itertools.permutations(parts[i], 2)
634 else:
635 edges = itertools.combinations(parts[i], 2)
636 if selfloops:
637 edges = itertools.chain(edges, zip(parts[i], parts[i]))
638 for e in edges:
639 if seed.random() < p[i][j]:
640 g.add_edge(*e)
641 else:
642 edges = itertools.product(parts[i], parts[j])
643 if sparse:
644 if p[i][j] == 1: # Test edges cases p_ij = 0 or 1
645 for e in edges:
646 g.add_edge(*e)
647 elif p[i][j] > 0:
648 while True:
649 try:
650 logrand = math.log(seed.random())
651 skip = math.floor(logrand / math.log(1 - p[i][j]))
652 # consume "skip" edges
653 next(itertools.islice(edges, skip, skip), None)
654 e = next(edges)
655 g.add_edge(*e) # __safe
656 except StopIteration:
657 break
658 else:
659 for e in edges:
660 if seed.random() < p[i][j]:
661 g.add_edge(*e) # __safe
662 return g
663
664
665def _zipf_rv_below(gamma, xmin, threshold, seed):
666 """Returns a random value chosen from the bounded Zipf distribution.
667
668 Repeatedly draws values from the Zipf distribution until the
669 threshold is met, then returns that value.
670 """
671 result = nx.utils.zipf_rv(gamma, xmin, seed)
672 while result > threshold:
673 result = nx.utils.zipf_rv(gamma, xmin, seed)
674 return result
675
676
677def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed):
678 """Returns a list of numbers obeying a constrained power law distribution.
679
680 ``gamma`` and ``low`` are the parameters for the Zipf distribution.
681
682 ``high`` is the maximum allowed value for values draw from the Zipf
683 distribution. For more information, see :func:`_zipf_rv_below`.
684
685 ``condition`` and ``length`` are Boolean-valued functions on
686 lists. While generating the list, random values are drawn and
687 appended to the list until ``length`` is satisfied by the created
688 list. Once ``condition`` is satisfied, the sequence generated in
689 this way is returned.
690
691 ``max_iters`` indicates the number of times to generate a list
692 satisfying ``length``. If the number of iterations exceeds this
693 value, :exc:`~networkx.exception.ExceededMaxIterations` is raised.
694
695 seed : integer, random_state, or None (default)
696 Indicator of random number generation state.
697 See :ref:`Randomness<randomness>`.
698 """
699 for i in range(max_iters):
700 seq = []
701 while not length(seq):
702 seq.append(_zipf_rv_below(gamma, low, high, seed))
703 if condition(seq):
704 return seq
705 raise nx.ExceededMaxIterations("Could not create power law sequence")
706
707
708def _hurwitz_zeta(x, q, tolerance):
709 """The Hurwitz zeta function, or the Riemann zeta function of two arguments.
710
711 ``x`` must be greater than one and ``q`` must be positive.
712
713 This function repeatedly computes subsequent partial sums until
714 convergence, as decided by ``tolerance``.
715 """
716 z = 0
717 z_prev = -float("inf")
718 k = 0
719 while abs(z - z_prev) > tolerance:
720 z_prev = z
721 z += 1 / ((k + q) ** x)
722 k += 1
723 return z
724
725
726def _generate_min_degree(gamma, average_degree, max_degree, tolerance, max_iters):
727 """Returns a minimum degree from the given average degree."""
728 # Defines zeta function whether or not Scipy is available
729 try:
730 from scipy.special import zeta
731 except ImportError:
732
733 def zeta(x, q):
734 return _hurwitz_zeta(x, q, tolerance)
735
736 min_deg_top = max_degree
737 min_deg_bot = 1
738 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
739 itrs = 0
740 mid_avg_deg = 0
741 while abs(mid_avg_deg - average_degree) > tolerance:
742 if itrs > max_iters:
743 raise nx.ExceededMaxIterations("Could not match average_degree")
744 mid_avg_deg = 0
745 for x in range(int(min_deg_mid), max_degree + 1):
746 mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid)
747 if mid_avg_deg > average_degree:
748 min_deg_top = min_deg_mid
749 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
750 else:
751 min_deg_bot = min_deg_mid
752 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
753 itrs += 1
754 # return int(min_deg_mid + 0.5)
755 return round(min_deg_mid)
756
757
758def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed):
759 """Returns a list of sets, each of which represents a community.
760
761 ``degree_seq`` is the degree sequence that must be met by the
762 graph.
763
764 ``community_sizes`` is the community size distribution that must be
765 met by the generated list of sets.
766
767 ``mu`` is a float in the interval [0, 1] indicating the fraction of
768 intra-community edges incident to each node.
769
770 ``max_iters`` is the number of times to try to add a node to a
771 community. This must be greater than the length of
772 ``degree_seq``, otherwise this function will always fail. If
773 the number of iterations exceeds this value,
774 :exc:`~networkx.exception.ExceededMaxIterations` is raised.
775
776 seed : integer, random_state, or None (default)
777 Indicator of random number generation state.
778 See :ref:`Randomness<randomness>`.
779
780 The communities returned by this are sets of integers in the set {0,
781 ..., *n* - 1}, where *n* is the length of ``degree_seq``.
782
783 """
784 # This assumes the nodes in the graph will be natural numbers.
785 result = [set() for _ in community_sizes]
786 n = len(degree_seq)
787 free = list(range(n))
788 for i in range(max_iters):
789 v = free.pop()
790 c = seed.choice(range(len(community_sizes)))
791 # s = int(degree_seq[v] * (1 - mu) + 0.5)
792 s = round(degree_seq[v] * (1 - mu))
793 # If the community is large enough, add the node to the chosen
794 # community. Otherwise, return it to the list of unaffiliated
795 # nodes.
796 if s < community_sizes[c]:
797 result[c].add(v)
798 else:
799 free.append(v)
800 # If the community is too big, remove a node from it.
801 if len(result[c]) > community_sizes[c]:
802 free.append(result[c].pop())
803 if not free:
804 return result
805 msg = "Could not assign communities; try increasing min_community"
806 raise nx.ExceededMaxIterations(msg)
807
808
809@py_random_state(11)
810@nx._dispatchable(graphs=None, returns_graph=True)
811def LFR_benchmark_graph(
812 n,
813 tau1,
814 tau2,
815 mu,
816 average_degree=None,
817 min_degree=None,
818 max_degree=None,
819 min_community=None,
820 max_community=None,
821 tol=1.0e-7,
822 max_iters=500,
823 seed=None,
824):
825 r"""Returns the LFR benchmark graph.
826
827 This algorithm proceeds as follows:
828
829 1) Find a degree sequence with a power law distribution, and minimum
830 value ``min_degree``, which has approximate average degree
831 ``average_degree``. This is accomplished by either
832
833 a) specifying ``min_degree`` and not ``average_degree``,
834 b) specifying ``average_degree`` and not ``min_degree``, in which
835 case a suitable minimum degree will be found.
836
837 ``max_degree`` can also be specified, otherwise it will be set to
838 ``n``. Each node *u* will have $\mu \mathrm{deg}(u)$ edges
839 joining it to nodes in communities other than its own and $(1 -
840 \mu) \mathrm{deg}(u)$ edges joining it to nodes in its own
841 community.
842 2) Generate community sizes according to a power law distribution
843 with exponent ``tau2``. If ``min_community`` and
844 ``max_community`` are not specified they will be selected to be
845 ``min_degree`` and ``max_degree``, respectively. Community sizes
846 are generated until the sum of their sizes equals ``n``.
847 3) Each node will be randomly assigned a community with the
848 condition that the community is large enough for the node's
849 intra-community degree, $(1 - \mu) \mathrm{deg}(u)$ as
850 described in step 2. If a community grows too large, a random node
851 will be selected for reassignment to a new community, until all
852 nodes have been assigned a community.
853 4) Each node *u* then adds $(1 - \mu) \mathrm{deg}(u)$
854 intra-community edges and $\mu \mathrm{deg}(u)$ inter-community
855 edges.
856
857 Parameters
858 ----------
859 n : int
860 Number of nodes in the created graph.
861
862 tau1 : float
863 Power law exponent for the degree distribution of the created
864 graph. This value must be strictly greater than one.
865
866 tau2 : float
867 Power law exponent for the community size distribution in the
868 created graph. This value must be strictly greater than one.
869
870 mu : float
871 Fraction of inter-community edges incident to each node. This
872 value must be in the interval [0, 1].
873
874 average_degree : float
875 Desired average degree of nodes in the created graph. This value
876 must be in the interval [0, *n*]. Exactly one of this and
877 ``min_degree`` must be specified, otherwise a
878 :exc:`NetworkXError` is raised.
879
880 min_degree : int
881 Minimum degree of nodes in the created graph. This value must be
882 in the interval [0, *n*]. Exactly one of this and
883 ``average_degree`` must be specified, otherwise a
884 :exc:`NetworkXError` is raised.
885
886 max_degree : int
887 Maximum degree of nodes in the created graph. If not specified,
888 this is set to ``n``, the total number of nodes in the graph.
889
890 min_community : int
891 Minimum size of communities in the graph. If not specified, this
892 is set to ``min_degree``.
893
894 max_community : int
895 Maximum size of communities in the graph. If not specified, this
896 is set to ``n``, the total number of nodes in the graph.
897
898 tol : float
899 Tolerance when comparing floats, specifically when comparing
900 average degree values.
901
902 max_iters : int
903 Maximum number of iterations to try to create the community sizes,
904 degree distribution, and community affiliations.
905
906 seed : integer, random_state, or None (default)
907 Indicator of random number generation state.
908 See :ref:`Randomness<randomness>`.
909
910 Returns
911 -------
912 G : NetworkX graph
913 The LFR benchmark graph generated according to the specified
914 parameters.
915
916 Each node in the graph has a node attribute ``'community'`` that
917 stores the community (that is, the set of nodes) that includes
918 it.
919
920 Raises
921 ------
922 NetworkXError
923 If any of the parameters do not meet their upper and lower bounds:
924
925 - ``tau1`` and ``tau2`` must be strictly greater than 1.
926 - ``mu`` must be in [0, 1].
927 - ``max_degree`` must be in {1, ..., *n*}.
928 - ``min_community`` and ``max_community`` must be in {0, ...,
929 *n*}.
930
931 If not exactly one of ``average_degree`` and ``min_degree`` is
932 specified.
933
934 If ``min_degree`` is not specified and a suitable ``min_degree``
935 cannot be found.
936
937 ExceededMaxIterations
938 If a valid degree sequence cannot be created within
939 ``max_iters`` number of iterations.
940
941 If a valid set of community sizes cannot be created within
942 ``max_iters`` number of iterations.
943
944 If a valid community assignment cannot be created within ``10 *
945 n * max_iters`` number of iterations.
946
947 Examples
948 --------
949 Basic usage::
950
951 >>> from networkx.generators.community import LFR_benchmark_graph
952 >>> n = 250
953 >>> tau1 = 3
954 >>> tau2 = 1.5
955 >>> mu = 0.1
956 >>> G = LFR_benchmark_graph(
957 ... n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10
958 ... )
959
960 Continuing the example above, you can get the communities from the
961 node attributes of the graph::
962
963 >>> communities = {frozenset(G.nodes[v]["community"]) for v in G}
964
965 Notes
966 -----
967 This algorithm differs slightly from the original way it was
968 presented in [1].
969
970 1) Rather than connecting the graph via a configuration model then
971 rewiring to match the intra-community and inter-community
972 degrees, we do this wiring explicitly at the end, which should be
973 equivalent.
974 2) The code posted on the author's website [2] calculates the random
975 power law distributed variables and their average using
976 continuous approximations, whereas we use the discrete
977 distributions here as both degree and community size are
978 discrete.
979
980 Though the authors describe the algorithm as quite robust, testing
981 during development indicates that a somewhat narrower parameter set
982 is likely to successfully produce a graph. Some suggestions have
983 been provided in the event of exceptions.
984
985 References
986 ----------
987 .. [1] "Benchmark graphs for testing community detection algorithms",
988 Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi,
989 Phys. Rev. E 78, 046110 2008
990 .. [2] https://www.santofortunato.net/resources
991
992 """
993 # Perform some basic parameter validation.
994 if not tau1 > 1:
995 raise nx.NetworkXError("tau1 must be greater than one")
996 if not tau2 > 1:
997 raise nx.NetworkXError("tau2 must be greater than one")
998 if not 0 <= mu <= 1:
999 raise nx.NetworkXError("mu must be in the interval [0, 1]")
1000
1001 # Validate parameters for generating the degree sequence.
1002 if max_degree is None:
1003 max_degree = n
1004 elif not 0 < max_degree <= n:
1005 raise nx.NetworkXError("max_degree must be in the interval (0, n]")
1006 if not ((min_degree is None) ^ (average_degree is None)):
1007 raise nx.NetworkXError(
1008 "Must assign exactly one of min_degree and average_degree"
1009 )
1010 if min_degree is None:
1011 min_degree = _generate_min_degree(
1012 tau1, average_degree, max_degree, tol, max_iters
1013 )
1014
1015 # Generate a degree sequence with a power law distribution.
1016 low, high = min_degree, max_degree
1017
1018 def condition(seq):
1019 return sum(seq) % 2 == 0
1020
1021 def length(seq):
1022 return len(seq) >= n
1023
1024 deg_seq = _powerlaw_sequence(tau1, low, high, condition, length, max_iters, seed)
1025
1026 # Validate parameters for generating the community size sequence.
1027 if min_community is None:
1028 min_community = min(deg_seq)
1029 if max_community is None:
1030 max_community = max(deg_seq)
1031
1032 # Generate a community size sequence with a power law distribution.
1033 #
1034 # TODO The original code incremented the number of iterations each
1035 # time a new Zipf random value was drawn from the distribution. This
1036 # differed from the way the number of iterations was incremented in
1037 # `_powerlaw_degree_sequence`, so this code was changed to match
1038 # that one. As a result, this code is allowed many more chances to
1039 # generate a valid community size sequence.
1040 low, high = min_community, max_community
1041
1042 def condition(seq):
1043 return sum(seq) == n
1044
1045 def length(seq):
1046 return sum(seq) >= n
1047
1048 comms = _powerlaw_sequence(tau2, low, high, condition, length, max_iters, seed)
1049
1050 # Generate the communities based on the given degree sequence and
1051 # community sizes.
1052 max_iters *= 10 * n
1053 communities = _generate_communities(deg_seq, comms, mu, max_iters, seed)
1054
1055 # Finally, generate the benchmark graph based on the given
1056 # communities, joining nodes according to the intra- and
1057 # inter-community degrees.
1058 G = nx.Graph()
1059 G.add_nodes_from(range(n))
1060 for c in communities:
1061 for u in c:
1062 while G.degree(u) < round(deg_seq[u] * (1 - mu)):
1063 v = seed.choice(list(c))
1064 G.add_edge(u, v)
1065 while G.degree(u) < deg_seq[u]:
1066 v = seed.choice(range(n))
1067 if v not in c:
1068 G.add_edge(u, v)
1069 G.nodes[u]["community"] = c
1070 return G