Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/generators/community.py: 13%
267 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""Generators for classes of graphs used in studying social networks."""
2import itertools
3import math
5import networkx as nx
6from networkx.utils import py_random_state
8__all__ = [
9 "caveman_graph",
10 "connected_caveman_graph",
11 "relaxed_caveman_graph",
12 "random_partition_graph",
13 "planted_partition_graph",
14 "gaussian_random_partition_graph",
15 "ring_of_cliques",
16 "windmill_graph",
17 "stochastic_block_model",
18 "LFR_benchmark_graph",
19]
22@nx._dispatch(graphs=None)
23def caveman_graph(l, k):
24 """Returns a caveman graph of `l` cliques of size `k`.
26 Parameters
27 ----------
28 l : int
29 Number of cliques
30 k : int
31 Size of cliques
33 Returns
34 -------
35 G : NetworkX Graph
36 caveman graph
38 Notes
39 -----
40 This returns an undirected graph, it can be converted to a directed
41 graph using :func:`nx.to_directed`, or a multigraph using
42 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
43 described in [1]_ and it is unclear which of the directed
44 generalizations is most useful.
46 Examples
47 --------
48 >>> G = nx.caveman_graph(3, 3)
50 See also
51 --------
53 connected_caveman_graph
55 References
56 ----------
57 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
58 Amer. J. Soc. 105, 493-527, 1999.
59 """
60 # l disjoint cliques of size k
61 G = nx.empty_graph(l * k)
62 if k > 1:
63 for start in range(0, l * k, k):
64 edges = itertools.combinations(range(start, start + k), 2)
65 G.add_edges_from(edges)
66 return G
69@nx._dispatch(graphs=None)
70def connected_caveman_graph(l, k):
71 """Returns a connected caveman graph of `l` cliques of size `k`.
73 The connected caveman graph is formed by creating `n` cliques of size
74 `k`, then a single edge in each clique is rewired to a node in an
75 adjacent clique.
77 Parameters
78 ----------
79 l : int
80 number of cliques
81 k : int
82 size of cliques (k at least 2 or NetworkXError is raised)
84 Returns
85 -------
86 G : NetworkX Graph
87 connected caveman graph
89 Raises
90 ------
91 NetworkXError
92 If the size of cliques `k` is smaller than 2.
94 Notes
95 -----
96 This returns an undirected graph, it can be converted to a directed
97 graph using :func:`nx.to_directed`, or a multigraph using
98 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
99 described in [1]_ and it is unclear which of the directed
100 generalizations is most useful.
102 Examples
103 --------
104 >>> G = nx.connected_caveman_graph(3, 3)
106 References
107 ----------
108 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
109 Amer. J. Soc. 105, 493-527, 1999.
110 """
111 if k < 2:
112 raise nx.NetworkXError(
113 "The size of cliques in a connected caveman graph " "must be at least 2."
114 )
116 G = nx.caveman_graph(l, k)
117 for start in range(0, l * k, k):
118 G.remove_edge(start, start + 1)
119 G.add_edge(start, (start - 1) % (l * k))
120 return G
123@py_random_state(3)
124@nx._dispatch(graphs=None)
125def relaxed_caveman_graph(l, k, p, seed=None):
126 """Returns a relaxed caveman graph.
128 A relaxed caveman graph starts with `l` cliques of size `k`. Edges are
129 then randomly rewired with probability `p` to link different cliques.
131 Parameters
132 ----------
133 l : int
134 Number of groups
135 k : int
136 Size of cliques
137 p : float
138 Probability of rewiring each edge.
139 seed : integer, random_state, or None (default)
140 Indicator of random number generation state.
141 See :ref:`Randomness<randomness>`.
143 Returns
144 -------
145 G : NetworkX Graph
146 Relaxed Caveman Graph
148 Raises
149 ------
150 NetworkXError
151 If p is not in [0,1]
153 Examples
154 --------
155 >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42)
157 References
158 ----------
159 .. [1] Santo Fortunato, Community Detection in Graphs,
160 Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174.
161 https://arxiv.org/abs/0906.0612
162 """
163 G = nx.caveman_graph(l, k)
164 nodes = list(G)
165 for u, v in G.edges():
166 if seed.random() < p: # rewire the edge
167 x = seed.choice(nodes)
168 if G.has_edge(u, x):
169 continue
170 G.remove_edge(u, v)
171 G.add_edge(u, x)
172 return G
175@py_random_state(3)
176@nx._dispatch(graphs=None)
177def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False):
178 """Returns the random partition graph with a partition of sizes.
180 A partition graph is a graph of communities with sizes defined by
181 s in sizes. Nodes in the same group are connected with probability
182 p_in and nodes of different groups are connected with probability
183 p_out.
185 Parameters
186 ----------
187 sizes : list of ints
188 Sizes of groups
189 p_in : float
190 probability of edges with in groups
191 p_out : float
192 probability of edges between groups
193 directed : boolean optional, default=False
194 Whether to create a directed graph
195 seed : integer, random_state, or None (default)
196 Indicator of random number generation state.
197 See :ref:`Randomness<randomness>`.
199 Returns
200 -------
201 G : NetworkX Graph or DiGraph
202 random partition graph of size sum(gs)
204 Raises
205 ------
206 NetworkXError
207 If p_in or p_out is not in [0,1]
209 Examples
210 --------
211 >>> G = nx.random_partition_graph([10, 10, 10], 0.25, 0.01)
212 >>> len(G)
213 30
214 >>> partition = G.graph["partition"]
215 >>> len(partition)
216 3
218 Notes
219 -----
220 This is a generalization of the planted-l-partition described in
221 [1]_. It allows for the creation of groups of any size.
223 The partition is store as a graph attribute 'partition'.
225 References
226 ----------
227 .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports
228 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
229 """
230 # Use geometric method for O(n+m) complexity algorithm
231 # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation'))
232 if not 0.0 <= p_in <= 1.0:
233 raise nx.NetworkXError("p_in must be in [0,1]")
234 if not 0.0 <= p_out <= 1.0:
235 raise nx.NetworkXError("p_out must be in [0,1]")
237 # create connection matrix
238 num_blocks = len(sizes)
239 p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)]
240 for r in range(num_blocks):
241 p[r][r] = p_in
243 return stochastic_block_model(
244 sizes,
245 p,
246 nodelist=None,
247 seed=seed,
248 directed=directed,
249 selfloops=False,
250 sparse=True,
251 )
254@py_random_state(4)
255@nx._dispatch(graphs=None)
256def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False):
257 """Returns the planted l-partition graph.
259 This model partitions a graph with n=l*k vertices in
260 l groups with k vertices each. Vertices of the same
261 group are linked with a probability p_in, and vertices
262 of different groups are linked with probability p_out.
264 Parameters
265 ----------
266 l : int
267 Number of groups
268 k : int
269 Number of vertices in each group
270 p_in : float
271 probability of connecting vertices within a group
272 p_out : float
273 probability of connected vertices between groups
274 seed : integer, random_state, or None (default)
275 Indicator of random number generation state.
276 See :ref:`Randomness<randomness>`.
277 directed : bool,optional (default=False)
278 If True return a directed graph
280 Returns
281 -------
282 G : NetworkX Graph or DiGraph
283 planted l-partition graph
285 Raises
286 ------
287 NetworkXError
288 If p_in,p_out are not in [0,1] or
290 Examples
291 --------
292 >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42)
294 See Also
295 --------
296 random_partition_model
298 References
299 ----------
300 .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning
301 on the planted partition model,
302 Random Struct. Algor. 18 (2001) 116-140.
304 .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports
305 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
306 """
307 return random_partition_graph([k] * l, p_in, p_out, seed=seed, directed=directed)
310@py_random_state(6)
311@nx._dispatch(graphs=None)
312def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, seed=None):
313 """Generate a Gaussian random partition graph.
315 A Gaussian random partition graph is created by creating k partitions
316 each with a size drawn from a normal distribution with mean s and variance
317 s/v. Nodes are connected within clusters with probability p_in and
318 between clusters with probability p_out[1]
320 Parameters
321 ----------
322 n : int
323 Number of nodes in the graph
324 s : float
325 Mean cluster size
326 v : float
327 Shape parameter. The variance of cluster size distribution is s/v.
328 p_in : float
329 Probability of intra cluster connection.
330 p_out : float
331 Probability of inter cluster connection.
332 directed : boolean, optional default=False
333 Whether to create a directed graph or not
334 seed : integer, random_state, or None (default)
335 Indicator of random number generation state.
336 See :ref:`Randomness<randomness>`.
338 Returns
339 -------
340 G : NetworkX Graph or DiGraph
341 gaussian random partition graph
343 Raises
344 ------
345 NetworkXError
346 If s is > n
347 If p_in or p_out is not in [0,1]
349 Notes
350 -----
351 Note the number of partitions is dependent on s,v and n, and that the
352 last partition may be considerably smaller, as it is sized to simply
353 fill out the nodes [1]
355 See Also
356 --------
357 random_partition_graph
359 Examples
360 --------
361 >>> G = nx.gaussian_random_partition_graph(100, 10, 10, 0.25, 0.1)
362 >>> len(G)
363 100
365 References
366 ----------
367 .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner,
368 Experiments on Graph Clustering Algorithms,
369 In the proceedings of the 11th Europ. Symp. Algorithms, 2003.
370 """
371 if s > n:
372 raise nx.NetworkXError("s must be <= n")
373 assigned = 0
374 sizes = []
375 while True:
376 size = int(seed.gauss(s, s / v + 0.5))
377 if size < 1: # how to handle 0 or negative sizes?
378 continue
379 if assigned + size >= n:
380 sizes.append(n - assigned)
381 break
382 assigned += size
383 sizes.append(size)
384 return random_partition_graph(sizes, p_in, p_out, seed=seed, directed=directed)
387@nx._dispatch(graphs=None)
388def ring_of_cliques(num_cliques, clique_size):
389 """Defines a "ring of cliques" graph.
391 A ring of cliques graph is consisting of cliques, connected through single
392 links. Each clique is a complete graph.
394 Parameters
395 ----------
396 num_cliques : int
397 Number of cliques
398 clique_size : int
399 Size of cliques
401 Returns
402 -------
403 G : NetworkX Graph
404 ring of cliques graph
406 Raises
407 ------
408 NetworkXError
409 If the number of cliques is lower than 2 or
410 if the size of cliques is smaller than 2.
412 Examples
413 --------
414 >>> G = nx.ring_of_cliques(8, 4)
416 See Also
417 --------
418 connected_caveman_graph
420 Notes
421 -----
422 The `connected_caveman_graph` graph removes a link from each clique to
423 connect it with the next clique. Instead, the `ring_of_cliques` graph
424 simply adds the link without removing any link from the cliques.
425 """
426 if num_cliques < 2:
427 raise nx.NetworkXError("A ring of cliques must have at least " "two cliques")
428 if clique_size < 2:
429 raise nx.NetworkXError("The cliques must have at least two nodes")
431 G = nx.Graph()
432 for i in range(num_cliques):
433 edges = itertools.combinations(
434 range(i * clique_size, i * clique_size + clique_size), 2
435 )
436 G.add_edges_from(edges)
437 G.add_edge(
438 i * clique_size + 1, (i + 1) * clique_size % (num_cliques * clique_size)
439 )
440 return G
443@nx._dispatch(graphs=None)
444def windmill_graph(n, k):
445 """Generate a windmill graph.
446 A windmill graph is a graph of `n` cliques each of size `k` that are all
447 joined at one node.
448 It can be thought of as taking a disjoint union of `n` cliques of size `k`,
449 selecting one point from each, and contracting all of the selected points.
450 Alternatively, one could generate `n` cliques of size `k-1` and one node
451 that is connected to all other nodes in the graph.
453 Parameters
454 ----------
455 n : int
456 Number of cliques
457 k : int
458 Size of cliques
460 Returns
461 -------
462 G : NetworkX Graph
463 windmill graph with n cliques of size k
465 Raises
466 ------
467 NetworkXError
468 If the number of cliques is less than two
469 If the size of the cliques are less than two
471 Examples
472 --------
473 >>> G = nx.windmill_graph(4, 5)
475 Notes
476 -----
477 The node labeled `0` will be the node connected to all other nodes.
478 Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters
479 are in the opposite order as the parameters of this method.
480 """
481 if n < 2:
482 msg = "A windmill graph must have at least two cliques"
483 raise nx.NetworkXError(msg)
484 if k < 2:
485 raise nx.NetworkXError("The cliques must have at least two nodes")
487 G = nx.disjoint_union_all(
488 itertools.chain(
489 [nx.complete_graph(k)], (nx.complete_graph(k - 1) for _ in range(n - 1))
490 )
491 )
492 G.add_edges_from((0, i) for i in range(k, G.number_of_nodes()))
493 return G
496@py_random_state(3)
497@nx._dispatch(graphs=None)
498def stochastic_block_model(
499 sizes, p, nodelist=None, seed=None, directed=False, selfloops=False, sparse=True
500):
501 """Returns a stochastic block model graph.
503 This model partitions the nodes in blocks of arbitrary sizes, and places
504 edges between pairs of nodes independently, with a probability that depends
505 on the blocks.
507 Parameters
508 ----------
509 sizes : list of ints
510 Sizes of blocks
511 p : list of list of floats
512 Element (r,s) gives the density of edges going from the nodes
513 of group r to nodes of group s.
514 p must match the number of groups (len(sizes) == len(p)),
515 and it must be symmetric if the graph is undirected.
516 nodelist : list, optional
517 The block tags are assigned according to the node identifiers
518 in nodelist. If nodelist is None, then the ordering is the
519 range [0,sum(sizes)-1].
520 seed : integer, random_state, or None (default)
521 Indicator of random number generation state.
522 See :ref:`Randomness<randomness>`.
523 directed : boolean optional, default=False
524 Whether to create a directed graph or not.
525 selfloops : boolean optional, default=False
526 Whether to include self-loops or not.
527 sparse: boolean optional, default=True
528 Use the sparse heuristic to speed up the generator.
530 Returns
531 -------
532 g : NetworkX Graph or DiGraph
533 Stochastic block model graph of size sum(sizes)
535 Raises
536 ------
537 NetworkXError
538 If probabilities are not in [0,1].
539 If the probability matrix is not square (directed case).
540 If the probability matrix is not symmetric (undirected case).
541 If the sizes list does not match nodelist or the probability matrix.
542 If nodelist contains duplicate.
544 Examples
545 --------
546 >>> sizes = [75, 75, 300]
547 >>> probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]]
548 >>> g = nx.stochastic_block_model(sizes, probs, seed=0)
549 >>> len(g)
550 450
551 >>> H = nx.quotient_graph(g, g.graph["partition"], relabel=True)
552 >>> for v in H.nodes(data=True):
553 ... print(round(v[1]["density"], 3))
554 ...
555 0.245
556 0.348
557 0.405
558 >>> for v in H.edges(data=True):
559 ... print(round(1.0 * v[2]["weight"] / (sizes[v[0]] * sizes[v[1]]), 3))
560 ...
561 0.051
562 0.022
563 0.07
565 See Also
566 --------
567 random_partition_graph
568 planted_partition_graph
569 gaussian_random_partition_graph
570 gnp_random_graph
572 References
573 ----------
574 .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S.,
575 "Stochastic blockmodels: First steps",
576 Social networks, 5(2), 109-137, 1983.
577 """
578 # Check if dimensions match
579 if len(sizes) != len(p):
580 raise nx.NetworkXException("'sizes' and 'p' do not match.")
581 # Check for probability symmetry (undirected) and shape (directed)
582 for row in p:
583 if len(p) != len(row):
584 raise nx.NetworkXException("'p' must be a square matrix.")
585 if not directed:
586 p_transpose = [list(i) for i in zip(*p)]
587 for i in zip(p, p_transpose):
588 for j in zip(i[0], i[1]):
589 if abs(j[0] - j[1]) > 1e-08:
590 raise nx.NetworkXException("'p' must be symmetric.")
591 # Check for probability range
592 for row in p:
593 for prob in row:
594 if prob < 0 or prob > 1:
595 raise nx.NetworkXException("Entries of 'p' not in [0,1].")
596 # Check for nodelist consistency
597 if nodelist is not None:
598 if len(nodelist) != sum(sizes):
599 raise nx.NetworkXException("'nodelist' and 'sizes' do not match.")
600 if len(nodelist) != len(set(nodelist)):
601 raise nx.NetworkXException("nodelist contains duplicate.")
602 else:
603 nodelist = range(sum(sizes))
605 # Setup the graph conditionally to the directed switch.
606 block_range = range(len(sizes))
607 if directed:
608 g = nx.DiGraph()
609 block_iter = itertools.product(block_range, block_range)
610 else:
611 g = nx.Graph()
612 block_iter = itertools.combinations_with_replacement(block_range, 2)
613 # Split nodelist in a partition (list of sets).
614 size_cumsum = [sum(sizes[0:x]) for x in range(len(sizes) + 1)]
615 g.graph["partition"] = [
616 set(nodelist[size_cumsum[x] : size_cumsum[x + 1]])
617 for x in range(len(size_cumsum) - 1)
618 ]
619 # Setup nodes and graph name
620 for block_id, nodes in enumerate(g.graph["partition"]):
621 for node in nodes:
622 g.add_node(node, block=block_id)
624 g.name = "stochastic_block_model"
626 # Test for edge existence
627 parts = g.graph["partition"]
628 for i, j in block_iter:
629 if i == j:
630 if directed:
631 if selfloops:
632 edges = itertools.product(parts[i], parts[i])
633 else:
634 edges = itertools.permutations(parts[i], 2)
635 else:
636 edges = itertools.combinations(parts[i], 2)
637 if selfloops:
638 edges = itertools.chain(edges, zip(parts[i], parts[i]))
639 for e in edges:
640 if seed.random() < p[i][j]:
641 g.add_edge(*e)
642 else:
643 edges = itertools.product(parts[i], parts[j])
644 if sparse:
645 if p[i][j] == 1: # Test edges cases p_ij = 0 or 1
646 for e in edges:
647 g.add_edge(*e)
648 elif p[i][j] > 0:
649 while True:
650 try:
651 logrand = math.log(seed.random())
652 skip = math.floor(logrand / math.log(1 - p[i][j]))
653 # consume "skip" edges
654 next(itertools.islice(edges, skip, skip), None)
655 e = next(edges)
656 g.add_edge(*e) # __safe
657 except StopIteration:
658 break
659 else:
660 for e in edges:
661 if seed.random() < p[i][j]:
662 g.add_edge(*e) # __safe
663 return g
666def _zipf_rv_below(gamma, xmin, threshold, seed):
667 """Returns a random value chosen from the bounded Zipf distribution.
669 Repeatedly draws values from the Zipf distribution until the
670 threshold is met, then returns that value.
671 """
672 result = nx.utils.zipf_rv(gamma, xmin, seed)
673 while result > threshold:
674 result = nx.utils.zipf_rv(gamma, xmin, seed)
675 return result
678def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed):
679 """Returns a list of numbers obeying a constrained power law distribution.
681 ``gamma`` and ``low`` are the parameters for the Zipf distribution.
683 ``high`` is the maximum allowed value for values draw from the Zipf
684 distribution. For more information, see :func:`_zipf_rv_below`.
686 ``condition`` and ``length`` are Boolean-valued functions on
687 lists. While generating the list, random values are drawn and
688 appended to the list until ``length`` is satisfied by the created
689 list. Once ``condition`` is satisfied, the sequence generated in
690 this way is returned.
692 ``max_iters`` indicates the number of times to generate a list
693 satisfying ``length``. If the number of iterations exceeds this
694 value, :exc:`~networkx.exception.ExceededMaxIterations` is raised.
696 seed : integer, random_state, or None (default)
697 Indicator of random number generation state.
698 See :ref:`Randomness<randomness>`.
699 """
700 for i in range(max_iters):
701 seq = []
702 while not length(seq):
703 seq.append(_zipf_rv_below(gamma, low, high, seed))
704 if condition(seq):
705 return seq
706 raise nx.ExceededMaxIterations("Could not create power law sequence")
709def _hurwitz_zeta(x, q, tolerance):
710 """The Hurwitz zeta function, or the Riemann zeta function of two arguments.
712 ``x`` must be greater than one and ``q`` must be positive.
714 This function repeatedly computes subsequent partial sums until
715 convergence, as decided by ``tolerance``.
716 """
717 z = 0
718 z_prev = -float("inf")
719 k = 0
720 while abs(z - z_prev) > tolerance:
721 z_prev = z
722 z += 1 / ((k + q) ** x)
723 k += 1
724 return z
727def _generate_min_degree(gamma, average_degree, max_degree, tolerance, max_iters):
728 """Returns a minimum degree from the given average degree."""
729 # Defines zeta function whether or not Scipy is available
730 try:
731 from scipy.special import zeta
732 except ImportError:
734 def zeta(x, q):
735 return _hurwitz_zeta(x, q, tolerance)
737 min_deg_top = max_degree
738 min_deg_bot = 1
739 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
740 itrs = 0
741 mid_avg_deg = 0
742 while abs(mid_avg_deg - average_degree) > tolerance:
743 if itrs > max_iters:
744 raise nx.ExceededMaxIterations("Could not match average_degree")
745 mid_avg_deg = 0
746 for x in range(int(min_deg_mid), max_degree + 1):
747 mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid)
748 if mid_avg_deg > average_degree:
749 min_deg_top = min_deg_mid
750 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
751 else:
752 min_deg_bot = min_deg_mid
753 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
754 itrs += 1
755 # return int(min_deg_mid + 0.5)
756 return round(min_deg_mid)
759def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed):
760 """Returns a list of sets, each of which represents a community.
762 ``degree_seq`` is the degree sequence that must be met by the
763 graph.
765 ``community_sizes`` is the community size distribution that must be
766 met by the generated list of sets.
768 ``mu`` is a float in the interval [0, 1] indicating the fraction of
769 intra-community edges incident to each node.
771 ``max_iters`` is the number of times to try to add a node to a
772 community. This must be greater than the length of
773 ``degree_seq``, otherwise this function will always fail. If
774 the number of iterations exceeds this value,
775 :exc:`~networkx.exception.ExceededMaxIterations` is raised.
777 seed : integer, random_state, or None (default)
778 Indicator of random number generation state.
779 See :ref:`Randomness<randomness>`.
781 The communities returned by this are sets of integers in the set {0,
782 ..., *n* - 1}, where *n* is the length of ``degree_seq``.
784 """
785 # This assumes the nodes in the graph will be natural numbers.
786 result = [set() for _ in community_sizes]
787 n = len(degree_seq)
788 free = list(range(n))
789 for i in range(max_iters):
790 v = free.pop()
791 c = seed.choice(range(len(community_sizes)))
792 # s = int(degree_seq[v] * (1 - mu) + 0.5)
793 s = round(degree_seq[v] * (1 - mu))
794 # If the community is large enough, add the node to the chosen
795 # community. Otherwise, return it to the list of unaffiliated
796 # nodes.
797 if s < community_sizes[c]:
798 result[c].add(v)
799 else:
800 free.append(v)
801 # If the community is too big, remove a node from it.
802 if len(result[c]) > community_sizes[c]:
803 free.append(result[c].pop())
804 if not free:
805 return result
806 msg = "Could not assign communities; try increasing min_community"
807 raise nx.ExceededMaxIterations(msg)
810@py_random_state(11)
811@nx._dispatch(graphs=None)
812def LFR_benchmark_graph(
813 n,
814 tau1,
815 tau2,
816 mu,
817 average_degree=None,
818 min_degree=None,
819 max_degree=None,
820 min_community=None,
821 max_community=None,
822 tol=1.0e-7,
823 max_iters=500,
824 seed=None,
825):
826 r"""Returns the LFR benchmark graph.
828 This algorithm proceeds as follows:
830 1) Find a degree sequence with a power law distribution, and minimum
831 value ``min_degree``, which has approximate average degree
832 ``average_degree``. This is accomplished by either
834 a) specifying ``min_degree`` and not ``average_degree``,
835 b) specifying ``average_degree`` and not ``min_degree``, in which
836 case a suitable minimum degree will be found.
838 ``max_degree`` can also be specified, otherwise it will be set to
839 ``n``. Each node *u* will have $\mu \mathrm{deg}(u)$ edges
840 joining it to nodes in communities other than its own and $(1 -
841 \mu) \mathrm{deg}(u)$ edges joining it to nodes in its own
842 community.
843 2) Generate community sizes according to a power law distribution
844 with exponent ``tau2``. If ``min_community`` and
845 ``max_community`` are not specified they will be selected to be
846 ``min_degree`` and ``max_degree``, respectively. Community sizes
847 are generated until the sum of their sizes equals ``n``.
848 3) Each node will be randomly assigned a community with the
849 condition that the community is large enough for the node's
850 intra-community degree, $(1 - \mu) \mathrm{deg}(u)$ as
851 described in step 2. If a community grows too large, a random node
852 will be selected for reassignment to a new community, until all
853 nodes have been assigned a community.
854 4) Each node *u* then adds $(1 - \mu) \mathrm{deg}(u)$
855 intra-community edges and $\mu \mathrm{deg}(u)$ inter-community
856 edges.
858 Parameters
859 ----------
860 n : int
861 Number of nodes in the created graph.
863 tau1 : float
864 Power law exponent for the degree distribution of the created
865 graph. This value must be strictly greater than one.
867 tau2 : float
868 Power law exponent for the community size distribution in the
869 created graph. This value must be strictly greater than one.
871 mu : float
872 Fraction of inter-community edges incident to each node. This
873 value must be in the interval [0, 1].
875 average_degree : float
876 Desired average degree of nodes in the created graph. This value
877 must be in the interval [0, *n*]. Exactly one of this and
878 ``min_degree`` must be specified, otherwise a
879 :exc:`NetworkXError` is raised.
881 min_degree : int
882 Minimum degree of nodes in the created graph. This value must be
883 in the interval [0, *n*]. Exactly one of this and
884 ``average_degree`` must be specified, otherwise a
885 :exc:`NetworkXError` is raised.
887 max_degree : int
888 Maximum degree of nodes in the created graph. If not specified,
889 this is set to ``n``, the total number of nodes in the graph.
891 min_community : int
892 Minimum size of communities in the graph. If not specified, this
893 is set to ``min_degree``.
895 max_community : int
896 Maximum size of communities in the graph. If not specified, this
897 is set to ``n``, the total number of nodes in the graph.
899 tol : float
900 Tolerance when comparing floats, specifically when comparing
901 average degree values.
903 max_iters : int
904 Maximum number of iterations to try to create the community sizes,
905 degree distribution, and community affiliations.
907 seed : integer, random_state, or None (default)
908 Indicator of random number generation state.
909 See :ref:`Randomness<randomness>`.
911 Returns
912 -------
913 G : NetworkX graph
914 The LFR benchmark graph generated according to the specified
915 parameters.
917 Each node in the graph has a node attribute ``'community'`` that
918 stores the community (that is, the set of nodes) that includes
919 it.
921 Raises
922 ------
923 NetworkXError
924 If any of the parameters do not meet their upper and lower bounds:
926 - ``tau1`` and ``tau2`` must be strictly greater than 1.
927 - ``mu`` must be in [0, 1].
928 - ``max_degree`` must be in {1, ..., *n*}.
929 - ``min_community`` and ``max_community`` must be in {0, ...,
930 *n*}.
932 If not exactly one of ``average_degree`` and ``min_degree`` is
933 specified.
935 If ``min_degree`` is not specified and a suitable ``min_degree``
936 cannot be found.
938 ExceededMaxIterations
939 If a valid degree sequence cannot be created within
940 ``max_iters`` number of iterations.
942 If a valid set of community sizes cannot be created within
943 ``max_iters`` number of iterations.
945 If a valid community assignment cannot be created within ``10 *
946 n * max_iters`` number of iterations.
948 Examples
949 --------
950 Basic usage::
952 >>> from networkx.generators.community import LFR_benchmark_graph
953 >>> n = 250
954 >>> tau1 = 3
955 >>> tau2 = 1.5
956 >>> mu = 0.1
957 >>> G = LFR_benchmark_graph(
958 ... n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10
959 ... )
961 Continuing the example above, you can get the communities from the
962 node attributes of the graph::
964 >>> communities = {frozenset(G.nodes[v]["community"]) for v in G}
966 Notes
967 -----
968 This algorithm differs slightly from the original way it was
969 presented in [1].
971 1) Rather than connecting the graph via a configuration model then
972 rewiring to match the intra-community and inter-community
973 degrees, we do this wiring explicitly at the end, which should be
974 equivalent.
975 2) The code posted on the author's website [2] calculates the random
976 power law distributed variables and their average using
977 continuous approximations, whereas we use the discrete
978 distributions here as both degree and community size are
979 discrete.
981 Though the authors describe the algorithm as quite robust, testing
982 during development indicates that a somewhat narrower parameter set
983 is likely to successfully produce a graph. Some suggestions have
984 been provided in the event of exceptions.
986 References
987 ----------
988 .. [1] "Benchmark graphs for testing community detection algorithms",
989 Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi,
990 Phys. Rev. E 78, 046110 2008
991 .. [2] https://www.santofortunato.net/resources
993 """
994 # Perform some basic parameter validation.
995 if not tau1 > 1:
996 raise nx.NetworkXError("tau1 must be greater than one")
997 if not tau2 > 1:
998 raise nx.NetworkXError("tau2 must be greater than one")
999 if not 0 <= mu <= 1:
1000 raise nx.NetworkXError("mu must be in the interval [0, 1]")
1002 # Validate parameters for generating the degree sequence.
1003 if max_degree is None:
1004 max_degree = n
1005 elif not 0 < max_degree <= n:
1006 raise nx.NetworkXError("max_degree must be in the interval (0, n]")
1007 if not ((min_degree is None) ^ (average_degree is None)):
1008 raise nx.NetworkXError(
1009 "Must assign exactly one of min_degree and" " average_degree"
1010 )
1011 if min_degree is None:
1012 min_degree = _generate_min_degree(
1013 tau1, average_degree, max_degree, tol, max_iters
1014 )
1016 # Generate a degree sequence with a power law distribution.
1017 low, high = min_degree, max_degree
1019 def condition(seq):
1020 return sum(seq) % 2 == 0
1022 def length(seq):
1023 return len(seq) >= n
1025 deg_seq = _powerlaw_sequence(tau1, low, high, condition, length, max_iters, seed)
1027 # Validate parameters for generating the community size sequence.
1028 if min_community is None:
1029 min_community = min(deg_seq)
1030 if max_community is None:
1031 max_community = max(deg_seq)
1033 # Generate a community size sequence with a power law distribution.
1034 #
1035 # TODO The original code incremented the number of iterations each
1036 # time a new Zipf random value was drawn from the distribution. This
1037 # differed from the way the number of iterations was incremented in
1038 # `_powerlaw_degree_sequence`, so this code was changed to match
1039 # that one. As a result, this code is allowed many more chances to
1040 # generate a valid community size sequence.
1041 low, high = min_community, max_community
1043 def condition(seq):
1044 return sum(seq) == n
1046 def length(seq):
1047 return sum(seq) >= n
1049 comms = _powerlaw_sequence(tau2, low, high, condition, length, max_iters, seed)
1051 # Generate the communities based on the given degree sequence and
1052 # community sizes.
1053 max_iters *= 10 * n
1054 communities = _generate_communities(deg_seq, comms, mu, max_iters, seed)
1056 # Finally, generate the benchmark graph based on the given
1057 # communities, joining nodes according to the intra- and
1058 # inter-community degrees.
1059 G = nx.Graph()
1060 G.add_nodes_from(range(n))
1061 for c in communities:
1062 for u in c:
1063 while G.degree(u) < round(deg_seq[u] * (1 - mu)):
1064 v = seed.choice(list(c))
1065 G.add_edge(u, v)
1066 while G.degree(u) < deg_seq[u]:
1067 v = seed.choice(range(n))
1068 if v not in c:
1069 G.add_edge(u, v)
1070 G.nodes[u]["community"] = c
1071 return G