Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/generators/community.py: 14%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

270 statements  

1"""Generators for classes of graphs used in studying social networks.""" 

2 

3import itertools 

4import math 

5 

6import networkx as nx 

7from networkx.utils import py_random_state 

8 

9__all__ = [ 

10 "caveman_graph", 

11 "connected_caveman_graph", 

12 "relaxed_caveman_graph", 

13 "random_partition_graph", 

14 "planted_partition_graph", 

15 "gaussian_random_partition_graph", 

16 "ring_of_cliques", 

17 "windmill_graph", 

18 "stochastic_block_model", 

19 "LFR_benchmark_graph", 

20] 

21 

22 

23@nx._dispatchable(graphs=None, returns_graph=True) 

24def caveman_graph(l, k): 

25 """Returns a caveman graph of `l` cliques of size `k`. 

26 

27 Parameters 

28 ---------- 

29 l : int 

30 Number of cliques 

31 k : int 

32 Size of cliques 

33 

34 Returns 

35 ------- 

36 G : NetworkX Graph 

37 caveman graph 

38 

39 Notes 

40 ----- 

41 This returns an undirected graph, it can be converted to a directed 

42 graph using :func:`nx.to_directed`, or a multigraph using 

43 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is 

44 described in [1]_ and it is unclear which of the directed 

45 generalizations is most useful. 

46 

47 Examples 

48 -------- 

49 >>> G = nx.caveman_graph(3, 3) 

50 

51 See also 

52 -------- 

53 

54 connected_caveman_graph 

55 

56 References 

57 ---------- 

58 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' 

59 Amer. J. Soc. 105, 493-527, 1999. 

60 """ 

61 # l disjoint cliques of size k 

62 G = nx.empty_graph(l * k) 

63 if k > 1: 

64 for start in range(0, l * k, k): 

65 edges = itertools.combinations(range(start, start + k), 2) 

66 G.add_edges_from(edges) 

67 return G 

68 

69 

70@nx._dispatchable(graphs=None, returns_graph=True) 

71def connected_caveman_graph(l, k): 

72 """Returns a connected caveman graph of `l` cliques of size `k`. 

73 

74 The connected caveman graph is formed by creating `n` cliques of size 

75 `k`, then a single edge in each clique is rewired to a node in an 

76 adjacent clique. 

77 

78 Parameters 

79 ---------- 

80 l : int 

81 number of cliques 

82 k : int 

83 size of cliques (k at least 2 or NetworkXError is raised) 

84 

85 Returns 

86 ------- 

87 G : NetworkX Graph 

88 connected caveman graph 

89 

90 Raises 

91 ------ 

92 NetworkXError 

93 If the size of cliques `k` is smaller than 2. 

94 

95 Notes 

96 ----- 

97 This returns an undirected graph, it can be converted to a directed 

98 graph using :func:`nx.to_directed`, or a multigraph using 

99 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is 

100 described in [1]_ and it is unclear which of the directed 

101 generalizations is most useful. 

102 

103 Examples 

104 -------- 

105 >>> G = nx.connected_caveman_graph(3, 3) 

106 

107 References 

108 ---------- 

109 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' 

110 Amer. J. Soc. 105, 493-527, 1999. 

111 """ 

112 if k < 2: 

113 raise nx.NetworkXError( 

114 "The size of cliques in a connected caveman graph must be at least 2." 

115 ) 

116 

117 G = nx.caveman_graph(l, k) 

118 for start in range(0, l * k, k): 

119 G.remove_edge(start, start + 1) 

120 G.add_edge(start, (start - 1) % (l * k)) 

121 return G 

122 

123 

124@py_random_state(3) 

125@nx._dispatchable(graphs=None, returns_graph=True) 

126def relaxed_caveman_graph(l, k, p, seed=None): 

127 """Returns a relaxed caveman graph. 

128 

129 A relaxed caveman graph starts with `l` cliques of size `k`. Edges are 

130 then randomly rewired with probability `p` to link different cliques. 

131 

132 Parameters 

133 ---------- 

134 l : int 

135 Number of groups 

136 k : int 

137 Size of cliques 

138 p : float 

139 Probability of rewiring each edge. 

140 seed : integer, random_state, or None (default) 

141 Indicator of random number generation state. 

142 See :ref:`Randomness<randomness>`. 

143 

144 Returns 

145 ------- 

146 G : NetworkX Graph 

147 Relaxed Caveman Graph 

148 

149 Raises 

150 ------ 

151 NetworkXError 

152 If p is not in [0,1] 

153 

154 Examples 

155 -------- 

156 >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42) 

157 

158 References 

159 ---------- 

160 .. [1] Santo Fortunato, Community Detection in Graphs, 

161 Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174. 

162 https://arxiv.org/abs/0906.0612 

163 """ 

164 G = nx.caveman_graph(l, k) 

165 nodes = list(G) 

166 for u, v in G.edges(): 

167 if seed.random() < p: # rewire the edge 

168 x = seed.choice(nodes) 

169 if G.has_edge(u, x): 

170 continue 

171 G.remove_edge(u, v) 

172 G.add_edge(u, x) 

173 return G 

174 

175 

176@py_random_state(3) 

177@nx._dispatchable(graphs=None, returns_graph=True) 

178def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False): 

179 """Returns the random partition graph with a partition of sizes. 

180 

181 A partition graph is a graph of communities with sizes defined by 

182 s in sizes. Nodes in the same group are connected with probability 

183 p_in and nodes of different groups are connected with probability 

184 p_out. 

185 

186 Parameters 

187 ---------- 

188 sizes : list of ints 

189 Sizes of groups 

190 p_in : float 

191 probability of edges with in groups 

192 p_out : float 

193 probability of edges between groups 

194 directed : boolean optional, default=False 

195 Whether to create a directed graph 

196 seed : integer, random_state, or None (default) 

197 Indicator of random number generation state. 

198 See :ref:`Randomness<randomness>`. 

199 

200 Returns 

201 ------- 

202 G : NetworkX Graph or DiGraph 

203 random partition graph of size sum(gs) 

204 

205 Raises 

206 ------ 

207 NetworkXError 

208 If p_in or p_out is not in [0,1] 

209 

210 Examples 

211 -------- 

212 >>> G = nx.random_partition_graph([10, 10, 10], 0.25, 0.01) 

213 >>> len(G) 

214 30 

215 >>> partition = G.graph["partition"] 

216 >>> len(partition) 

217 3 

218 

219 Notes 

220 ----- 

221 This is a generalization of the planted-l-partition described in 

222 [1]_. It allows for the creation of groups of any size. 

223 

224 The partition is store as a graph attribute 'partition'. 

225 

226 References 

227 ---------- 

228 .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports 

229 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 

230 """ 

231 # Use geometric method for O(n+m) complexity algorithm 

232 # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation')) 

233 if not 0.0 <= p_in <= 1.0: 

234 raise nx.NetworkXError("p_in must be in [0,1]") 

235 if not 0.0 <= p_out <= 1.0: 

236 raise nx.NetworkXError("p_out must be in [0,1]") 

237 

238 # create connection matrix 

239 num_blocks = len(sizes) 

240 p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)] 

241 for r in range(num_blocks): 

242 p[r][r] = p_in 

243 

244 return stochastic_block_model( 

245 sizes, 

246 p, 

247 nodelist=None, 

248 seed=seed, 

249 directed=directed, 

250 selfloops=False, 

251 sparse=True, 

252 ) 

253 

254 

255@py_random_state(4) 

256@nx._dispatchable(graphs=None, returns_graph=True) 

257def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False): 

258 """Returns the planted l-partition graph. 

259 

260 This model partitions a graph with n=l*k vertices in 

261 l groups with k vertices each. Vertices of the same 

262 group are linked with a probability p_in, and vertices 

263 of different groups are linked with probability p_out. 

264 

265 Parameters 

266 ---------- 

267 l : int 

268 Number of groups 

269 k : int 

270 Number of vertices in each group 

271 p_in : float 

272 probability of connecting vertices within a group 

273 p_out : float 

274 probability of connected vertices between groups 

275 seed : integer, random_state, or None (default) 

276 Indicator of random number generation state. 

277 See :ref:`Randomness<randomness>`. 

278 directed : bool,optional (default=False) 

279 If True return a directed graph 

280 

281 Returns 

282 ------- 

283 G : NetworkX Graph or DiGraph 

284 planted l-partition graph 

285 

286 Raises 

287 ------ 

288 NetworkXError 

289 If `p_in`, `p_out` are not in `[0, 1]` 

290 

291 Examples 

292 -------- 

293 >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42) 

294 

295 See Also 

296 -------- 

297 random_partition_model 

298 

299 References 

300 ---------- 

301 .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning 

302 on the planted partition model, 

303 Random Struct. Algor. 18 (2001) 116-140. 

304 

305 .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports 

306 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 

307 """ 

308 return random_partition_graph([k] * l, p_in, p_out, seed=seed, directed=directed) 

309 

310 

311@py_random_state(6) 

312@nx._dispatchable(graphs=None, returns_graph=True) 

313def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, seed=None): 

314 """Generate a Gaussian random partition graph. 

315 

316 A Gaussian random partition graph is created by creating k partitions 

317 each with a size drawn from a normal distribution with mean s and variance 

318 s/v. Nodes are connected within clusters with probability p_in and 

319 between clusters with probability p_out[1] 

320 

321 Parameters 

322 ---------- 

323 n : int 

324 Number of nodes in the graph 

325 s : float 

326 Mean cluster size 

327 v : float 

328 Shape parameter. The variance of cluster size distribution is s/v. 

329 p_in : float 

330 Probability of intra cluster connection. 

331 p_out : float 

332 Probability of inter cluster connection. 

333 directed : boolean, optional default=False 

334 Whether to create a directed graph or not 

335 seed : integer, random_state, or None (default) 

336 Indicator of random number generation state. 

337 See :ref:`Randomness<randomness>`. 

338 

339 Returns 

340 ------- 

341 G : NetworkX Graph or DiGraph 

342 gaussian random partition graph 

343 

344 Raises 

345 ------ 

346 NetworkXError 

347 If s is > n 

348 If p_in or p_out is not in [0,1] 

349 

350 Notes 

351 ----- 

352 Note the number of partitions is dependent on s,v and n, and that the 

353 last partition may be considerably smaller, as it is sized to simply 

354 fill out the nodes [1] 

355 

356 See Also 

357 -------- 

358 random_partition_graph 

359 

360 Examples 

361 -------- 

362 >>> G = nx.gaussian_random_partition_graph(100, 10, 10, 0.25, 0.1) 

363 >>> len(G) 

364 100 

365 

366 References 

367 ---------- 

368 .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner, 

369 Experiments on Graph Clustering Algorithms, 

370 In the proceedings of the 11th Europ. Symp. Algorithms, 2003. 

371 """ 

372 if s > n: 

373 raise nx.NetworkXError("s must be <= n") 

374 assigned = 0 

375 sizes = [] 

376 while True: 

377 size = int(seed.gauss(s, s / v + 0.5)) 

378 if size < 1: # how to handle 0 or negative sizes? 

379 continue 

380 if assigned + size >= n: 

381 sizes.append(n - assigned) 

382 break 

383 assigned += size 

384 sizes.append(size) 

385 return random_partition_graph(sizes, p_in, p_out, seed=seed, directed=directed) 

386 

387 

388@nx._dispatchable(graphs=None, returns_graph=True) 

389def ring_of_cliques(num_cliques, clique_size): 

390 """Defines a "ring of cliques" graph. 

391 

392 A ring of cliques graph is consisting of cliques, connected through single 

393 links. Each clique is a complete graph. 

394 

395 Parameters 

396 ---------- 

397 num_cliques : int 

398 Number of cliques 

399 clique_size : int 

400 Size of cliques 

401 

402 Returns 

403 ------- 

404 G : NetworkX Graph 

405 ring of cliques graph 

406 

407 Raises 

408 ------ 

409 NetworkXError 

410 If the number of cliques is lower than 2 or 

411 if the size of cliques is smaller than 2. 

412 

413 Examples 

414 -------- 

415 >>> G = nx.ring_of_cliques(8, 4) 

416 

417 See Also 

418 -------- 

419 connected_caveman_graph 

420 

421 Notes 

422 ----- 

423 The `connected_caveman_graph` graph removes a link from each clique to 

424 connect it with the next clique. Instead, the `ring_of_cliques` graph 

425 simply adds the link without removing any link from the cliques. 

426 """ 

427 if num_cliques < 2: 

428 raise nx.NetworkXError("A ring of cliques must have at least two cliques") 

429 if clique_size < 2: 

430 raise nx.NetworkXError("The cliques must have at least two nodes") 

431 

432 G = nx.Graph() 

433 for i in range(num_cliques): 

434 edges = itertools.combinations( 

435 range(i * clique_size, i * clique_size + clique_size), 2 

436 ) 

437 G.add_edges_from(edges) 

438 G.add_edge( 

439 i * clique_size + 1, (i + 1) * clique_size % (num_cliques * clique_size) 

440 ) 

441 return G 

442 

443 

444@nx._dispatchable(graphs=None, returns_graph=True) 

445def windmill_graph(n, k): 

446 """Generate a windmill graph. 

447 A windmill graph is a graph of `n` cliques each of size `k` that are all 

448 joined at one node. 

449 It can be thought of as taking a disjoint union of `n` cliques of size `k`, 

450 selecting one point from each, and contracting all of the selected points. 

451 Alternatively, one could generate `n` cliques of size `k-1` and one node 

452 that is connected to all other nodes in the graph. 

453 

454 Parameters 

455 ---------- 

456 n : int 

457 Number of cliques 

458 k : int 

459 Size of cliques 

460 

461 Returns 

462 ------- 

463 G : NetworkX Graph 

464 windmill graph with n cliques of size k 

465 

466 Raises 

467 ------ 

468 NetworkXError 

469 If the number of cliques is less than two 

470 If the size of the cliques are less than two 

471 

472 Examples 

473 -------- 

474 >>> G = nx.windmill_graph(4, 5) 

475 

476 Notes 

477 ----- 

478 The node labeled `0` will be the node connected to all other nodes. 

479 Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters 

480 are in the opposite order as the parameters of this method. 

481 """ 

482 if n < 2: 

483 msg = "A windmill graph must have at least two cliques" 

484 raise nx.NetworkXError(msg) 

485 if k < 2: 

486 raise nx.NetworkXError("The cliques must have at least two nodes") 

487 

488 G = nx.disjoint_union_all( 

489 itertools.chain( 

490 [nx.complete_graph(k)], (nx.complete_graph(k - 1) for _ in range(n - 1)) 

491 ) 

492 ) 

493 G.add_edges_from((0, i) for i in range(k, G.number_of_nodes())) 

494 return G 

495 

496 

497@py_random_state(3) 

498@nx._dispatchable(graphs=None, returns_graph=True) 

499def stochastic_block_model( 

500 sizes, p, nodelist=None, seed=None, directed=False, selfloops=False, sparse=True 

501): 

502 """Returns a stochastic block model graph. 

503 

504 This model partitions the nodes in blocks of arbitrary sizes, and places 

505 edges between pairs of nodes independently, with a probability that depends 

506 on the blocks. 

507 

508 Parameters 

509 ---------- 

510 sizes : list of ints 

511 Sizes of blocks 

512 p : list of list of floats 

513 Element (r,s) gives the density of edges going from the nodes 

514 of group r to nodes of group s. 

515 p must match the number of groups (len(sizes) == len(p)), 

516 and it must be symmetric if the graph is undirected. 

517 nodelist : list, optional 

518 The block tags are assigned according to the node identifiers 

519 in nodelist. If nodelist is None, then the ordering is the 

520 range [0,sum(sizes)-1]. 

521 seed : integer, random_state, or None (default) 

522 Indicator of random number generation state. 

523 See :ref:`Randomness<randomness>`. 

524 directed : boolean optional, default=False 

525 Whether to create a directed graph or not. 

526 selfloops : boolean optional, default=False 

527 Whether to include self-loops or not. 

528 sparse: boolean optional, default=True 

529 Use the sparse heuristic to speed up the generator. 

530 

531 Returns 

532 ------- 

533 g : NetworkX Graph or DiGraph 

534 Stochastic block model graph of size sum(sizes) 

535 

536 Raises 

537 ------ 

538 NetworkXError 

539 If probabilities are not in [0,1]. 

540 If the probability matrix is not square (directed case). 

541 If the probability matrix is not symmetric (undirected case). 

542 If the sizes list does not match nodelist or the probability matrix. 

543 If nodelist contains duplicate. 

544 

545 Examples 

546 -------- 

547 >>> sizes = [75, 75, 300] 

548 >>> probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]] 

549 >>> g = nx.stochastic_block_model(sizes, probs, seed=0) 

550 >>> len(g) 

551 450 

552 >>> H = nx.quotient_graph(g, g.graph["partition"], relabel=True) 

553 >>> for v in H.nodes(data=True): 

554 ... print(round(v[1]["density"], 3)) 

555 0.245 

556 0.348 

557 0.405 

558 >>> for v in H.edges(data=True): 

559 ... print(round(1.0 * v[2]["weight"] / (sizes[v[0]] * sizes[v[1]]), 3)) 

560 0.051 

561 0.022 

562 0.07 

563 

564 See Also 

565 -------- 

566 random_partition_graph 

567 planted_partition_graph 

568 gaussian_random_partition_graph 

569 gnp_random_graph 

570 

571 References 

572 ---------- 

573 .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S., 

574 "Stochastic blockmodels: First steps", 

575 Social networks, 5(2), 109-137, 1983. 

576 """ 

577 # Check if dimensions match 

578 if len(sizes) != len(p): 

579 raise nx.NetworkXException("'sizes' and 'p' do not match.") 

580 # Check for probability symmetry (undirected) and shape (directed) 

581 for row in p: 

582 if len(p) != len(row): 

583 raise nx.NetworkXException("'p' must be a square matrix.") 

584 if not directed: 

585 p_transpose = [list(i) for i in zip(*p)] 

586 for i in zip(p, p_transpose): 

587 for j in zip(i[0], i[1]): 

588 if abs(j[0] - j[1]) > 1e-08: 

589 raise nx.NetworkXException("'p' must be symmetric.") 

590 # Check for probability range 

591 for row in p: 

592 for prob in row: 

593 if prob < 0 or prob > 1: 

594 raise nx.NetworkXException("Entries of 'p' not in [0,1].") 

595 # Check for nodelist consistency 

596 if nodelist is not None: 

597 if len(nodelist) != sum(sizes): 

598 raise nx.NetworkXException("'nodelist' and 'sizes' do not match.") 

599 if len(nodelist) != len(set(nodelist)): 

600 raise nx.NetworkXException("nodelist contains duplicate.") 

601 else: 

602 nodelist = range(sum(sizes)) 

603 

604 # Setup the graph conditionally to the directed switch. 

605 block_range = range(len(sizes)) 

606 if directed: 

607 g = nx.DiGraph() 

608 block_iter = itertools.product(block_range, block_range) 

609 else: 

610 g = nx.Graph() 

611 block_iter = itertools.combinations_with_replacement(block_range, 2) 

612 # Split nodelist in a partition (list of sets). 

613 size_cumsum = [sum(sizes[0:x]) for x in range(len(sizes) + 1)] 

614 g.graph["partition"] = [ 

615 set(nodelist[size_cumsum[x] : size_cumsum[x + 1]]) 

616 for x in range(len(size_cumsum) - 1) 

617 ] 

618 # Setup nodes and graph name 

619 for block_id, nodes in enumerate(g.graph["partition"]): 

620 for node in nodes: 

621 g.add_node(node, block=block_id) 

622 

623 g.name = "stochastic_block_model" 

624 

625 # Test for edge existence 

626 parts = g.graph["partition"] 

627 for i, j in block_iter: 

628 if i == j: 

629 if directed: 

630 if selfloops: 

631 edges = itertools.product(parts[i], parts[i]) 

632 else: 

633 edges = itertools.permutations(parts[i], 2) 

634 else: 

635 edges = itertools.combinations(parts[i], 2) 

636 if selfloops: 

637 edges = itertools.chain(edges, zip(parts[i], parts[i])) 

638 for e in edges: 

639 if seed.random() < p[i][j]: 

640 g.add_edge(*e) 

641 else: 

642 edges = itertools.product(parts[i], parts[j]) 

643 if sparse: 

644 if p[i][j] == 1: # Test edges cases p_ij = 0 or 1 

645 for e in edges: 

646 g.add_edge(*e) 

647 elif p[i][j] > 0: 

648 while True: 

649 try: 

650 logrand = math.log(seed.random()) 

651 skip = math.floor(logrand / math.log(1 - p[i][j])) 

652 # consume "skip" edges 

653 next(itertools.islice(edges, skip, skip), None) 

654 e = next(edges) 

655 g.add_edge(*e) # __safe 

656 except StopIteration: 

657 break 

658 else: 

659 for e in edges: 

660 if seed.random() < p[i][j]: 

661 g.add_edge(*e) # __safe 

662 return g 

663 

664 

665def _zipf_rv_below(gamma, xmin, threshold, seed): 

666 """Returns a random value chosen from the bounded Zipf distribution. 

667 

668 Repeatedly draws values from the Zipf distribution until the 

669 threshold is met, then returns that value. 

670 """ 

671 result = nx.utils.zipf_rv(gamma, xmin, seed) 

672 while result > threshold: 

673 result = nx.utils.zipf_rv(gamma, xmin, seed) 

674 return result 

675 

676 

677def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed): 

678 """Returns a list of numbers obeying a constrained power law distribution. 

679 

680 ``gamma`` and ``low`` are the parameters for the Zipf distribution. 

681 

682 ``high`` is the maximum allowed value for values draw from the Zipf 

683 distribution. For more information, see :func:`_zipf_rv_below`. 

684 

685 ``condition`` and ``length`` are Boolean-valued functions on 

686 lists. While generating the list, random values are drawn and 

687 appended to the list until ``length`` is satisfied by the created 

688 list. Once ``condition`` is satisfied, the sequence generated in 

689 this way is returned. 

690 

691 ``max_iters`` indicates the number of times to generate a list 

692 satisfying ``length``. If the number of iterations exceeds this 

693 value, :exc:`~networkx.exception.ExceededMaxIterations` is raised. 

694 

695 seed : integer, random_state, or None (default) 

696 Indicator of random number generation state. 

697 See :ref:`Randomness<randomness>`. 

698 """ 

699 for i in range(max_iters): 

700 seq = [] 

701 while not length(seq): 

702 seq.append(_zipf_rv_below(gamma, low, high, seed)) 

703 if condition(seq): 

704 return seq 

705 raise nx.ExceededMaxIterations("Could not create power law sequence") 

706 

707 

708def _hurwitz_zeta(x, q, tolerance): 

709 """The Hurwitz zeta function, or the Riemann zeta function of two arguments. 

710 

711 ``x`` must be greater than one and ``q`` must be positive. 

712 

713 This function repeatedly computes subsequent partial sums until 

714 convergence, as decided by ``tolerance``. 

715 """ 

716 z = 0 

717 z_prev = -float("inf") 

718 k = 0 

719 while abs(z - z_prev) > tolerance: 

720 z_prev = z 

721 z += 1 / ((k + q) ** x) 

722 k += 1 

723 return z 

724 

725 

726def _generate_min_degree(gamma, average_degree, max_degree, tolerance, max_iters): 

727 """Returns a minimum degree from the given average degree.""" 

728 # Defines zeta function whether or not Scipy is available 

729 try: 

730 from scipy.special import zeta 

731 except ImportError: 

732 

733 def zeta(x, q): 

734 return _hurwitz_zeta(x, q, tolerance) 

735 

736 min_deg_top = max_degree 

737 min_deg_bot = 1 

738 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot 

739 itrs = 0 

740 mid_avg_deg = 0 

741 while abs(mid_avg_deg - average_degree) > tolerance: 

742 if itrs > max_iters: 

743 raise nx.ExceededMaxIterations("Could not match average_degree") 

744 mid_avg_deg = 0 

745 for x in range(int(min_deg_mid), max_degree + 1): 

746 mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid) 

747 if mid_avg_deg > average_degree: 

748 min_deg_top = min_deg_mid 

749 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot 

750 else: 

751 min_deg_bot = min_deg_mid 

752 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot 

753 itrs += 1 

754 # return int(min_deg_mid + 0.5) 

755 return round(min_deg_mid) 

756 

757 

758def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed): 

759 """Returns a list of sets, each of which represents a community. 

760 

761 ``degree_seq`` is the degree sequence that must be met by the 

762 graph. 

763 

764 ``community_sizes`` is the community size distribution that must be 

765 met by the generated list of sets. 

766 

767 ``mu`` is a float in the interval [0, 1] indicating the fraction of 

768 intra-community edges incident to each node. 

769 

770 ``max_iters`` is the number of times to try to add a node to a 

771 community. This must be greater than the length of 

772 ``degree_seq``, otherwise this function will always fail. If 

773 the number of iterations exceeds this value, 

774 :exc:`~networkx.exception.ExceededMaxIterations` is raised. 

775 

776 seed : integer, random_state, or None (default) 

777 Indicator of random number generation state. 

778 See :ref:`Randomness<randomness>`. 

779 

780 The communities returned by this are sets of integers in the set {0, 

781 ..., *n* - 1}, where *n* is the length of ``degree_seq``. 

782 

783 """ 

784 # This assumes the nodes in the graph will be natural numbers. 

785 result = [set() for _ in community_sizes] 

786 n = len(degree_seq) 

787 free = list(range(n)) 

788 for i in range(max_iters): 

789 v = free.pop() 

790 c = seed.choice(range(len(community_sizes))) 

791 # s = int(degree_seq[v] * (1 - mu) + 0.5) 

792 s = round(degree_seq[v] * (1 - mu)) 

793 # If the community is large enough, add the node to the chosen 

794 # community. Otherwise, return it to the list of unaffiliated 

795 # nodes. 

796 if s < community_sizes[c]: 

797 result[c].add(v) 

798 else: 

799 free.append(v) 

800 # If the community is too big, remove a node from it. 

801 if len(result[c]) > community_sizes[c]: 

802 free.append(result[c].pop()) 

803 if not free: 

804 return result 

805 msg = "Could not assign communities; try increasing min_community" 

806 raise nx.ExceededMaxIterations(msg) 

807 

808 

809@py_random_state(11) 

810@nx._dispatchable(graphs=None, returns_graph=True) 

811def LFR_benchmark_graph( 

812 n, 

813 tau1, 

814 tau2, 

815 mu, 

816 average_degree=None, 

817 min_degree=None, 

818 max_degree=None, 

819 min_community=None, 

820 max_community=None, 

821 tol=1.0e-7, 

822 max_iters=500, 

823 seed=None, 

824): 

825 r"""Returns the LFR benchmark graph. 

826 

827 This algorithm proceeds as follows: 

828 

829 1) Find a degree sequence with a power law distribution, and minimum 

830 value ``min_degree``, which has approximate average degree 

831 ``average_degree``. This is accomplished by either 

832 

833 a) specifying ``min_degree`` and not ``average_degree``, 

834 b) specifying ``average_degree`` and not ``min_degree``, in which 

835 case a suitable minimum degree will be found. 

836 

837 ``max_degree`` can also be specified, otherwise it will be set to 

838 ``n``. Each node *u* will have $\mu \mathrm{deg}(u)$ edges 

839 joining it to nodes in communities other than its own and $(1 - 

840 \mu) \mathrm{deg}(u)$ edges joining it to nodes in its own 

841 community. 

842 2) Generate community sizes according to a power law distribution 

843 with exponent ``tau2``. If ``min_community`` and 

844 ``max_community`` are not specified they will be selected to be 

845 ``min_degree`` and ``max_degree``, respectively. Community sizes 

846 are generated until the sum of their sizes equals ``n``. 

847 3) Each node will be randomly assigned a community with the 

848 condition that the community is large enough for the node's 

849 intra-community degree, $(1 - \mu) \mathrm{deg}(u)$ as 

850 described in step 2. If a community grows too large, a random node 

851 will be selected for reassignment to a new community, until all 

852 nodes have been assigned a community. 

853 4) Each node *u* then adds $(1 - \mu) \mathrm{deg}(u)$ 

854 intra-community edges and $\mu \mathrm{deg}(u)$ inter-community 

855 edges. 

856 

857 Parameters 

858 ---------- 

859 n : int 

860 Number of nodes in the created graph. 

861 

862 tau1 : float 

863 Power law exponent for the degree distribution of the created 

864 graph. This value must be strictly greater than one. 

865 

866 tau2 : float 

867 Power law exponent for the community size distribution in the 

868 created graph. This value must be strictly greater than one. 

869 

870 mu : float 

871 Fraction of inter-community edges incident to each node. This 

872 value must be in the interval [0, 1]. 

873 

874 average_degree : float 

875 Desired average degree of nodes in the created graph. This value 

876 must be in the interval [0, *n*]. Exactly one of this and 

877 ``min_degree`` must be specified, otherwise a 

878 :exc:`NetworkXError` is raised. 

879 

880 min_degree : int 

881 Minimum degree of nodes in the created graph. This value must be 

882 in the interval [0, *n*]. Exactly one of this and 

883 ``average_degree`` must be specified, otherwise a 

884 :exc:`NetworkXError` is raised. 

885 

886 max_degree : int 

887 Maximum degree of nodes in the created graph. If not specified, 

888 this is set to ``n``, the total number of nodes in the graph. 

889 

890 min_community : int 

891 Minimum size of communities in the graph. If not specified, this 

892 is set to ``min_degree``. 

893 

894 max_community : int 

895 Maximum size of communities in the graph. If not specified, this 

896 is set to ``n``, the total number of nodes in the graph. 

897 

898 tol : float 

899 Tolerance when comparing floats, specifically when comparing 

900 average degree values. 

901 

902 max_iters : int 

903 Maximum number of iterations to try to create the community sizes, 

904 degree distribution, and community affiliations. 

905 

906 seed : integer, random_state, or None (default) 

907 Indicator of random number generation state. 

908 See :ref:`Randomness<randomness>`. 

909 

910 Returns 

911 ------- 

912 G : NetworkX graph 

913 The LFR benchmark graph generated according to the specified 

914 parameters. 

915 

916 Each node in the graph has a node attribute ``'community'`` that 

917 stores the community (that is, the set of nodes) that includes 

918 it. 

919 

920 Raises 

921 ------ 

922 NetworkXError 

923 If any of the parameters do not meet their upper and lower bounds: 

924 

925 - ``tau1`` and ``tau2`` must be strictly greater than 1. 

926 - ``mu`` must be in [0, 1]. 

927 - ``max_degree`` must be in {1, ..., *n*}. 

928 - ``min_community`` and ``max_community`` must be in {0, ..., 

929 *n*}. 

930 

931 If not exactly one of ``average_degree`` and ``min_degree`` is 

932 specified. 

933 

934 If ``min_degree`` is not specified and a suitable ``min_degree`` 

935 cannot be found. 

936 

937 ExceededMaxIterations 

938 If a valid degree sequence cannot be created within 

939 ``max_iters`` number of iterations. 

940 

941 If a valid set of community sizes cannot be created within 

942 ``max_iters`` number of iterations. 

943 

944 If a valid community assignment cannot be created within ``10 * 

945 n * max_iters`` number of iterations. 

946 

947 Examples 

948 -------- 

949 Basic usage:: 

950 

951 >>> from networkx.generators.community import LFR_benchmark_graph 

952 >>> n = 250 

953 >>> tau1 = 3 

954 >>> tau2 = 1.5 

955 >>> mu = 0.1 

956 >>> G = LFR_benchmark_graph( 

957 ... n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10 

958 ... ) 

959 

960 Continuing the example above, you can get the communities from the 

961 node attributes of the graph:: 

962 

963 >>> communities = {frozenset(G.nodes[v]["community"]) for v in G} 

964 

965 Notes 

966 ----- 

967 This algorithm differs slightly from the original way it was 

968 presented in [1]. 

969 

970 1) Rather than connecting the graph via a configuration model then 

971 rewiring to match the intra-community and inter-community 

972 degrees, we do this wiring explicitly at the end, which should be 

973 equivalent. 

974 2) The code posted on the author's website [2] calculates the random 

975 power law distributed variables and their average using 

976 continuous approximations, whereas we use the discrete 

977 distributions here as both degree and community size are 

978 discrete. 

979 

980 Though the authors describe the algorithm as quite robust, testing 

981 during development indicates that a somewhat narrower parameter set 

982 is likely to successfully produce a graph. Some suggestions have 

983 been provided in the event of exceptions. 

984 

985 References 

986 ---------- 

987 .. [1] "Benchmark graphs for testing community detection algorithms", 

988 Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi, 

989 Phys. Rev. E 78, 046110 2008 

990 .. [2] https://www.santofortunato.net/resources 

991 

992 """ 

993 # Perform some basic parameter validation. 

994 if not tau1 > 1: 

995 raise nx.NetworkXError("tau1 must be greater than one") 

996 if not tau2 > 1: 

997 raise nx.NetworkXError("tau2 must be greater than one") 

998 if not 0 <= mu <= 1: 

999 raise nx.NetworkXError("mu must be in the interval [0, 1]") 

1000 

1001 # Validate parameters for generating the degree sequence. 

1002 if max_degree is None: 

1003 max_degree = n 

1004 elif not 0 < max_degree <= n: 

1005 raise nx.NetworkXError("max_degree must be in the interval (0, n]") 

1006 if not ((min_degree is None) ^ (average_degree is None)): 

1007 raise nx.NetworkXError( 

1008 "Must assign exactly one of min_degree and average_degree" 

1009 ) 

1010 if min_degree is None: 

1011 min_degree = _generate_min_degree( 

1012 tau1, average_degree, max_degree, tol, max_iters 

1013 ) 

1014 

1015 # Generate a degree sequence with a power law distribution. 

1016 low, high = min_degree, max_degree 

1017 

1018 def condition(seq): 

1019 return sum(seq) % 2 == 0 

1020 

1021 def length(seq): 

1022 return len(seq) >= n 

1023 

1024 deg_seq = _powerlaw_sequence(tau1, low, high, condition, length, max_iters, seed) 

1025 

1026 # Validate parameters for generating the community size sequence. 

1027 if min_community is None: 

1028 min_community = min(deg_seq) 

1029 if max_community is None: 

1030 max_community = max(deg_seq) 

1031 

1032 # Generate a community size sequence with a power law distribution. 

1033 # 

1034 # TODO The original code incremented the number of iterations each 

1035 # time a new Zipf random value was drawn from the distribution. This 

1036 # differed from the way the number of iterations was incremented in 

1037 # `_powerlaw_degree_sequence`, so this code was changed to match 

1038 # that one. As a result, this code is allowed many more chances to 

1039 # generate a valid community size sequence. 

1040 low, high = min_community, max_community 

1041 

1042 def condition(seq): 

1043 return sum(seq) == n 

1044 

1045 def length(seq): 

1046 return sum(seq) >= n 

1047 

1048 comms = _powerlaw_sequence(tau2, low, high, condition, length, max_iters, seed) 

1049 

1050 # Generate the communities based on the given degree sequence and 

1051 # community sizes. 

1052 max_iters *= 10 * n 

1053 communities = _generate_communities(deg_seq, comms, mu, max_iters, seed) 

1054 

1055 # Finally, generate the benchmark graph based on the given 

1056 # communities, joining nodes according to the intra- and 

1057 # inter-community degrees. 

1058 G = nx.Graph() 

1059 G.add_nodes_from(range(n)) 

1060 for c in communities: 

1061 for u in c: 

1062 while G.degree(u) < round(deg_seq[u] * (1 - mu)): 

1063 v = seed.choice(list(c)) 

1064 G.add_edge(u, v) 

1065 while G.degree(u) < deg_seq[u]: 

1066 v = seed.choice(range(n)) 

1067 if v not in c: 

1068 G.add_edge(u, v) 

1069 G.nodes[u]["community"] = c 

1070 return G