Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/generators/community.py: 13%

267 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Generators for classes of graphs used in studying social networks.""" 

2import itertools 

3import math 

4 

5import networkx as nx 

6from networkx.utils import py_random_state 

7 

8__all__ = [ 

9 "caveman_graph", 

10 "connected_caveman_graph", 

11 "relaxed_caveman_graph", 

12 "random_partition_graph", 

13 "planted_partition_graph", 

14 "gaussian_random_partition_graph", 

15 "ring_of_cliques", 

16 "windmill_graph", 

17 "stochastic_block_model", 

18 "LFR_benchmark_graph", 

19] 

20 

21 

22@nx._dispatch(graphs=None) 

23def caveman_graph(l, k): 

24 """Returns a caveman graph of `l` cliques of size `k`. 

25 

26 Parameters 

27 ---------- 

28 l : int 

29 Number of cliques 

30 k : int 

31 Size of cliques 

32 

33 Returns 

34 ------- 

35 G : NetworkX Graph 

36 caveman graph 

37 

38 Notes 

39 ----- 

40 This returns an undirected graph, it can be converted to a directed 

41 graph using :func:`nx.to_directed`, or a multigraph using 

42 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is 

43 described in [1]_ and it is unclear which of the directed 

44 generalizations is most useful. 

45 

46 Examples 

47 -------- 

48 >>> G = nx.caveman_graph(3, 3) 

49 

50 See also 

51 -------- 

52 

53 connected_caveman_graph 

54 

55 References 

56 ---------- 

57 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' 

58 Amer. J. Soc. 105, 493-527, 1999. 

59 """ 

60 # l disjoint cliques of size k 

61 G = nx.empty_graph(l * k) 

62 if k > 1: 

63 for start in range(0, l * k, k): 

64 edges = itertools.combinations(range(start, start + k), 2) 

65 G.add_edges_from(edges) 

66 return G 

67 

68 

69@nx._dispatch(graphs=None) 

70def connected_caveman_graph(l, k): 

71 """Returns a connected caveman graph of `l` cliques of size `k`. 

72 

73 The connected caveman graph is formed by creating `n` cliques of size 

74 `k`, then a single edge in each clique is rewired to a node in an 

75 adjacent clique. 

76 

77 Parameters 

78 ---------- 

79 l : int 

80 number of cliques 

81 k : int 

82 size of cliques (k at least 2 or NetworkXError is raised) 

83 

84 Returns 

85 ------- 

86 G : NetworkX Graph 

87 connected caveman graph 

88 

89 Raises 

90 ------ 

91 NetworkXError 

92 If the size of cliques `k` is smaller than 2. 

93 

94 Notes 

95 ----- 

96 This returns an undirected graph, it can be converted to a directed 

97 graph using :func:`nx.to_directed`, or a multigraph using 

98 ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is 

99 described in [1]_ and it is unclear which of the directed 

100 generalizations is most useful. 

101 

102 Examples 

103 -------- 

104 >>> G = nx.connected_caveman_graph(3, 3) 

105 

106 References 

107 ---------- 

108 .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' 

109 Amer. J. Soc. 105, 493-527, 1999. 

110 """ 

111 if k < 2: 

112 raise nx.NetworkXError( 

113 "The size of cliques in a connected caveman graph " "must be at least 2." 

114 ) 

115 

116 G = nx.caveman_graph(l, k) 

117 for start in range(0, l * k, k): 

118 G.remove_edge(start, start + 1) 

119 G.add_edge(start, (start - 1) % (l * k)) 

120 return G 

121 

122 

123@py_random_state(3) 

124@nx._dispatch(graphs=None) 

125def relaxed_caveman_graph(l, k, p, seed=None): 

126 """Returns a relaxed caveman graph. 

127 

128 A relaxed caveman graph starts with `l` cliques of size `k`. Edges are 

129 then randomly rewired with probability `p` to link different cliques. 

130 

131 Parameters 

132 ---------- 

133 l : int 

134 Number of groups 

135 k : int 

136 Size of cliques 

137 p : float 

138 Probability of rewiring each edge. 

139 seed : integer, random_state, or None (default) 

140 Indicator of random number generation state. 

141 See :ref:`Randomness<randomness>`. 

142 

143 Returns 

144 ------- 

145 G : NetworkX Graph 

146 Relaxed Caveman Graph 

147 

148 Raises 

149 ------ 

150 NetworkXError 

151 If p is not in [0,1] 

152 

153 Examples 

154 -------- 

155 >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42) 

156 

157 References 

158 ---------- 

159 .. [1] Santo Fortunato, Community Detection in Graphs, 

160 Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174. 

161 https://arxiv.org/abs/0906.0612 

162 """ 

163 G = nx.caveman_graph(l, k) 

164 nodes = list(G) 

165 for u, v in G.edges(): 

166 if seed.random() < p: # rewire the edge 

167 x = seed.choice(nodes) 

168 if G.has_edge(u, x): 

169 continue 

170 G.remove_edge(u, v) 

171 G.add_edge(u, x) 

172 return G 

173 

174 

175@py_random_state(3) 

176@nx._dispatch(graphs=None) 

177def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False): 

178 """Returns the random partition graph with a partition of sizes. 

179 

180 A partition graph is a graph of communities with sizes defined by 

181 s in sizes. Nodes in the same group are connected with probability 

182 p_in and nodes of different groups are connected with probability 

183 p_out. 

184 

185 Parameters 

186 ---------- 

187 sizes : list of ints 

188 Sizes of groups 

189 p_in : float 

190 probability of edges with in groups 

191 p_out : float 

192 probability of edges between groups 

193 directed : boolean optional, default=False 

194 Whether to create a directed graph 

195 seed : integer, random_state, or None (default) 

196 Indicator of random number generation state. 

197 See :ref:`Randomness<randomness>`. 

198 

199 Returns 

200 ------- 

201 G : NetworkX Graph or DiGraph 

202 random partition graph of size sum(gs) 

203 

204 Raises 

205 ------ 

206 NetworkXError 

207 If p_in or p_out is not in [0,1] 

208 

209 Examples 

210 -------- 

211 >>> G = nx.random_partition_graph([10, 10, 10], 0.25, 0.01) 

212 >>> len(G) 

213 30 

214 >>> partition = G.graph["partition"] 

215 >>> len(partition) 

216 3 

217 

218 Notes 

219 ----- 

220 This is a generalization of the planted-l-partition described in 

221 [1]_. It allows for the creation of groups of any size. 

222 

223 The partition is store as a graph attribute 'partition'. 

224 

225 References 

226 ---------- 

227 .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports 

228 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 

229 """ 

230 # Use geometric method for O(n+m) complexity algorithm 

231 # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation')) 

232 if not 0.0 <= p_in <= 1.0: 

233 raise nx.NetworkXError("p_in must be in [0,1]") 

234 if not 0.0 <= p_out <= 1.0: 

235 raise nx.NetworkXError("p_out must be in [0,1]") 

236 

237 # create connection matrix 

238 num_blocks = len(sizes) 

239 p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)] 

240 for r in range(num_blocks): 

241 p[r][r] = p_in 

242 

243 return stochastic_block_model( 

244 sizes, 

245 p, 

246 nodelist=None, 

247 seed=seed, 

248 directed=directed, 

249 selfloops=False, 

250 sparse=True, 

251 ) 

252 

253 

254@py_random_state(4) 

255@nx._dispatch(graphs=None) 

256def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False): 

257 """Returns the planted l-partition graph. 

258 

259 This model partitions a graph with n=l*k vertices in 

260 l groups with k vertices each. Vertices of the same 

261 group are linked with a probability p_in, and vertices 

262 of different groups are linked with probability p_out. 

263 

264 Parameters 

265 ---------- 

266 l : int 

267 Number of groups 

268 k : int 

269 Number of vertices in each group 

270 p_in : float 

271 probability of connecting vertices within a group 

272 p_out : float 

273 probability of connected vertices between groups 

274 seed : integer, random_state, or None (default) 

275 Indicator of random number generation state. 

276 See :ref:`Randomness<randomness>`. 

277 directed : bool,optional (default=False) 

278 If True return a directed graph 

279 

280 Returns 

281 ------- 

282 G : NetworkX Graph or DiGraph 

283 planted l-partition graph 

284 

285 Raises 

286 ------ 

287 NetworkXError 

288 If p_in,p_out are not in [0,1] or 

289 

290 Examples 

291 -------- 

292 >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42) 

293 

294 See Also 

295 -------- 

296 random_partition_model 

297 

298 References 

299 ---------- 

300 .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning 

301 on the planted partition model, 

302 Random Struct. Algor. 18 (2001) 116-140. 

303 

304 .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports 

305 Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 

306 """ 

307 return random_partition_graph([k] * l, p_in, p_out, seed=seed, directed=directed) 

308 

309 

310@py_random_state(6) 

311@nx._dispatch(graphs=None) 

312def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, seed=None): 

313 """Generate a Gaussian random partition graph. 

314 

315 A Gaussian random partition graph is created by creating k partitions 

316 each with a size drawn from a normal distribution with mean s and variance 

317 s/v. Nodes are connected within clusters with probability p_in and 

318 between clusters with probability p_out[1] 

319 

320 Parameters 

321 ---------- 

322 n : int 

323 Number of nodes in the graph 

324 s : float 

325 Mean cluster size 

326 v : float 

327 Shape parameter. The variance of cluster size distribution is s/v. 

328 p_in : float 

329 Probability of intra cluster connection. 

330 p_out : float 

331 Probability of inter cluster connection. 

332 directed : boolean, optional default=False 

333 Whether to create a directed graph or not 

334 seed : integer, random_state, or None (default) 

335 Indicator of random number generation state. 

336 See :ref:`Randomness<randomness>`. 

337 

338 Returns 

339 ------- 

340 G : NetworkX Graph or DiGraph 

341 gaussian random partition graph 

342 

343 Raises 

344 ------ 

345 NetworkXError 

346 If s is > n 

347 If p_in or p_out is not in [0,1] 

348 

349 Notes 

350 ----- 

351 Note the number of partitions is dependent on s,v and n, and that the 

352 last partition may be considerably smaller, as it is sized to simply 

353 fill out the nodes [1] 

354 

355 See Also 

356 -------- 

357 random_partition_graph 

358 

359 Examples 

360 -------- 

361 >>> G = nx.gaussian_random_partition_graph(100, 10, 10, 0.25, 0.1) 

362 >>> len(G) 

363 100 

364 

365 References 

366 ---------- 

367 .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner, 

368 Experiments on Graph Clustering Algorithms, 

369 In the proceedings of the 11th Europ. Symp. Algorithms, 2003. 

370 """ 

371 if s > n: 

372 raise nx.NetworkXError("s must be <= n") 

373 assigned = 0 

374 sizes = [] 

375 while True: 

376 size = int(seed.gauss(s, s / v + 0.5)) 

377 if size < 1: # how to handle 0 or negative sizes? 

378 continue 

379 if assigned + size >= n: 

380 sizes.append(n - assigned) 

381 break 

382 assigned += size 

383 sizes.append(size) 

384 return random_partition_graph(sizes, p_in, p_out, seed=seed, directed=directed) 

385 

386 

387@nx._dispatch(graphs=None) 

388def ring_of_cliques(num_cliques, clique_size): 

389 """Defines a "ring of cliques" graph. 

390 

391 A ring of cliques graph is consisting of cliques, connected through single 

392 links. Each clique is a complete graph. 

393 

394 Parameters 

395 ---------- 

396 num_cliques : int 

397 Number of cliques 

398 clique_size : int 

399 Size of cliques 

400 

401 Returns 

402 ------- 

403 G : NetworkX Graph 

404 ring of cliques graph 

405 

406 Raises 

407 ------ 

408 NetworkXError 

409 If the number of cliques is lower than 2 or 

410 if the size of cliques is smaller than 2. 

411 

412 Examples 

413 -------- 

414 >>> G = nx.ring_of_cliques(8, 4) 

415 

416 See Also 

417 -------- 

418 connected_caveman_graph 

419 

420 Notes 

421 ----- 

422 The `connected_caveman_graph` graph removes a link from each clique to 

423 connect it with the next clique. Instead, the `ring_of_cliques` graph 

424 simply adds the link without removing any link from the cliques. 

425 """ 

426 if num_cliques < 2: 

427 raise nx.NetworkXError("A ring of cliques must have at least " "two cliques") 

428 if clique_size < 2: 

429 raise nx.NetworkXError("The cliques must have at least two nodes") 

430 

431 G = nx.Graph() 

432 for i in range(num_cliques): 

433 edges = itertools.combinations( 

434 range(i * clique_size, i * clique_size + clique_size), 2 

435 ) 

436 G.add_edges_from(edges) 

437 G.add_edge( 

438 i * clique_size + 1, (i + 1) * clique_size % (num_cliques * clique_size) 

439 ) 

440 return G 

441 

442 

443@nx._dispatch(graphs=None) 

444def windmill_graph(n, k): 

445 """Generate a windmill graph. 

446 A windmill graph is a graph of `n` cliques each of size `k` that are all 

447 joined at one node. 

448 It can be thought of as taking a disjoint union of `n` cliques of size `k`, 

449 selecting one point from each, and contracting all of the selected points. 

450 Alternatively, one could generate `n` cliques of size `k-1` and one node 

451 that is connected to all other nodes in the graph. 

452 

453 Parameters 

454 ---------- 

455 n : int 

456 Number of cliques 

457 k : int 

458 Size of cliques 

459 

460 Returns 

461 ------- 

462 G : NetworkX Graph 

463 windmill graph with n cliques of size k 

464 

465 Raises 

466 ------ 

467 NetworkXError 

468 If the number of cliques is less than two 

469 If the size of the cliques are less than two 

470 

471 Examples 

472 -------- 

473 >>> G = nx.windmill_graph(4, 5) 

474 

475 Notes 

476 ----- 

477 The node labeled `0` will be the node connected to all other nodes. 

478 Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters 

479 are in the opposite order as the parameters of this method. 

480 """ 

481 if n < 2: 

482 msg = "A windmill graph must have at least two cliques" 

483 raise nx.NetworkXError(msg) 

484 if k < 2: 

485 raise nx.NetworkXError("The cliques must have at least two nodes") 

486 

487 G = nx.disjoint_union_all( 

488 itertools.chain( 

489 [nx.complete_graph(k)], (nx.complete_graph(k - 1) for _ in range(n - 1)) 

490 ) 

491 ) 

492 G.add_edges_from((0, i) for i in range(k, G.number_of_nodes())) 

493 return G 

494 

495 

496@py_random_state(3) 

497@nx._dispatch(graphs=None) 

498def stochastic_block_model( 

499 sizes, p, nodelist=None, seed=None, directed=False, selfloops=False, sparse=True 

500): 

501 """Returns a stochastic block model graph. 

502 

503 This model partitions the nodes in blocks of arbitrary sizes, and places 

504 edges between pairs of nodes independently, with a probability that depends 

505 on the blocks. 

506 

507 Parameters 

508 ---------- 

509 sizes : list of ints 

510 Sizes of blocks 

511 p : list of list of floats 

512 Element (r,s) gives the density of edges going from the nodes 

513 of group r to nodes of group s. 

514 p must match the number of groups (len(sizes) == len(p)), 

515 and it must be symmetric if the graph is undirected. 

516 nodelist : list, optional 

517 The block tags are assigned according to the node identifiers 

518 in nodelist. If nodelist is None, then the ordering is the 

519 range [0,sum(sizes)-1]. 

520 seed : integer, random_state, or None (default) 

521 Indicator of random number generation state. 

522 See :ref:`Randomness<randomness>`. 

523 directed : boolean optional, default=False 

524 Whether to create a directed graph or not. 

525 selfloops : boolean optional, default=False 

526 Whether to include self-loops or not. 

527 sparse: boolean optional, default=True 

528 Use the sparse heuristic to speed up the generator. 

529 

530 Returns 

531 ------- 

532 g : NetworkX Graph or DiGraph 

533 Stochastic block model graph of size sum(sizes) 

534 

535 Raises 

536 ------ 

537 NetworkXError 

538 If probabilities are not in [0,1]. 

539 If the probability matrix is not square (directed case). 

540 If the probability matrix is not symmetric (undirected case). 

541 If the sizes list does not match nodelist or the probability matrix. 

542 If nodelist contains duplicate. 

543 

544 Examples 

545 -------- 

546 >>> sizes = [75, 75, 300] 

547 >>> probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]] 

548 >>> g = nx.stochastic_block_model(sizes, probs, seed=0) 

549 >>> len(g) 

550 450 

551 >>> H = nx.quotient_graph(g, g.graph["partition"], relabel=True) 

552 >>> for v in H.nodes(data=True): 

553 ... print(round(v[1]["density"], 3)) 

554 ... 

555 0.245 

556 0.348 

557 0.405 

558 >>> for v in H.edges(data=True): 

559 ... print(round(1.0 * v[2]["weight"] / (sizes[v[0]] * sizes[v[1]]), 3)) 

560 ... 

561 0.051 

562 0.022 

563 0.07 

564 

565 See Also 

566 -------- 

567 random_partition_graph 

568 planted_partition_graph 

569 gaussian_random_partition_graph 

570 gnp_random_graph 

571 

572 References 

573 ---------- 

574 .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S., 

575 "Stochastic blockmodels: First steps", 

576 Social networks, 5(2), 109-137, 1983. 

577 """ 

578 # Check if dimensions match 

579 if len(sizes) != len(p): 

580 raise nx.NetworkXException("'sizes' and 'p' do not match.") 

581 # Check for probability symmetry (undirected) and shape (directed) 

582 for row in p: 

583 if len(p) != len(row): 

584 raise nx.NetworkXException("'p' must be a square matrix.") 

585 if not directed: 

586 p_transpose = [list(i) for i in zip(*p)] 

587 for i in zip(p, p_transpose): 

588 for j in zip(i[0], i[1]): 

589 if abs(j[0] - j[1]) > 1e-08: 

590 raise nx.NetworkXException("'p' must be symmetric.") 

591 # Check for probability range 

592 for row in p: 

593 for prob in row: 

594 if prob < 0 or prob > 1: 

595 raise nx.NetworkXException("Entries of 'p' not in [0,1].") 

596 # Check for nodelist consistency 

597 if nodelist is not None: 

598 if len(nodelist) != sum(sizes): 

599 raise nx.NetworkXException("'nodelist' and 'sizes' do not match.") 

600 if len(nodelist) != len(set(nodelist)): 

601 raise nx.NetworkXException("nodelist contains duplicate.") 

602 else: 

603 nodelist = range(sum(sizes)) 

604 

605 # Setup the graph conditionally to the directed switch. 

606 block_range = range(len(sizes)) 

607 if directed: 

608 g = nx.DiGraph() 

609 block_iter = itertools.product(block_range, block_range) 

610 else: 

611 g = nx.Graph() 

612 block_iter = itertools.combinations_with_replacement(block_range, 2) 

613 # Split nodelist in a partition (list of sets). 

614 size_cumsum = [sum(sizes[0:x]) for x in range(len(sizes) + 1)] 

615 g.graph["partition"] = [ 

616 set(nodelist[size_cumsum[x] : size_cumsum[x + 1]]) 

617 for x in range(len(size_cumsum) - 1) 

618 ] 

619 # Setup nodes and graph name 

620 for block_id, nodes in enumerate(g.graph["partition"]): 

621 for node in nodes: 

622 g.add_node(node, block=block_id) 

623 

624 g.name = "stochastic_block_model" 

625 

626 # Test for edge existence 

627 parts = g.graph["partition"] 

628 for i, j in block_iter: 

629 if i == j: 

630 if directed: 

631 if selfloops: 

632 edges = itertools.product(parts[i], parts[i]) 

633 else: 

634 edges = itertools.permutations(parts[i], 2) 

635 else: 

636 edges = itertools.combinations(parts[i], 2) 

637 if selfloops: 

638 edges = itertools.chain(edges, zip(parts[i], parts[i])) 

639 for e in edges: 

640 if seed.random() < p[i][j]: 

641 g.add_edge(*e) 

642 else: 

643 edges = itertools.product(parts[i], parts[j]) 

644 if sparse: 

645 if p[i][j] == 1: # Test edges cases p_ij = 0 or 1 

646 for e in edges: 

647 g.add_edge(*e) 

648 elif p[i][j] > 0: 

649 while True: 

650 try: 

651 logrand = math.log(seed.random()) 

652 skip = math.floor(logrand / math.log(1 - p[i][j])) 

653 # consume "skip" edges 

654 next(itertools.islice(edges, skip, skip), None) 

655 e = next(edges) 

656 g.add_edge(*e) # __safe 

657 except StopIteration: 

658 break 

659 else: 

660 for e in edges: 

661 if seed.random() < p[i][j]: 

662 g.add_edge(*e) # __safe 

663 return g 

664 

665 

666def _zipf_rv_below(gamma, xmin, threshold, seed): 

667 """Returns a random value chosen from the bounded Zipf distribution. 

668 

669 Repeatedly draws values from the Zipf distribution until the 

670 threshold is met, then returns that value. 

671 """ 

672 result = nx.utils.zipf_rv(gamma, xmin, seed) 

673 while result > threshold: 

674 result = nx.utils.zipf_rv(gamma, xmin, seed) 

675 return result 

676 

677 

678def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed): 

679 """Returns a list of numbers obeying a constrained power law distribution. 

680 

681 ``gamma`` and ``low`` are the parameters for the Zipf distribution. 

682 

683 ``high`` is the maximum allowed value for values draw from the Zipf 

684 distribution. For more information, see :func:`_zipf_rv_below`. 

685 

686 ``condition`` and ``length`` are Boolean-valued functions on 

687 lists. While generating the list, random values are drawn and 

688 appended to the list until ``length`` is satisfied by the created 

689 list. Once ``condition`` is satisfied, the sequence generated in 

690 this way is returned. 

691 

692 ``max_iters`` indicates the number of times to generate a list 

693 satisfying ``length``. If the number of iterations exceeds this 

694 value, :exc:`~networkx.exception.ExceededMaxIterations` is raised. 

695 

696 seed : integer, random_state, or None (default) 

697 Indicator of random number generation state. 

698 See :ref:`Randomness<randomness>`. 

699 """ 

700 for i in range(max_iters): 

701 seq = [] 

702 while not length(seq): 

703 seq.append(_zipf_rv_below(gamma, low, high, seed)) 

704 if condition(seq): 

705 return seq 

706 raise nx.ExceededMaxIterations("Could not create power law sequence") 

707 

708 

709def _hurwitz_zeta(x, q, tolerance): 

710 """The Hurwitz zeta function, or the Riemann zeta function of two arguments. 

711 

712 ``x`` must be greater than one and ``q`` must be positive. 

713 

714 This function repeatedly computes subsequent partial sums until 

715 convergence, as decided by ``tolerance``. 

716 """ 

717 z = 0 

718 z_prev = -float("inf") 

719 k = 0 

720 while abs(z - z_prev) > tolerance: 

721 z_prev = z 

722 z += 1 / ((k + q) ** x) 

723 k += 1 

724 return z 

725 

726 

727def _generate_min_degree(gamma, average_degree, max_degree, tolerance, max_iters): 

728 """Returns a minimum degree from the given average degree.""" 

729 # Defines zeta function whether or not Scipy is available 

730 try: 

731 from scipy.special import zeta 

732 except ImportError: 

733 

734 def zeta(x, q): 

735 return _hurwitz_zeta(x, q, tolerance) 

736 

737 min_deg_top = max_degree 

738 min_deg_bot = 1 

739 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot 

740 itrs = 0 

741 mid_avg_deg = 0 

742 while abs(mid_avg_deg - average_degree) > tolerance: 

743 if itrs > max_iters: 

744 raise nx.ExceededMaxIterations("Could not match average_degree") 

745 mid_avg_deg = 0 

746 for x in range(int(min_deg_mid), max_degree + 1): 

747 mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid) 

748 if mid_avg_deg > average_degree: 

749 min_deg_top = min_deg_mid 

750 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot 

751 else: 

752 min_deg_bot = min_deg_mid 

753 min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot 

754 itrs += 1 

755 # return int(min_deg_mid + 0.5) 

756 return round(min_deg_mid) 

757 

758 

759def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed): 

760 """Returns a list of sets, each of which represents a community. 

761 

762 ``degree_seq`` is the degree sequence that must be met by the 

763 graph. 

764 

765 ``community_sizes`` is the community size distribution that must be 

766 met by the generated list of sets. 

767 

768 ``mu`` is a float in the interval [0, 1] indicating the fraction of 

769 intra-community edges incident to each node. 

770 

771 ``max_iters`` is the number of times to try to add a node to a 

772 community. This must be greater than the length of 

773 ``degree_seq``, otherwise this function will always fail. If 

774 the number of iterations exceeds this value, 

775 :exc:`~networkx.exception.ExceededMaxIterations` is raised. 

776 

777 seed : integer, random_state, or None (default) 

778 Indicator of random number generation state. 

779 See :ref:`Randomness<randomness>`. 

780 

781 The communities returned by this are sets of integers in the set {0, 

782 ..., *n* - 1}, where *n* is the length of ``degree_seq``. 

783 

784 """ 

785 # This assumes the nodes in the graph will be natural numbers. 

786 result = [set() for _ in community_sizes] 

787 n = len(degree_seq) 

788 free = list(range(n)) 

789 for i in range(max_iters): 

790 v = free.pop() 

791 c = seed.choice(range(len(community_sizes))) 

792 # s = int(degree_seq[v] * (1 - mu) + 0.5) 

793 s = round(degree_seq[v] * (1 - mu)) 

794 # If the community is large enough, add the node to the chosen 

795 # community. Otherwise, return it to the list of unaffiliated 

796 # nodes. 

797 if s < community_sizes[c]: 

798 result[c].add(v) 

799 else: 

800 free.append(v) 

801 # If the community is too big, remove a node from it. 

802 if len(result[c]) > community_sizes[c]: 

803 free.append(result[c].pop()) 

804 if not free: 

805 return result 

806 msg = "Could not assign communities; try increasing min_community" 

807 raise nx.ExceededMaxIterations(msg) 

808 

809 

810@py_random_state(11) 

811@nx._dispatch(graphs=None) 

812def LFR_benchmark_graph( 

813 n, 

814 tau1, 

815 tau2, 

816 mu, 

817 average_degree=None, 

818 min_degree=None, 

819 max_degree=None, 

820 min_community=None, 

821 max_community=None, 

822 tol=1.0e-7, 

823 max_iters=500, 

824 seed=None, 

825): 

826 r"""Returns the LFR benchmark graph. 

827 

828 This algorithm proceeds as follows: 

829 

830 1) Find a degree sequence with a power law distribution, and minimum 

831 value ``min_degree``, which has approximate average degree 

832 ``average_degree``. This is accomplished by either 

833 

834 a) specifying ``min_degree`` and not ``average_degree``, 

835 b) specifying ``average_degree`` and not ``min_degree``, in which 

836 case a suitable minimum degree will be found. 

837 

838 ``max_degree`` can also be specified, otherwise it will be set to 

839 ``n``. Each node *u* will have $\mu \mathrm{deg}(u)$ edges 

840 joining it to nodes in communities other than its own and $(1 - 

841 \mu) \mathrm{deg}(u)$ edges joining it to nodes in its own 

842 community. 

843 2) Generate community sizes according to a power law distribution 

844 with exponent ``tau2``. If ``min_community`` and 

845 ``max_community`` are not specified they will be selected to be 

846 ``min_degree`` and ``max_degree``, respectively. Community sizes 

847 are generated until the sum of their sizes equals ``n``. 

848 3) Each node will be randomly assigned a community with the 

849 condition that the community is large enough for the node's 

850 intra-community degree, $(1 - \mu) \mathrm{deg}(u)$ as 

851 described in step 2. If a community grows too large, a random node 

852 will be selected for reassignment to a new community, until all 

853 nodes have been assigned a community. 

854 4) Each node *u* then adds $(1 - \mu) \mathrm{deg}(u)$ 

855 intra-community edges and $\mu \mathrm{deg}(u)$ inter-community 

856 edges. 

857 

858 Parameters 

859 ---------- 

860 n : int 

861 Number of nodes in the created graph. 

862 

863 tau1 : float 

864 Power law exponent for the degree distribution of the created 

865 graph. This value must be strictly greater than one. 

866 

867 tau2 : float 

868 Power law exponent for the community size distribution in the 

869 created graph. This value must be strictly greater than one. 

870 

871 mu : float 

872 Fraction of inter-community edges incident to each node. This 

873 value must be in the interval [0, 1]. 

874 

875 average_degree : float 

876 Desired average degree of nodes in the created graph. This value 

877 must be in the interval [0, *n*]. Exactly one of this and 

878 ``min_degree`` must be specified, otherwise a 

879 :exc:`NetworkXError` is raised. 

880 

881 min_degree : int 

882 Minimum degree of nodes in the created graph. This value must be 

883 in the interval [0, *n*]. Exactly one of this and 

884 ``average_degree`` must be specified, otherwise a 

885 :exc:`NetworkXError` is raised. 

886 

887 max_degree : int 

888 Maximum degree of nodes in the created graph. If not specified, 

889 this is set to ``n``, the total number of nodes in the graph. 

890 

891 min_community : int 

892 Minimum size of communities in the graph. If not specified, this 

893 is set to ``min_degree``. 

894 

895 max_community : int 

896 Maximum size of communities in the graph. If not specified, this 

897 is set to ``n``, the total number of nodes in the graph. 

898 

899 tol : float 

900 Tolerance when comparing floats, specifically when comparing 

901 average degree values. 

902 

903 max_iters : int 

904 Maximum number of iterations to try to create the community sizes, 

905 degree distribution, and community affiliations. 

906 

907 seed : integer, random_state, or None (default) 

908 Indicator of random number generation state. 

909 See :ref:`Randomness<randomness>`. 

910 

911 Returns 

912 ------- 

913 G : NetworkX graph 

914 The LFR benchmark graph generated according to the specified 

915 parameters. 

916 

917 Each node in the graph has a node attribute ``'community'`` that 

918 stores the community (that is, the set of nodes) that includes 

919 it. 

920 

921 Raises 

922 ------ 

923 NetworkXError 

924 If any of the parameters do not meet their upper and lower bounds: 

925 

926 - ``tau1`` and ``tau2`` must be strictly greater than 1. 

927 - ``mu`` must be in [0, 1]. 

928 - ``max_degree`` must be in {1, ..., *n*}. 

929 - ``min_community`` and ``max_community`` must be in {0, ..., 

930 *n*}. 

931 

932 If not exactly one of ``average_degree`` and ``min_degree`` is 

933 specified. 

934 

935 If ``min_degree`` is not specified and a suitable ``min_degree`` 

936 cannot be found. 

937 

938 ExceededMaxIterations 

939 If a valid degree sequence cannot be created within 

940 ``max_iters`` number of iterations. 

941 

942 If a valid set of community sizes cannot be created within 

943 ``max_iters`` number of iterations. 

944 

945 If a valid community assignment cannot be created within ``10 * 

946 n * max_iters`` number of iterations. 

947 

948 Examples 

949 -------- 

950 Basic usage:: 

951 

952 >>> from networkx.generators.community import LFR_benchmark_graph 

953 >>> n = 250 

954 >>> tau1 = 3 

955 >>> tau2 = 1.5 

956 >>> mu = 0.1 

957 >>> G = LFR_benchmark_graph( 

958 ... n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10 

959 ... ) 

960 

961 Continuing the example above, you can get the communities from the 

962 node attributes of the graph:: 

963 

964 >>> communities = {frozenset(G.nodes[v]["community"]) for v in G} 

965 

966 Notes 

967 ----- 

968 This algorithm differs slightly from the original way it was 

969 presented in [1]. 

970 

971 1) Rather than connecting the graph via a configuration model then 

972 rewiring to match the intra-community and inter-community 

973 degrees, we do this wiring explicitly at the end, which should be 

974 equivalent. 

975 2) The code posted on the author's website [2] calculates the random 

976 power law distributed variables and their average using 

977 continuous approximations, whereas we use the discrete 

978 distributions here as both degree and community size are 

979 discrete. 

980 

981 Though the authors describe the algorithm as quite robust, testing 

982 during development indicates that a somewhat narrower parameter set 

983 is likely to successfully produce a graph. Some suggestions have 

984 been provided in the event of exceptions. 

985 

986 References 

987 ---------- 

988 .. [1] "Benchmark graphs for testing community detection algorithms", 

989 Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi, 

990 Phys. Rev. E 78, 046110 2008 

991 .. [2] https://www.santofortunato.net/resources 

992 

993 """ 

994 # Perform some basic parameter validation. 

995 if not tau1 > 1: 

996 raise nx.NetworkXError("tau1 must be greater than one") 

997 if not tau2 > 1: 

998 raise nx.NetworkXError("tau2 must be greater than one") 

999 if not 0 <= mu <= 1: 

1000 raise nx.NetworkXError("mu must be in the interval [0, 1]") 

1001 

1002 # Validate parameters for generating the degree sequence. 

1003 if max_degree is None: 

1004 max_degree = n 

1005 elif not 0 < max_degree <= n: 

1006 raise nx.NetworkXError("max_degree must be in the interval (0, n]") 

1007 if not ((min_degree is None) ^ (average_degree is None)): 

1008 raise nx.NetworkXError( 

1009 "Must assign exactly one of min_degree and" " average_degree" 

1010 ) 

1011 if min_degree is None: 

1012 min_degree = _generate_min_degree( 

1013 tau1, average_degree, max_degree, tol, max_iters 

1014 ) 

1015 

1016 # Generate a degree sequence with a power law distribution. 

1017 low, high = min_degree, max_degree 

1018 

1019 def condition(seq): 

1020 return sum(seq) % 2 == 0 

1021 

1022 def length(seq): 

1023 return len(seq) >= n 

1024 

1025 deg_seq = _powerlaw_sequence(tau1, low, high, condition, length, max_iters, seed) 

1026 

1027 # Validate parameters for generating the community size sequence. 

1028 if min_community is None: 

1029 min_community = min(deg_seq) 

1030 if max_community is None: 

1031 max_community = max(deg_seq) 

1032 

1033 # Generate a community size sequence with a power law distribution. 

1034 # 

1035 # TODO The original code incremented the number of iterations each 

1036 # time a new Zipf random value was drawn from the distribution. This 

1037 # differed from the way the number of iterations was incremented in 

1038 # `_powerlaw_degree_sequence`, so this code was changed to match 

1039 # that one. As a result, this code is allowed many more chances to 

1040 # generate a valid community size sequence. 

1041 low, high = min_community, max_community 

1042 

1043 def condition(seq): 

1044 return sum(seq) == n 

1045 

1046 def length(seq): 

1047 return sum(seq) >= n 

1048 

1049 comms = _powerlaw_sequence(tau2, low, high, condition, length, max_iters, seed) 

1050 

1051 # Generate the communities based on the given degree sequence and 

1052 # community sizes. 

1053 max_iters *= 10 * n 

1054 communities = _generate_communities(deg_seq, comms, mu, max_iters, seed) 

1055 

1056 # Finally, generate the benchmark graph based on the given 

1057 # communities, joining nodes according to the intra- and 

1058 # inter-community degrees. 

1059 G = nx.Graph() 

1060 G.add_nodes_from(range(n)) 

1061 for c in communities: 

1062 for u in c: 

1063 while G.degree(u) < round(deg_seq[u] * (1 - mu)): 

1064 v = seed.choice(list(c)) 

1065 G.add_edge(u, v) 

1066 while G.degree(u) < deg_seq[u]: 

1067 v = seed.choice(range(n)) 

1068 if v not in c: 

1069 G.add_edge(u, v) 

1070 G.nodes[u]["community"] = c 

1071 return G