1"""Functions for generating graphs based on the "duplication" method.
2
3These graph generators start with a small initial graph then duplicate
4nodes and (partially) duplicate their edges. These functions are
5generally inspired by biological networks.
6
7"""
8
9import networkx as nx
10from networkx.exception import NetworkXError
11from networkx.utils import py_random_state
12from networkx.utils.misc import check_create_using
13
14__all__ = ["partial_duplication_graph", "duplication_divergence_graph"]
15
16
17@py_random_state(4)
18@nx._dispatchable(graphs=None, returns_graph=True)
19def partial_duplication_graph(N, n, p, q, seed=None, *, create_using=None):
20 """Returns a random graph using the partial duplication model.
21
22 Parameters
23 ----------
24 N : int
25 The total number of nodes in the final graph.
26
27 n : int
28 The number of nodes in the initial clique.
29
30 p : float
31 The probability of joining each neighbor of a node to the
32 duplicate node. Must be a number in the between zero and one,
33 inclusive.
34
35 q : float
36 The probability of joining the source node to the duplicate
37 node. Must be a number in the between zero and one, inclusive.
38
39 seed : integer, random_state, or None (default)
40 Indicator of random number generation state.
41 See :ref:`Randomness<randomness>`.
42
43 create_using : Graph constructor, optional (default=nx.Graph)
44 Graph type to create. If graph instance, then cleared before populated.
45 Multigraph and directed types are not supported and raise a ``NetworkXError``.
46
47 Notes
48 -----
49 A graph of nodes is grown by creating a fully connected graph
50 of size `n`. The following procedure is then repeated until
51 a total of `N` nodes have been reached.
52
53 1. A random node, *u*, is picked and a new node, *v*, is created.
54 2. For each neighbor of *u* an edge from the neighbor to *v* is created
55 with probability `p`.
56 3. An edge from *u* to *v* is created with probability `q`.
57
58 This algorithm appears in [1].
59
60 This implementation allows the possibility of generating
61 disconnected graphs.
62
63 References
64 ----------
65 .. [1] Knudsen Michael, and Carsten Wiuf. "A Markov chain approach to
66 randomly grown graphs." Journal of Applied Mathematics 2008.
67 <https://doi.org/10.1155/2008/190836>
68
69 """
70 create_using = check_create_using(create_using, directed=False, multigraph=False)
71 if p < 0 or p > 1 or q < 0 or q > 1:
72 msg = "partial duplication graph must have 0 <= p, q <= 1."
73 raise NetworkXError(msg)
74 if n > N:
75 raise NetworkXError("partial duplication graph must have n <= N.")
76
77 G = nx.complete_graph(n, create_using)
78 for new_node in range(n, N):
79 # Pick a random vertex, u, already in the graph.
80 src_node = seed.randint(0, new_node - 1)
81
82 # Add a new vertex, v, to the graph.
83 G.add_node(new_node)
84
85 # For each neighbor of u...
86 for nbr_node in list(nx.all_neighbors(G, src_node)):
87 # Add the neighbor to v with probability p.
88 if seed.random() < p:
89 G.add_edge(new_node, nbr_node)
90
91 # Join v and u with probability q.
92 if seed.random() < q:
93 G.add_edge(new_node, src_node)
94 return G
95
96
97@py_random_state(2)
98@nx._dispatchable(graphs=None, returns_graph=True)
99def duplication_divergence_graph(n, p, seed=None, *, create_using=None):
100 """Returns an undirected graph using the duplication-divergence model.
101
102 A graph of `n` nodes is created by duplicating the initial nodes
103 and retaining edges incident to the original nodes with a retention
104 probability `p`.
105
106 Parameters
107 ----------
108 n : int
109 The desired number of nodes in the graph.
110 p : float
111 The probability for retaining the edge of the replicated node.
112 seed : integer, random_state, or None (default)
113 Indicator of random number generation state.
114 See :ref:`Randomness<randomness>`.
115 create_using : Graph constructor, optional (default=nx.Graph)
116 Graph type to create. If graph instance, then cleared before populated.
117 Multigraph and directed types are not supported and raise a ``NetworkXError``.
118
119 Returns
120 -------
121 G : Graph
122
123 Raises
124 ------
125 NetworkXError
126 If `p` is not a valid probability.
127 If `n` is less than 2.
128
129 Notes
130 -----
131 This algorithm appears in [1].
132
133 This implementation disallows the possibility of generating
134 disconnected graphs.
135
136 References
137 ----------
138 .. [1] I. Ispolatov, P. L. Krapivsky, A. Yuryev,
139 "Duplication-divergence model of protein interaction network",
140 Phys. Rev. E, 71, 061911, 2005.
141
142 """
143 if p > 1 or p < 0:
144 msg = f"NetworkXError p={p} is not in [0,1]."
145 raise nx.NetworkXError(msg)
146 if n < 2:
147 msg = "n must be greater than or equal to 2"
148 raise nx.NetworkXError(msg)
149
150 create_using = check_create_using(create_using, directed=False, multigraph=False)
151 G = nx.empty_graph(create_using=create_using)
152
153 # Initialize the graph with two connected nodes.
154 G.add_edge(0, 1)
155 i = 2
156 while i < n:
157 # Choose a random node from current graph to duplicate.
158 random_node = seed.choice(list(G))
159 # Make the replica.
160 G.add_node(i)
161 # flag indicates whether at least one edge is connected on the replica.
162 flag = False
163 for nbr in G.neighbors(random_node):
164 if seed.random() < p:
165 # Link retention step.
166 G.add_edge(i, nbr)
167 flag = True
168 if not flag:
169 # Delete replica if no edges retained.
170 G.remove_node(i)
171 else:
172 # Successful duplication.
173 i += 1
174 return G