1"""
2Utilities for generating random numbers, random sequences, and
3random selections.
4"""
5
6import networkx as nx
7from networkx.utils import py_random_state
8
9__all__ = [
10 "powerlaw_sequence",
11 "is_valid_tree_degree_sequence",
12 "zipf_rv",
13 "cumulative_distribution",
14 "discrete_sequence",
15 "random_weighted_sample",
16 "weighted_choice",
17]
18
19
20# The same helpers for choosing random sequences from distributions
21# uses Python's random module
22# https://docs.python.org/3/library/random.html
23
24
25@py_random_state(2)
26def powerlaw_sequence(n, exponent=2.0, seed=None):
27 """
28 Return sample sequence of length n from a power law distribution.
29 """
30 return [seed.paretovariate(exponent - 1) for i in range(n)]
31
32
33def is_valid_tree_degree_sequence(degree_sequence):
34 """Check if a degree sequence is valid for a tree.
35
36 Two conditions must be met for a degree sequence to be valid for a tree:
37
38 1. The number of nodes must be one more than the number of edges.
39 2. The degree sequence must be trivial or have only strictly positive
40 node degrees.
41
42 Parameters
43 ----------
44 degree_sequence : iterable
45 Iterable of node degrees.
46
47 Returns
48 -------
49 bool
50 Whether the degree sequence is valid for a tree.
51 str
52 Reason for invalidity, or dummy string if valid.
53 """
54 seq = list(degree_sequence)
55 number_of_nodes = len(seq)
56 twice_number_of_edges = sum(seq)
57
58 if 2 * number_of_nodes - twice_number_of_edges != 2:
59 return False, "tree must have one more node than number of edges"
60 elif seq != [0] and any(d <= 0 for d in seq):
61 return False, "nontrivial tree must have strictly positive node degrees"
62 return True, ""
63
64
65@py_random_state(2)
66def zipf_rv(alpha, xmin=1, seed=None):
67 r"""Returns a random value chosen from the Zipf distribution.
68
69 The return value is an integer drawn from the probability distribution
70
71 .. math::
72
73 p(x)=\frac{x^{-\alpha}}{\zeta(\alpha, x_{\min})},
74
75 where $\zeta(\alpha, x_{\min})$ is the Hurwitz zeta function.
76
77 Parameters
78 ----------
79 alpha : float
80 Exponent value of the distribution
81 xmin : int
82 Minimum value
83 seed : integer, random_state, or None (default)
84 Indicator of random number generation state.
85 See :ref:`Randomness<randomness>`.
86
87 Returns
88 -------
89 x : int
90 Random value from Zipf distribution
91
92 Raises
93 ------
94 ValueError:
95 If xmin < 1 or
96 If alpha <= 1
97
98 Notes
99 -----
100 The rejection algorithm generates random values for a the power-law
101 distribution in uniformly bounded expected time dependent on
102 parameters. See [1]_ for details on its operation.
103
104 Examples
105 --------
106 >>> nx.utils.zipf_rv(alpha=2, xmin=3, seed=42)
107 8
108
109 References
110 ----------
111 .. [1] Luc Devroye, Non-Uniform Random Variate Generation,
112 Springer-Verlag, New York, 1986.
113 """
114 if xmin < 1:
115 raise ValueError("xmin < 1")
116 if alpha <= 1:
117 raise ValueError("a <= 1.0")
118 a1 = alpha - 1.0
119 b = 2**a1
120 while True:
121 u = 1.0 - seed.random() # u in (0,1]
122 v = seed.random() # v in [0,1)
123 x = int(xmin * u ** -(1.0 / a1))
124 t = (1.0 + (1.0 / x)) ** a1
125 if v * x * (t - 1.0) / (b - 1.0) <= t / b:
126 break
127 return x
128
129
130def cumulative_distribution(distribution):
131 """Returns normalized cumulative distribution from discrete distribution."""
132
133 cdf = [0.0]
134 cumulative = 0.0
135 for element in distribution:
136 cumulative += element
137 cdf.append(cumulative)
138 return [element / cumulative for element in cdf]
139
140
141@py_random_state(3)
142def discrete_sequence(n, distribution=None, cdistribution=None, seed=None):
143 """
144 Return sample sequence of length n from a given discrete distribution
145 or discrete cumulative distribution.
146
147 One of the following must be specified.
148
149 distribution = histogram of values, will be normalized
150
151 cdistribution = normalized discrete cumulative distribution
152
153 """
154 import bisect
155
156 if cdistribution is not None:
157 cdf = cdistribution
158 elif distribution is not None:
159 cdf = cumulative_distribution(distribution)
160 else:
161 raise nx.NetworkXError(
162 "discrete_sequence: distribution or cdistribution missing"
163 )
164
165 # get a uniform random number
166 inputseq = [seed.random() for i in range(n)]
167
168 # choose from CDF
169 seq = [bisect.bisect_left(cdf, s) - 1 for s in inputseq]
170 return seq
171
172
173@py_random_state(2)
174def random_weighted_sample(mapping, k, seed=None):
175 """Returns k items without replacement from a weighted sample.
176
177 The input is a dictionary of items with weights as values.
178 """
179 if k > len(mapping):
180 raise ValueError("sample larger than population")
181 sample = set()
182 while len(sample) < k:
183 sample.add(weighted_choice(mapping, seed))
184 return list(sample)
185
186
187@py_random_state(1)
188def weighted_choice(mapping, seed=None):
189 """Returns a single element from a weighted sample.
190
191 The input is a dictionary of items with weights as values.
192 """
193 # use roulette method
194 rnd = seed.random() * sum(mapping.values())
195 for k, w in mapping.items():
196 rnd -= w
197 if rnd < 0:
198 return k