1"""
2Utilities for generating random numbers, random sequences, and
3random selections.
4"""
5
6import networkx as nx
7from networkx.utils import py_random_state
8
9__all__ = [
10 "powerlaw_sequence",
11 "is_valid_tree_degree_sequence",
12 "zipf_rv",
13 "cumulative_distribution",
14 "discrete_sequence",
15 "random_weighted_sample",
16 "weighted_choice",
17]
18
19
20# The same helpers for choosing random sequences from distributions
21# uses Python's random module
22# https://docs.python.org/3/library/random.html
23
24
25@py_random_state(2)
26def powerlaw_sequence(n, exponent=2.0, seed=None):
27 """
28 Return sample sequence of length n from a power law distribution.
29 """
30 return [seed.paretovariate(exponent - 1) for i in range(n)]
31
32
33def is_valid_tree_degree_sequence(degree_sequence):
34 """Check if a degree sequence is valid for a tree.
35
36 Two conditions must be met for a degree sequence to be valid for a tree:
37
38 1. The number of nodes must be one more than the number of edges.
39 2. The degree sequence must be trivial or have only strictly positive
40 node degrees.
41
42 Parameters
43 ----------
44 degree_sequence : iterable
45 Iterable of node degrees.
46
47 Returns
48 -------
49 bool
50 Whether the degree sequence is valid for a tree.
51 str
52 Reason for invalidity, or dummy string if valid.
53 """
54 seq = list(degree_sequence)
55 number_of_nodes = len(seq)
56 twice_number_of_edges = sum(seq)
57
58 if 2 * number_of_nodes - twice_number_of_edges != 2:
59 return False, "tree must have one more node than number of edges"
60 elif seq != [0] and any(d <= 0 for d in seq):
61 return False, "nontrivial tree must have strictly positive node degrees"
62 return True, ""
63
64
65@py_random_state(2)
66def zipf_rv(alpha, xmin=1, seed=None):
67 r"""Returns a random value chosen from the Zipf distribution.
68
69 The return value is an integer drawn from the probability distribution
70
71 .. math::
72
73 p(x)=\frac{x^{-\alpha}}{\zeta(\alpha, x_{\min})},
74
75 where $\zeta(\alpha, x_{\min})$ is the Hurwitz zeta function.
76
77 Parameters
78 ----------
79 alpha : float
80 Exponent value of the distribution
81 xmin : int
82 Minimum value
83 seed : integer, random_state, or None (default)
84 Indicator of random number generation state.
85 See :ref:`Randomness<randomness>`.
86
87 Returns
88 -------
89 x : int
90 Random value from Zipf distribution
91
92 Raises
93 ------
94 ValueError:
95 If xmin < 1 or
96 If alpha <= 1
97
98 Notes
99 -----
100 The rejection algorithm generates random values for a the power-law
101 distribution in uniformly bounded expected time dependent on
102 parameters. See [1]_ for details on its operation.
103
104 Examples
105 --------
106 >>> nx.utils.zipf_rv(alpha=2, xmin=3, seed=42)
107 8
108
109 References
110 ----------
111 .. [1] Luc Devroye, Non-Uniform Random Variate Generation,
112 Springer-Verlag, New York, 1986.
113 """
114 if xmin < 1:
115 raise ValueError("xmin < 1")
116 if alpha <= 1:
117 raise ValueError("a <= 1.0")
118 a1 = alpha - 1.0
119 b = 2**a1
120 while True:
121 u = 1.0 - seed.random() # u in (0,1]
122 v = seed.random() # v in [0,1)
123 x = int(xmin * u ** -(1.0 / a1))
124 t = (1.0 + (1.0 / x)) ** a1
125 if v * x * (t - 1.0) / (b - 1.0) <= t / b:
126 break
127 return x
128
129
130def cumulative_distribution(distribution):
131 """Returns normalized cumulative distribution from discrete distribution."""
132
133 cdf = [0.0]
134 psum = sum(distribution)
135 for i in range(len(distribution)):
136 cdf.append(cdf[i] + distribution[i] / psum)
137 return cdf
138
139
140@py_random_state(3)
141def discrete_sequence(n, distribution=None, cdistribution=None, seed=None):
142 """
143 Return sample sequence of length n from a given discrete distribution
144 or discrete cumulative distribution.
145
146 One of the following must be specified.
147
148 distribution = histogram of values, will be normalized
149
150 cdistribution = normalized discrete cumulative distribution
151
152 """
153 import bisect
154
155 if cdistribution is not None:
156 cdf = cdistribution
157 elif distribution is not None:
158 cdf = cumulative_distribution(distribution)
159 else:
160 raise nx.NetworkXError(
161 "discrete_sequence: distribution or cdistribution missing"
162 )
163
164 # get a uniform random number
165 inputseq = [seed.random() for i in range(n)]
166
167 # choose from CDF
168 seq = [bisect.bisect_left(cdf, s) - 1 for s in inputseq]
169 return seq
170
171
172@py_random_state(2)
173def random_weighted_sample(mapping, k, seed=None):
174 """Returns k items without replacement from a weighted sample.
175
176 The input is a dictionary of items with weights as values.
177 """
178 if k > len(mapping):
179 raise ValueError("sample larger than population")
180 sample = set()
181 while len(sample) < k:
182 sample.add(weighted_choice(mapping, seed))
183 return list(sample)
184
185
186@py_random_state(1)
187def weighted_choice(mapping, seed=None):
188 """Returns a single element from a weighted sample.
189
190 The input is a dictionary of items with weights as values.
191 """
192 # use roulette method
193 rnd = seed.random() * sum(mapping.values())
194 for k, w in mapping.items():
195 rnd -= w
196 if rnd < 0:
197 return k