Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_relative_risk.py: 26%
57 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
2import operator
3from dataclasses import dataclass
4import numpy as np
5from scipy.special import ndtri
6from ._common import ConfidenceInterval
9def _validate_int(n, bound, name):
10 msg = f'{name} must be an integer not less than {bound}, but got {n!r}'
11 try:
12 n = operator.index(n)
13 except TypeError:
14 raise TypeError(msg) from None
15 if n < bound:
16 raise ValueError(msg)
17 return n
20@dataclass
21class RelativeRiskResult:
22 """
23 Result of `scipy.stats.contingency.relative_risk`.
25 Attributes
26 ----------
27 relative_risk : float
28 This is::
30 (exposed_cases/exposed_total) / (control_cases/control_total)
32 exposed_cases : int
33 The number of "cases" (i.e. occurrence of disease or other event
34 of interest) among the sample of "exposed" individuals.
35 exposed_total : int
36 The total number of "exposed" individuals in the sample.
37 control_cases : int
38 The number of "cases" among the sample of "control" or non-exposed
39 individuals.
40 control_total : int
41 The total number of "control" individuals in the sample.
43 Methods
44 -------
45 confidence_interval :
46 Compute the confidence interval for the relative risk estimate.
47 """
49 relative_risk: float
50 exposed_cases: int
51 exposed_total: int
52 control_cases: int
53 control_total: int
55 def confidence_interval(self, confidence_level=0.95):
56 """
57 Compute the confidence interval for the relative risk.
59 The confidence interval is computed using the Katz method
60 (i.e. "Method C" of [1]_; see also [2]_, section 3.1.2).
62 Parameters
63 ----------
64 confidence_level : float, optional
65 The confidence level to use for the confidence interval.
66 Default is 0.95.
68 Returns
69 -------
70 ci : ConfidenceInterval instance
71 The return value is an object with attributes ``low`` and
72 ``high`` that hold the confidence interval.
74 References
75 ----------
76 .. [1] D. Katz, J. Baptista, S. P. Azen and M. C. Pike, "Obtaining
77 confidence intervals for the risk ratio in cohort studies",
78 Biometrics, 34, 469-474 (1978).
79 .. [2] Hardeo Sahai and Anwer Khurshid, Statistics in Epidemiology,
80 CRC Press LLC, Boca Raton, FL, USA (1996).
83 Examples
84 --------
85 >>> from scipy.stats.contingency import relative_risk
86 >>> result = relative_risk(exposed_cases=10, exposed_total=75,
87 ... control_cases=12, control_total=225)
88 >>> result.relative_risk
89 2.5
90 >>> result.confidence_interval()
91 ConfidenceInterval(low=1.1261564003469628, high=5.549850800541033)
92 """
93 if not 0 <= confidence_level <= 1:
94 raise ValueError('confidence_level must be in the interval '
95 '[0, 1].')
97 # Handle edge cases where either exposed_cases or control_cases
98 # is zero. We follow the convention of the R function riskratio
99 # from the epitools library.
100 if self.exposed_cases == 0 and self.control_cases == 0:
101 # relative risk is nan.
102 return ConfidenceInterval(low=np.nan, high=np.nan)
103 elif self.exposed_cases == 0:
104 # relative risk is 0.
105 return ConfidenceInterval(low=0.0, high=np.nan)
106 elif self.control_cases == 0:
107 # relative risk is inf
108 return ConfidenceInterval(low=np.nan, high=np.inf)
110 alpha = 1 - confidence_level
111 z = ndtri(1 - alpha/2)
112 rr = self.relative_risk
114 # Estimate of the variance of log(rr) is
115 # var(log(rr)) = 1/exposed_cases - 1/exposed_total +
116 # 1/control_cases - 1/control_total
117 # and the standard error is the square root of that.
118 se = np.sqrt(1/self.exposed_cases - 1/self.exposed_total +
119 1/self.control_cases - 1/self.control_total)
120 delta = z*se
121 katz_lo = rr*np.exp(-delta)
122 katz_hi = rr*np.exp(delta)
123 return ConfidenceInterval(low=katz_lo, high=katz_hi)
126def relative_risk(exposed_cases, exposed_total, control_cases, control_total):
127 """
128 Compute the relative risk (also known as the risk ratio).
130 This function computes the relative risk associated with a 2x2
131 contingency table ([1]_, section 2.2.3; [2]_, section 3.1.2). Instead
132 of accepting a table as an argument, the individual numbers that are
133 used to compute the relative risk are given as separate parameters.
134 This is to avoid the ambiguity of which row or column of the contingency
135 table corresponds to the "exposed" cases and which corresponds to the
136 "control" cases. Unlike, say, the odds ratio, the relative risk is not
137 invariant under an interchange of the rows or columns.
139 Parameters
140 ----------
141 exposed_cases : nonnegative int
142 The number of "cases" (i.e. occurrence of disease or other event
143 of interest) among the sample of "exposed" individuals.
144 exposed_total : positive int
145 The total number of "exposed" individuals in the sample.
146 control_cases : nonnegative int
147 The number of "cases" among the sample of "control" or non-exposed
148 individuals.
149 control_total : positive int
150 The total number of "control" individuals in the sample.
152 Returns
153 -------
154 result : instance of `~scipy.stats._result_classes.RelativeRiskResult`
155 The object has the float attribute ``relative_risk``, which is::
157 rr = (exposed_cases/exposed_total) / (control_cases/control_total)
159 The object also has the method ``confidence_interval`` to compute
160 the confidence interval of the relative risk for a given confidence
161 level.
163 See Also
164 --------
165 odds_ratio
167 Notes
168 -----
169 The R package epitools has the function `riskratio`, which accepts
170 a table with the following layout::
172 disease=0 disease=1
173 exposed=0 (ref) n00 n01
174 exposed=1 n10 n11
176 With a 2x2 table in the above format, the estimate of the CI is
177 computed by `riskratio` when the argument method="wald" is given,
178 or with the function `riskratio.wald`.
180 For example, in a test of the incidence of lung cancer among a
181 sample of smokers and nonsmokers, the "exposed" category would
182 correspond to "is a smoker" and the "disease" category would
183 correspond to "has or had lung cancer".
185 To pass the same data to ``relative_risk``, use::
187 relative_risk(n11, n10 + n11, n01, n00 + n01)
189 .. versionadded:: 1.7.0
191 References
192 ----------
193 .. [1] Alan Agresti, An Introduction to Categorical Data Analysis
194 (second edition), Wiley, Hoboken, NJ, USA (2007).
195 .. [2] Hardeo Sahai and Anwer Khurshid, Statistics in Epidemiology,
196 CRC Press LLC, Boca Raton, FL, USA (1996).
198 Examples
199 --------
200 >>> from scipy.stats.contingency import relative_risk
202 This example is from Example 3.1 of [2]_. The results of a heart
203 disease study are summarized in the following table::
205 High CAT Low CAT Total
206 -------- ------- -----
207 CHD 27 44 71
208 No CHD 95 443 538
210 Total 122 487 609
212 CHD is coronary heart disease, and CAT refers to the level of
213 circulating catecholamine. CAT is the "exposure" variable, and
214 high CAT is the "exposed" category. So the data from the table
215 to be passed to ``relative_risk`` is::
217 exposed_cases = 27
218 exposed_total = 122
219 control_cases = 44
220 control_total = 487
222 >>> result = relative_risk(27, 122, 44, 487)
223 >>> result.relative_risk
224 2.4495156482861398
226 Find the confidence interval for the relative risk.
228 >>> result.confidence_interval(confidence_level=0.95)
229 ConfidenceInterval(low=1.5836990926700116, high=3.7886786315466354)
231 The interval does not contain 1, so the data supports the statement
232 that high CAT is associated with greater risk of CHD.
233 """
234 # Relative risk is a trivial calculation. The nontrivial part is in the
235 # `confidence_interval` method of the RelativeRiskResult class.
237 exposed_cases = _validate_int(exposed_cases, 0, "exposed_cases")
238 exposed_total = _validate_int(exposed_total, 1, "exposed_total")
239 control_cases = _validate_int(control_cases, 0, "control_cases")
240 control_total = _validate_int(control_total, 1, "control_total")
242 if exposed_cases > exposed_total:
243 raise ValueError('exposed_cases must not exceed exposed_total.')
244 if control_cases > control_total:
245 raise ValueError('control_cases must not exceed control_total.')
247 if exposed_cases == 0 and control_cases == 0:
248 # relative risk is 0/0.
249 rr = np.nan
250 elif exposed_cases == 0:
251 # relative risk is 0/nonzero
252 rr = 0.0
253 elif control_cases == 0:
254 # relative risk is nonzero/0.
255 rr = np.inf
256 else:
257 p1 = exposed_cases / exposed_total
258 p2 = control_cases / control_total
259 rr = p1 / p2
260 return RelativeRiskResult(relative_risk=rr,
261 exposed_cases=exposed_cases,
262 exposed_total=exposed_total,
263 control_cases=control_cases,
264 control_total=control_total)