Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_relative

2import operator

3from dataclasses import dataclass

4import numpy as np

5from scipy.special import ndtri

6from ._common import ConfidenceInterval

9def _validate_int(n, bound, name):

10 msg = f'{name} must be an integer not less than {bound}, but got {n!r}'

11 try:

12 n = operator.index(n)

13 except TypeError:

14 raise TypeError(msg) from None

15 if n < bound:

16 raise ValueError(msg)

17 return n

20@dataclass

21class RelativeRiskResult:

22 """

23 Result of `scipy.stats.contingency.relative_risk`.

25 Attributes

26 ----------

27 relative_risk : float

28 This is::

30 (exposed_cases/exposed_total) / (control_cases/control_total)

32 exposed_cases : int

33 The number of "cases" (i.e. occurrence of disease or other event

34 of interest) among the sample of "exposed" individuals.

35 exposed_total : int

36 The total number of "exposed" individuals in the sample.

37 control_cases : int

38 The number of "cases" among the sample of "control" or non-exposed

39 individuals.

40 control_total : int

41 The total number of "control" individuals in the sample.

43 Methods

44 -------

45 confidence_interval :

46 Compute the confidence interval for the relative risk estimate.

47 """

49 relative_risk: float

50 exposed_cases: int

51 exposed_total: int

52 control_cases: int

53 control_total: int

55 def confidence_interval(self, confidence_level=0.95):

56 """

57 Compute the confidence interval for the relative risk.

59 The confidence interval is computed using the Katz method

60 (i.e. "Method C" of [1]_; see also [2]_, section 3.1.2).

62 Parameters

63 ----------

64 confidence_level : float, optional

65 The confidence level to use for the confidence interval.

66 Default is 0.95.

68 Returns

69 -------

70 ci : ConfidenceInterval instance

71 The return value is an object with attributes ``low`` and

72 ``high`` that hold the confidence interval.

74 References

75 ----------

76 .. [1] D. Katz, J. Baptista, S. P. Azen and M. C. Pike, "Obtaining

77 confidence intervals for the risk ratio in cohort studies",

78 Biometrics, 34, 469-474 (1978).

79 .. [2] Hardeo Sahai and Anwer Khurshid, Statistics in Epidemiology,

80 CRC Press LLC, Boca Raton, FL, USA (1996).

83 Examples

84 --------

85 >>> from scipy.stats.contingency import relative_risk

86 >>> result = relative_risk(exposed_cases=10, exposed_total=75,

87 ... control_cases=12, control_total=225)

88 >>> result.relative_risk

89 2.5

90 >>> result.confidence_interval()

91 ConfidenceInterval(low=1.1261564003469628, high=5.549850800541033)

92 """

93 if not 0 <= confidence_level <= 1:

94 raise ValueError('confidence_level must be in the interval '

95 '[0, 1].')

97 # Handle edge cases where either exposed_cases or control_cases

98 # is zero. We follow the convention of the R function riskratio

99 # from the epitools library.

100 if self.exposed_cases == 0 and self.control_cases == 0:

101 # relative risk is nan.

102 return ConfidenceInterval(low=np.nan, high=np.nan)

103 elif self.exposed_cases == 0:

104 # relative risk is 0.

105 return ConfidenceInterval(low=0.0, high=np.nan)

106 elif self.control_cases == 0:

107 # relative risk is inf

108 return ConfidenceInterval(low=np.nan, high=np.inf)

109

110 alpha = 1 - confidence_level

111 z = ndtri(1 - alpha/2)

112 rr = self.relative_risk

113

114 # Estimate of the variance of log(rr) is

115 # var(log(rr)) = 1/exposed_cases - 1/exposed_total +

116 # 1/control_cases - 1/control_total

117 # and the standard error is the square root of that.

118 se = np.sqrt(1/self.exposed_cases - 1/self.exposed_total +

119 1/self.control_cases - 1/self.control_total)

120 delta = z*se

121 katz_lo = rr*np.exp(-delta)

122 katz_hi = rr*np.exp(delta)

123 return ConfidenceInterval(low=katz_lo, high=katz_hi)

124

125

126def relative_risk(exposed_cases, exposed_total, control_cases, control_total):

127 """

128 Compute the relative risk (also known as the risk ratio).

129

130 This function computes the relative risk associated with a 2x2

131 contingency table ([1]_, section 2.2.3; [2]_, section 3.1.2). Instead

132 of accepting a table as an argument, the individual numbers that are

133 used to compute the relative risk are given as separate parameters.

134 This is to avoid the ambiguity of which row or column of the contingency

135 table corresponds to the "exposed" cases and which corresponds to the

136 "control" cases. Unlike, say, the odds ratio, the relative risk is not

137 invariant under an interchange of the rows or columns.

138

139 Parameters

140 ----------

141 exposed_cases : nonnegative int

142 The number of "cases" (i.e. occurrence of disease or other event

143 of interest) among the sample of "exposed" individuals.

144 exposed_total : positive int

145 The total number of "exposed" individuals in the sample.

146 control_cases : nonnegative int

147 The number of "cases" among the sample of "control" or non-exposed

148 individuals.

149 control_total : positive int

150 The total number of "control" individuals in the sample.

151

152 Returns

153 -------

154 result : instance of `~scipy.stats._result_classes.RelativeRiskResult`

155 The object has the float attribute ``relative_risk``, which is::

156

157 rr = (exposed_cases/exposed_total) / (control_cases/control_total)

158

159 The object also has the method ``confidence_interval`` to compute

160 the confidence interval of the relative risk for a given confidence

161 level.

162

163 See Also

164 --------

165 odds_ratio

166

167 Notes

168 -----

169 The R package epitools has the function `riskratio`, which accepts

170 a table with the following layout::

171

172 disease=0 disease=1

173 exposed=0 (ref) n00 n01

174 exposed=1 n10 n11

175

176 With a 2x2 table in the above format, the estimate of the CI is

177 computed by `riskratio` when the argument method="wald" is given,

178 or with the function `riskratio.wald`.

179

180 For example, in a test of the incidence of lung cancer among a

181 sample of smokers and nonsmokers, the "exposed" category would

182 correspond to "is a smoker" and the "disease" category would

183 correspond to "has or had lung cancer".

184

185 To pass the same data to ``relative_risk``, use::

186

187 relative_risk(n11, n10 + n11, n01, n00 + n01)

188

189 .. versionadded:: 1.7.0

190

191 References

192 ----------

193 .. [1] Alan Agresti, An Introduction to Categorical Data Analysis

194 (second edition), Wiley, Hoboken, NJ, USA (2007).

195 .. [2] Hardeo Sahai and Anwer Khurshid, Statistics in Epidemiology,

196 CRC Press LLC, Boca Raton, FL, USA (1996).

197

198 Examples

199 --------

200 >>> from scipy.stats.contingency import relative_risk

201

202 This example is from Example 3.1 of [2]_. The results of a heart

203 disease study are summarized in the following table::

204

205 High CAT Low CAT Total

206 -------- ------- -----

207 CHD 27 44 71

208 No CHD 95 443 538

209

210 Total 122 487 609

211

212 CHD is coronary heart disease, and CAT refers to the level of

213 circulating catecholamine. CAT is the "exposure" variable, and

214 high CAT is the "exposed" category. So the data from the table

215 to be passed to ``relative_risk`` is::

216

217 exposed_cases = 27

218 exposed_total = 122

219 control_cases = 44

220 control_total = 487

221

222 >>> result = relative_risk(27, 122, 44, 487)

223 >>> result.relative_risk

224 2.4495156482861398

225

226 Find the confidence interval for the relative risk.

227

228 >>> result.confidence_interval(confidence_level=0.95)

229 ConfidenceInterval(low=1.5836990926700116, high=3.7886786315466354)

230

231 The interval does not contain 1, so the data supports the statement

232 that high CAT is associated with greater risk of CHD.

233 """

234 # Relative risk is a trivial calculation. The nontrivial part is in the

235 # `confidence_interval` method of the RelativeRiskResult class.

236

237 exposed_cases = _validate_int(exposed_cases, 0, "exposed_cases")

238 exposed_total = _validate_int(exposed_total, 1, "exposed_total")

239 control_cases = _validate_int(control_cases, 0, "control_cases")

240 control_total = _validate_int(control_total, 1, "control_total")

241

242 if exposed_cases > exposed_total:

243 raise ValueError('exposed_cases must not exceed exposed_total.')

244 if control_cases > control_total:

245 raise ValueError('control_cases must not exceed control_total.')

246

247 if exposed_cases == 0 and control_cases == 0:

248 # relative risk is 0/0.

249 rr = np.nan

250 elif exposed_cases == 0:

251 # relative risk is 0/nonzero

252 rr = 0.0

253 elif control_cases == 0:

254 # relative risk is nonzero/0.

255 rr = np.inf

256 else:

257 p1 = exposed_cases / exposed_total

258 p2 = control_cases / control_total

259 rr = p1 / p2

260 return RelativeRiskResult(relative_risk=rr,

261 exposed_cases=exposed_cases,

262 exposed_total=exposed_total,

263 control_cases=control_cases,

264 control_total=control_total)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_relative_risk.py: 26%

57 statements