Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_relative_risk.py: 26%

57 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1 

2import operator 

3from dataclasses import dataclass 

4import numpy as np 

5from scipy.special import ndtri 

6from ._common import ConfidenceInterval 

7 

8 

9def _validate_int(n, bound, name): 

10 msg = f'{name} must be an integer not less than {bound}, but got {n!r}' 

11 try: 

12 n = operator.index(n) 

13 except TypeError: 

14 raise TypeError(msg) from None 

15 if n < bound: 

16 raise ValueError(msg) 

17 return n 

18 

19 

20@dataclass 

21class RelativeRiskResult: 

22 """ 

23 Result of `scipy.stats.contingency.relative_risk`. 

24 

25 Attributes 

26 ---------- 

27 relative_risk : float 

28 This is:: 

29 

30 (exposed_cases/exposed_total) / (control_cases/control_total) 

31 

32 exposed_cases : int 

33 The number of "cases" (i.e. occurrence of disease or other event 

34 of interest) among the sample of "exposed" individuals. 

35 exposed_total : int 

36 The total number of "exposed" individuals in the sample. 

37 control_cases : int 

38 The number of "cases" among the sample of "control" or non-exposed 

39 individuals. 

40 control_total : int 

41 The total number of "control" individuals in the sample. 

42 

43 Methods 

44 ------- 

45 confidence_interval : 

46 Compute the confidence interval for the relative risk estimate. 

47 """ 

48 

49 relative_risk: float 

50 exposed_cases: int 

51 exposed_total: int 

52 control_cases: int 

53 control_total: int 

54 

55 def confidence_interval(self, confidence_level=0.95): 

56 """ 

57 Compute the confidence interval for the relative risk. 

58 

59 The confidence interval is computed using the Katz method 

60 (i.e. "Method C" of [1]_; see also [2]_, section 3.1.2). 

61 

62 Parameters 

63 ---------- 

64 confidence_level : float, optional 

65 The confidence level to use for the confidence interval. 

66 Default is 0.95. 

67 

68 Returns 

69 ------- 

70 ci : ConfidenceInterval instance 

71 The return value is an object with attributes ``low`` and 

72 ``high`` that hold the confidence interval. 

73 

74 References 

75 ---------- 

76 .. [1] D. Katz, J. Baptista, S. P. Azen and M. C. Pike, "Obtaining 

77 confidence intervals for the risk ratio in cohort studies", 

78 Biometrics, 34, 469-474 (1978). 

79 .. [2] Hardeo Sahai and Anwer Khurshid, Statistics in Epidemiology, 

80 CRC Press LLC, Boca Raton, FL, USA (1996). 

81 

82 

83 Examples 

84 -------- 

85 >>> from scipy.stats.contingency import relative_risk 

86 >>> result = relative_risk(exposed_cases=10, exposed_total=75, 

87 ... control_cases=12, control_total=225) 

88 >>> result.relative_risk 

89 2.5 

90 >>> result.confidence_interval() 

91 ConfidenceInterval(low=1.1261564003469628, high=5.549850800541033) 

92 """ 

93 if not 0 <= confidence_level <= 1: 

94 raise ValueError('confidence_level must be in the interval ' 

95 '[0, 1].') 

96 

97 # Handle edge cases where either exposed_cases or control_cases 

98 # is zero. We follow the convention of the R function riskratio 

99 # from the epitools library. 

100 if self.exposed_cases == 0 and self.control_cases == 0: 

101 # relative risk is nan. 

102 return ConfidenceInterval(low=np.nan, high=np.nan) 

103 elif self.exposed_cases == 0: 

104 # relative risk is 0. 

105 return ConfidenceInterval(low=0.0, high=np.nan) 

106 elif self.control_cases == 0: 

107 # relative risk is inf 

108 return ConfidenceInterval(low=np.nan, high=np.inf) 

109 

110 alpha = 1 - confidence_level 

111 z = ndtri(1 - alpha/2) 

112 rr = self.relative_risk 

113 

114 # Estimate of the variance of log(rr) is 

115 # var(log(rr)) = 1/exposed_cases - 1/exposed_total + 

116 # 1/control_cases - 1/control_total 

117 # and the standard error is the square root of that. 

118 se = np.sqrt(1/self.exposed_cases - 1/self.exposed_total + 

119 1/self.control_cases - 1/self.control_total) 

120 delta = z*se 

121 katz_lo = rr*np.exp(-delta) 

122 katz_hi = rr*np.exp(delta) 

123 return ConfidenceInterval(low=katz_lo, high=katz_hi) 

124 

125 

126def relative_risk(exposed_cases, exposed_total, control_cases, control_total): 

127 """ 

128 Compute the relative risk (also known as the risk ratio). 

129 

130 This function computes the relative risk associated with a 2x2 

131 contingency table ([1]_, section 2.2.3; [2]_, section 3.1.2). Instead 

132 of accepting a table as an argument, the individual numbers that are 

133 used to compute the relative risk are given as separate parameters. 

134 This is to avoid the ambiguity of which row or column of the contingency 

135 table corresponds to the "exposed" cases and which corresponds to the 

136 "control" cases. Unlike, say, the odds ratio, the relative risk is not 

137 invariant under an interchange of the rows or columns. 

138 

139 Parameters 

140 ---------- 

141 exposed_cases : nonnegative int 

142 The number of "cases" (i.e. occurrence of disease or other event 

143 of interest) among the sample of "exposed" individuals. 

144 exposed_total : positive int 

145 The total number of "exposed" individuals in the sample. 

146 control_cases : nonnegative int 

147 The number of "cases" among the sample of "control" or non-exposed 

148 individuals. 

149 control_total : positive int 

150 The total number of "control" individuals in the sample. 

151 

152 Returns 

153 ------- 

154 result : instance of `~scipy.stats._result_classes.RelativeRiskResult` 

155 The object has the float attribute ``relative_risk``, which is:: 

156 

157 rr = (exposed_cases/exposed_total) / (control_cases/control_total) 

158 

159 The object also has the method ``confidence_interval`` to compute 

160 the confidence interval of the relative risk for a given confidence 

161 level. 

162 

163 See Also 

164 -------- 

165 odds_ratio 

166 

167 Notes 

168 ----- 

169 The R package epitools has the function `riskratio`, which accepts 

170 a table with the following layout:: 

171 

172 disease=0 disease=1 

173 exposed=0 (ref) n00 n01 

174 exposed=1 n10 n11 

175 

176 With a 2x2 table in the above format, the estimate of the CI is 

177 computed by `riskratio` when the argument method="wald" is given, 

178 or with the function `riskratio.wald`. 

179 

180 For example, in a test of the incidence of lung cancer among a 

181 sample of smokers and nonsmokers, the "exposed" category would 

182 correspond to "is a smoker" and the "disease" category would 

183 correspond to "has or had lung cancer". 

184 

185 To pass the same data to ``relative_risk``, use:: 

186 

187 relative_risk(n11, n10 + n11, n01, n00 + n01) 

188 

189 .. versionadded:: 1.7.0 

190 

191 References 

192 ---------- 

193 .. [1] Alan Agresti, An Introduction to Categorical Data Analysis 

194 (second edition), Wiley, Hoboken, NJ, USA (2007). 

195 .. [2] Hardeo Sahai and Anwer Khurshid, Statistics in Epidemiology, 

196 CRC Press LLC, Boca Raton, FL, USA (1996). 

197 

198 Examples 

199 -------- 

200 >>> from scipy.stats.contingency import relative_risk 

201 

202 This example is from Example 3.1 of [2]_. The results of a heart 

203 disease study are summarized in the following table:: 

204 

205 High CAT Low CAT Total 

206 -------- ------- ----- 

207 CHD 27 44 71 

208 No CHD 95 443 538 

209 

210 Total 122 487 609 

211 

212 CHD is coronary heart disease, and CAT refers to the level of 

213 circulating catecholamine. CAT is the "exposure" variable, and 

214 high CAT is the "exposed" category. So the data from the table 

215 to be passed to ``relative_risk`` is:: 

216 

217 exposed_cases = 27 

218 exposed_total = 122 

219 control_cases = 44 

220 control_total = 487 

221 

222 >>> result = relative_risk(27, 122, 44, 487) 

223 >>> result.relative_risk 

224 2.4495156482861398 

225 

226 Find the confidence interval for the relative risk. 

227 

228 >>> result.confidence_interval(confidence_level=0.95) 

229 ConfidenceInterval(low=1.5836990926700116, high=3.7886786315466354) 

230 

231 The interval does not contain 1, so the data supports the statement 

232 that high CAT is associated with greater risk of CHD. 

233 """ 

234 # Relative risk is a trivial calculation. The nontrivial part is in the 

235 # `confidence_interval` method of the RelativeRiskResult class. 

236 

237 exposed_cases = _validate_int(exposed_cases, 0, "exposed_cases") 

238 exposed_total = _validate_int(exposed_total, 1, "exposed_total") 

239 control_cases = _validate_int(control_cases, 0, "control_cases") 

240 control_total = _validate_int(control_total, 1, "control_total") 

241 

242 if exposed_cases > exposed_total: 

243 raise ValueError('exposed_cases must not exceed exposed_total.') 

244 if control_cases > control_total: 

245 raise ValueError('control_cases must not exceed control_total.') 

246 

247 if exposed_cases == 0 and control_cases == 0: 

248 # relative risk is 0/0. 

249 rr = np.nan 

250 elif exposed_cases == 0: 

251 # relative risk is 0/nonzero 

252 rr = 0.0 

253 elif control_cases == 0: 

254 # relative risk is nonzero/0. 

255 rr = np.inf 

256 else: 

257 p1 = exposed_cases / exposed_total 

258 p2 = control_cases / control_total 

259 rr = p1 / p2 

260 return RelativeRiskResult(relative_risk=rr, 

261 exposed_cases=exposed_cases, 

262 exposed_total=exposed_total, 

263 control_cases=control_cases, 

264 control_total=control_total)