Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/groupby/indexing.py: 25%

1from __future__ import annotations

3from collections.abc import Iterable

4from typing import (

5 TYPE_CHECKING,

6 Literal,

7 cast,

10import numpy as np

12from pandas.util._decorators import (

13 cache_readonly,

14 doc,

15)

17from pandas.core.dtypes.common import (

18 is_integer,

19 is_list_like,

20)

22if TYPE_CHECKING:

23 from pandas._typing import PositionalIndexer

25 from pandas import (

26 DataFrame,

27 Series,

28 )

29 from pandas.core.groupby import groupby

class GroupByIndexingMixin:
    """
    Mixin for adding ._positional_selector to GroupBy.
    """

    @cache_readonly
    def _positional_selector(self) -> GroupByPositionalSelector:
        """
        Return positional selection for each group.

        ``groupby._positional_selector[i:j]`` is similar to
        ``groupby.apply(lambda x: x.iloc[i:j])``
        but much faster and preserves the original index and order.

        ``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head`
        and :meth:`~GroupBy.tail`. For example:

        - ``head(5)``
        - ``_positional_selector[5:-5]``
        - ``tail(5)``

        together return all the rows.

        Allowed inputs for the index are:

        - An integer valued iterable, e.g. ``range(2, 4)``.
        - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``.

        The output format is the same as :meth:`~GroupBy.head` and
        :meth:`~GroupBy.tail`, namely
        a subset of the ``DataFrame`` or ``Series`` with the index and order preserved.

        Returns
        -------
        Series
            The filtered subset of the original Series.
        DataFrame
            The filtered subset of the original DataFrame.

        See Also
        --------
        DataFrame.iloc : Purely integer-location based indexing for selection by
            position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.

        Notes
        -----
        - The slice step cannot be negative.
        - If the index specification results in overlaps, the item is not duplicated.
        - If the index specification changes the order of items, then
          they are returned in their original order.
          By contrast, ``DataFrame.iloc`` can change the row order.
        - ``groupby()`` parameters such as as_index and dropna are ignored.

        The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth`
        with ``as_index=False`` are:

        - Input to ``_positional_selector`` can include
          one or more slices whereas ``nth``
          just handles an integer or a list of integers.
        - ``_positional_selector`` can accept a slice relative to the
          last row of each group.
        - ``_positional_selector`` does not have an equivalent to the
          ``nth()`` ``dropna`` parameter.

        Examples
        --------
        >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]],
        ...                   columns=["A", "B"])
        >>> df.groupby("A")._positional_selector[1:2]
           A  B
        1  a  2
        4  b  5

        >>> df.groupby("A")._positional_selector[1, -1]
           A  B
        1  a  2
        2  a  3
        4  b  5
        """
        if TYPE_CHECKING:
            # pylint: disable-next=used-before-assignment
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return GroupByPositionalSelector(groupby_self)

    def _make_mask_from_positional_indexer(
        self,
        arg: PositionalIndexer | tuple,
    ) -> np.ndarray:
        """
        Build a boolean row mask from a positional index specification.

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            An integer, a slice, a list-like of integers, or a list-like
            mixing integers and slices. One-shot iterators (e.g. generators)
            are accepted as list-likes.

        Returns
        -------
        np.ndarray
            Mask built by comparing against ``_ascending_count`` and
            ``_descending_count``.

        Raises
        ------
        TypeError
            If ``arg`` is not an integer, list-like or slice.
        ValueError
            If a list-like holds something other than integers and slices,
            or a slice has a negative step.
        """
        if is_list_like(arg):
            # Materialise exactly once: ``is_list_like`` also accepts one-shot
            # iterators, and the ``all`` check below would otherwise exhaust
            # them before the mask is built, silently selecting no rows.
            items = list(cast(Iterable, arg))
            if all(is_integer(item) for item in items):
                mask = self._make_mask_from_list(items)
            else:
                mask = self._make_mask_from_tuple(tuple(items))

        elif isinstance(arg, slice):
            mask = self._make_mask_from_slice(arg)
        elif is_integer(arg):
            mask = self._make_mask_from_int(cast(int, arg))
        else:
            raise TypeError(
                f"Invalid index {type(arg)}. "
                "Must be integer, list-like, slice or a tuple of "
                "integers and slices"
            )

        # The helpers return a plain ``bool`` when no array comparison was
        # needed (e.g. an empty list or ``slice(None)``); expand it to an
        # all-True / all-False array of the same length as the counts.
        if isinstance(mask, bool):
            if mask:
                mask = self._ascending_count >= 0
            else:
                mask = self._ascending_count < 0

        return cast(np.ndarray, mask)

    def _make_mask_from_int(self, arg: int) -> np.ndarray:
        """
        Return the mask selecting a single position in every group.

        Non-negative ``arg`` is matched against ``_ascending_count``;
        negative ``arg`` is matched against ``_descending_count`` so that
        ``-1`` corresponds to a descending count of ``0``.
        """
        if arg >= 0:
            return self._ascending_count == arg
        else:
            return self._descending_count == (-arg - 1)

    def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray:
        """
        Return the mask selecting any of several integer positions.

        Parameters
        ----------
        args : Iterable[int]
            Positions; negative values are matched against
            ``_descending_count``.

        Returns
        -------
        bool or np.ndarray
            ``False`` when ``args`` is empty, otherwise a boolean array.
        """
        # ``args`` is scanned twice below, so take a snapshot first to keep
        # one-shot iterators from losing their negative entries.
        positions = list(args)
        positive = [pos for pos in positions if pos >= 0]
        negative = [-pos - 1 for pos in positions if pos < 0]

        mask: bool | np.ndarray = False

        if positive:
            mask |= np.isin(self._ascending_count, positive)

        if negative:
            mask |= np.isin(self._descending_count, negative)

        return mask

    def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray:
        """
        Return the union of the masks for each integer or slice in ``args``.

        Raises
        ------
        ValueError
            If an element is neither an integer nor a slice.
        """
        mask: bool | np.ndarray = False

        for arg in args:
            if is_integer(arg):
                mask |= self._make_mask_from_int(cast(int, arg))
            elif isinstance(arg, slice):
                mask |= self._make_mask_from_slice(arg)
            else:
                raise ValueError(
                    f"Invalid argument {type(arg)}. Should be int or slice."
                )

        return mask

    def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray:
        """
        Return the mask selecting the positions covered by a slice.

        Returns
        -------
        bool or np.ndarray
            ``True`` when the slice places no restriction (no start, no stop
            and a step of ``None`` or 1), otherwise a boolean array.

        Raises
        ------
        ValueError
            If the slice step is negative.
        """
        start = arg.start
        stop = arg.stop
        step = arg.step

        if step is not None and step < 0:
            raise ValueError(f"Invalid step {step}. Must be non-negative")

        mask: bool | np.ndarray = True

        if step is None:
            step = 1

        if start is None:
            if step > 1:
                mask &= self._ascending_count % step == 0

        elif start >= 0:
            mask &= self._ascending_count >= start

            if step > 1:
                mask &= (self._ascending_count - start) % step == 0

        else:
            # Negative start: keep only rows close enough to the group's end.
            mask &= self._descending_count < -start

            # Step offsets are measured from the row the slice starts at.
            offset_array = self._descending_count + start + 1
            # Where this sum is negative the ascending count is used as the
            # offset instead (presumably groups shorter than ``-start`` rows,
            # where the slice begins at the group's first row).
            limit_array = (
                self._ascending_count + self._descending_count + (start + 1)
            ) < 0
            offset_array = np.where(limit_array, self._ascending_count, offset_array)

            mask &= offset_array % step == 0

        if stop is not None:
            if stop >= 0:
                mask &= self._ascending_count < stop
            else:
                mask &= self._descending_count >= -stop

        return mask

    @cache_readonly
    def _ascending_count(self) -> np.ndarray:
        """Cached result of ``_cumcount_array()`` on the GroupBy object."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array()

    @cache_readonly
    def _descending_count(self) -> np.ndarray:
        """Cached result of ``_cumcount_array(ascending=False)``."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array(ascending=False)

246

247

@doc(GroupByIndexingMixin._positional_selector)
class GroupByPositionalSelector:
    # No class docstring on purpose: the ``doc`` decorator above supplies it
    # from ``GroupByIndexingMixin._positional_selector``.

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # GroupBy object whose rows are filtered by per-group position.
        self.groupby_object = groupby_object

    def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series:
        """
        Pick rows from every group by their position within the group.

        This is the implementation behind GroupBy._positional_selector.

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            One of:
            - int
            - int valued iterable such as list or range
            - slice with step either None or positive
            - tuple of integers and slices

        Returns
        -------
        Series
            The filtered subset of the original groupby Series.
        DataFrame
            The filtered subset of the original groupby DataFrame.

        See Also
        --------
        DataFrame.iloc : Integer-location based indexing for selection by position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy._positional_selector : Return positional selection for each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.
        """
        grouped = self.groupby_object
        # Turn the index specification into a boolean row mask, then let the
        # GroupBy object apply that mask to its selected object.
        selection_mask = grouped._make_mask_from_positional_indexer(arg)
        return grouped._mask_selected_obj(selection_mask)

286

287

class GroupByNthSelector:
    """
    Stand-in for GroupBy.nth that can be both called and subscripted.

    ``selector(n, dropna)`` and ``selector[n]`` both forward to the wrapped
    object's ``_nth`` method.
    """

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # Object that provides the ``_nth`` implementation.
        self.groupby_object = groupby_object

    def __call__(
        self,
        n: PositionalIndexer | tuple,
        dropna: Literal["any", "all", None] = None,
    ) -> DataFrame | Series:
        """Return ``_nth(n, dropna)`` from the wrapped object."""
        nth_impl = self.groupby_object._nth
        return nth_impl(n, dropna)

    def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series:
        """Return ``_nth(n)`` from the wrapped object (no ``dropna`` passed)."""
        nth_impl = self.groupby_object._nth
        return nth_impl(n)