Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/category.py: 40%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

123 statements  

1from __future__ import annotations 

2 

3from typing import ( 

4 Any, 

5 Hashable, 

6) 

7 

8import numpy as np 

9 

10from pandas._libs import index as libindex 

11from pandas._typing import ( 

12 Dtype, 

13 DtypeObj, 

14 npt, 

15) 

16from pandas.util._decorators import ( 

17 cache_readonly, 

18 doc, 

19) 

20 

21from pandas.core.dtypes.common import ( 

22 is_categorical_dtype, 

23 is_scalar, 

24) 

25from pandas.core.dtypes.missing import ( 

26 is_valid_na_for_dtype, 

27 isna, 

28 notna, 

29) 

30 

31from pandas.core.arrays.categorical import ( 

32 Categorical, 

33 contains, 

34) 

35from pandas.core.construction import extract_array 

36import pandas.core.indexes.base as ibase 

37from pandas.core.indexes.base import ( 

38 Index, 

39 maybe_extract_name, 

40) 

41from pandas.core.indexes.extension import ( 

42 NDArrayBackedExtensionIndex, 

43 inherit_names, 

44) 

45 

46from pandas.io.formats.printing import pprint_thing 

47 

# Start from a copy of the shared Index doc kwargs (so the base mapping is
# never mutated) and override the target class name used in docstrings.
_index_doc_kwargs: dict[str, str] = {
    **ibase._index_doc_kwargs,
    "target_klass": "CategoricalIndex",
}

50 

51 

52@inherit_names( 

53 [ 

54 "argsort", 

55 "tolist", 

56 "codes", 

57 "categories", 

58 "ordered", 

59 "_reverse_indexer", 

60 "searchsorted", 

61 "min", 

62 "max", 

63 ], 

64 Categorical, 

65) 

66@inherit_names( 

67 [ 

68 "rename_categories", 

69 "reorder_categories", 

70 "add_categories", 

71 "remove_categories", 

72 "remove_unused_categories", 

73 "set_categories", 

74 "as_ordered", 

75 "as_unordered", 

76 ], 

77 Categorical, 

78 wrap=True, 

79) 

80class CategoricalIndex(NDArrayBackedExtensionIndex): 

81 """ 

82 Index based on an underlying :class:`Categorical`. 

83 

84 CategoricalIndex, like Categorical, can only take on a limited, 

85 and usually fixed, number of possible values (`categories`). Also, 

86 like Categorical, it might have an order, but numerical operations 

87 (additions, divisions, ...) are not possible. 

88 

89 Parameters 

90 ---------- 

91 data : array-like (1-dimensional) 

92 The values of the categorical. If `categories` are given, values not in 

93 `categories` will be replaced with NaN. 

94 categories : index-like, optional 

95 The categories for the categorical. Items need to be unique. 

96 If the categories are not given here (and also not in `dtype`), they 

97 will be inferred from the `data`. 

98 ordered : bool, optional 

99 Whether or not this categorical is treated as an ordered 

100 categorical. If not given here or in `dtype`, the resulting 

101 categorical will be unordered. 

102 dtype : CategoricalDtype or "category", optional 

103 If :class:`CategoricalDtype`, cannot be used together with 

104 `categories` or `ordered`. 

105 copy : bool, default False 

106 Make a copy of input ndarray. 

107 name : object, optional 

108 Name to be stored in the index. 

109 

110 Attributes 

111 ---------- 

112 codes 

113 categories 

114 ordered 

115 

116 Methods 

117 ------- 

118 rename_categories 

119 reorder_categories 

120 add_categories 

121 remove_categories 

122 remove_unused_categories 

123 set_categories 

124 as_ordered 

125 as_unordered 

126 map 

127 

128 Raises 

129 ------ 

130 ValueError 

131 If the categories do not validate. 

132 TypeError 

133 If an explicit ``ordered=True`` is given but no `categories` and the 

134 `values` are not sortable. 

135 

136 See Also 

137 -------- 

138 Index : The base pandas Index type. 

139 Categorical : A categorical array. 

140 CategoricalDtype : Type for categorical data. 

141 

142 Notes 

143 ----- 

144 See the `user guide 

145 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#categoricalindex>`__ 

146 for more. 

147 

148 Examples 

149 -------- 

150 >>> pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) 

151 CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], 

152 categories=['a', 'b', 'c'], ordered=False, dtype='category') 

153 

154 ``CategoricalIndex`` can also be instantiated from a ``Categorical``: 

155 

156 >>> c = pd.Categorical(["a", "b", "c", "a", "b", "c"]) 

157 >>> pd.CategoricalIndex(c) 

158 CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], 

159 categories=['a', 'b', 'c'], ordered=False, dtype='category') 

160 

161 Ordered ``CategoricalIndex`` can have a min and max value. 

162 

163 >>> ci = pd.CategoricalIndex( 

164 ... ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"] 

165 ... ) 

166 >>> ci 

167 CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], 

168 categories=['c', 'b', 'a'], ordered=True, dtype='category') 

169 >>> ci.min() 

170 'c' 

171 """ 

172 

173 _typ = "categoricalindex" 

174 _data_cls = Categorical 

175 

@property
def _can_hold_strings(self):
    """Return the ``_can_hold_strings`` flag of the categories Index."""
    categories = self.categories
    return categories._can_hold_strings

179 

@cache_readonly
def _should_fallback_to_positional(self) -> bool:
    """Return the ``_should_fallback_to_positional`` flag of the categories."""
    categories = self.categories
    return categories._should_fallback_to_positional

183 

184 codes: np.ndarray 

185 categories: Index 

186 ordered: bool | None 

187 _data: Categorical 

188 _values: Categorical 

189 

@property
def _engine_type(self) -> type[libindex.IndexEngine]:
    """
    Return the libindex engine class matching the dtype of ``self.codes``.

    The codes may be int8, int16, int32 or int64, and each width has its
    own engine (``libindex.Int8Engine`` and so on).
    """
    engine_for_code_type = {
        np.int8: libindex.Int8Engine,
        np.int16: libindex.Int16Engine,
        np.int32: libindex.Int32Engine,
        np.int64: libindex.Int64Engine,
    }
    code_type = self.codes.dtype.type
    return engine_for_code_type[code_type]

200 

201 # -------------------------------------------------------------------- 

202 # Constructors 

203 

def __new__(
    cls,
    data=None,
    categories=None,
    ordered=None,
    dtype: Dtype | None = None,
    copy: bool = False,
    name: Hashable = None,
) -> CategoricalIndex:
    """
    Build a CategoricalIndex by wrapping ``data`` in a ``Categorical``.

    Scalar ``data`` is rejected through ``cls._raise_scalar_data_error``;
    every other input is handed to the ``Categorical`` constructor together
    with ``categories``, ``ordered``, ``dtype`` and ``copy``.
    """
    index_name = maybe_extract_name(name, data, cls)

    # GH#38944 include None here, which pre-2.0 subbed in []
    if is_scalar(data):
        cls._raise_scalar_data_error(data)

    values = Categorical(
        data,
        categories=categories,
        ordered=ordered,
        dtype=dtype,
        copy=copy,
    )
    return cls._simple_new(values, name=index_name)

224 

225 # -------------------------------------------------------------------- 

226 

def _is_dtype_compat(self, other) -> Categorical:
    """
    *this is an internal non-public method*

    Check that ``other`` is dtype-compatible with ``self`` and return it as
    a Categorical, coercing to ``self.dtype`` when it is not categorical.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Categorical

    Raises
    ------
    TypeError
        If the dtypes are not compatible.
    """
    if is_categorical_dtype(other):
        unwrapped = extract_array(other)
        if unwrapped._categories_match_up_to_permutation(self):
            return unwrapped
        raise TypeError("categories must match existing categories when appending")

    if other._is_multi:
        # preempt raising NotImplementedError in isna call
        raise TypeError("MultiIndex is not dtype-compatible with CategoricalIndex")

    values = other

    # Coerce to our dtype, then make sure no element was lost in the process.
    coerced = CategoricalIndex(Categorical(values, dtype=self.dtype))
    if not coerced.isin(values).all():
        raise TypeError("cannot append a non-category item to a CategoricalIndex")
    other = coerced._values

    # GH#37667 see test_equals_non_category
    same_value = other == values
    both_missing = isna(other) & isna(values)
    if not (same_value | both_missing).all():
        raise TypeError("categories must match existing categories when appending")

    return other

274 

def equals(self, other: object) -> bool:
    """
    Determine if two CategoricalIndex objects contain the same elements.

    Returns
    -------
    bool
        True if ``other`` is the same object, or is an Index that can be
        coerced to this dtype and whose values equal ours; otherwise False.
    """
    if self.is_(other):
        return True

    if not isinstance(other, Index):
        return False

    try:
        compatible = self._is_dtype_compat(other)
    except (TypeError, ValueError):
        # An incompatible dtype means the indexes cannot be equal.
        return False
    else:
        return self._data.equals(compatible)

297 

298 # -------------------------------------------------------------------- 

299 # Rendering Methods 

300 

@property
def _formatter_func(self):
    """Return the ``_formatter_func`` of the categories Index."""
    categories = self.categories
    return categories._formatter_func

304 

def _format_attrs(self):
    """
    Return a list of ``(attr, formatted_value)`` tuples.

    The ``categories`` and ``ordered`` entries come first, followed by
    whatever the parent class reports.
    """
    joined_categories = ", ".join(self._data._repr_categories())
    own_attrs: list[tuple[str, str | int | bool | None]] = [
        ("categories", f"[{joined_categories}]"),
        ("ordered", self.ordered),
    ]
    return own_attrs + super()._format_attrs()

320 

def _format_with_header(self, header: list[str], na_rep: str) -> list[str]:
    """
    Return ``header`` followed by one formatted string per value.

    Missing values are rendered as ``na_rep``; all others go through
    ``pprint_thing`` with tab, carriage return and newline escaped.
    """
    escaped = ("\t", "\r", "\n")
    formatted: list[str] = []
    for value in self._values:
        if notna(value):
            formatted.append(pprint_thing(value, escape_chars=escaped))
        else:
            formatted.append(na_rep)
    return header + formatted

327 

328 # -------------------------------------------------------------------- 

329 

@property
def inferred_type(self) -> str:
    """Return the constant string ``"categorical"``."""
    return "categorical"

333 

@doc(Index.__contains__)
def __contains__(self, key: Any) -> bool:
    # No docstring here on purpose: the text is supplied by the @doc decorator.
    key_is_na = is_valid_na_for_dtype(key, self.categories.dtype)
    if not key_is_na:
        return contains(self, key, container=self._engine)

    # A NaN-like key is contained exactly when the index holds any NaN.
    return self.hasnans

341 

def reindex(
    self, target, method=None, level=None, limit=None, tolerance=None
) -> tuple[Index, npt.NDArray[np.intp] | None]:
    """
    Create index with target's values (move/add/delete values as necessary)

    Returns
    -------
    new_index : pd.Index
        Resulting index
    indexer : np.ndarray[np.intp] or None
        Indices of output values in original index

    Raises
    ------
    NotImplementedError
        If ``method``, ``level`` or ``limit`` is not None.
    """
    # NOTE(review): `tolerance` is accepted but never read in this method.
    unsupported = (
        (method, "argument method is not implemented for CategoricalIndex.reindex"),
        (level, "argument level is not implemented for CategoricalIndex.reindex"),
        (limit, "argument limit is not implemented for CategoricalIndex.reindex"),
    )
    for argument, message in unsupported:
        if argument is not None:
            raise NotImplementedError(message)

    return super().reindex(target)

369 

370 # -------------------------------------------------------------------- 

371 # Indexing Methods 

372 

def _maybe_cast_indexer(self, key) -> int:
    """
    Translate ``key`` into its category code.

    Returns -1 when the lookup raises KeyError but ``key`` is a valid NA
    value for the categories' dtype; any other KeyError is re-raised.
    """
    # GH#41933: we have to do this instead of self._data._validate_scalar
    #  because this will correctly get partial-indexing on Interval categories
    try:
        code = self._data._unbox_scalar(key)
    except KeyError:
        if not is_valid_na_for_dtype(key, self.categories.dtype):
            raise
        return -1
    return code

382 

def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex:
    """
    Re-express ``values`` as a CategoricalIndex that uses our categories.

    Categorical input is re-encoded against our categories; anything else
    is looked up in ``self.categories`` and the positions are used as codes.
    """
    if isinstance(values, CategoricalIndex):
        values = values._data

    if isinstance(values, Categorical):
        # Indexing on codes is more efficient if categories are the same,
        #  so we can apply some optimizations based on the degree of
        #  dtype-matching.
        codes = self._data._encode_with_my_categories(values)._codes
    else:
        positions = self.categories.get_indexer(values)
        codes = positions.astype(self.codes.dtype, copy=False)

    recoded = self._data._from_backing_data(codes)
    return type(self)._simple_new(recoded)

397 

398 # -------------------------------------------------------------------- 

399 

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
    """Return what the categories Index reports for ``dtype``."""
    categories = self.categories
    return categories._is_comparable_dtype(dtype)

402 

def map(self, mapper):
    """
    Map values using an input mapping or function.

    Maps the values (their categories, not the codes) of the index to new
    categories. If the mapping correspondence is one-to-one the result is a
    :class:`~pandas.CategoricalIndex` which has the same order property as
    the original, otherwise an :class:`~pandas.Index` is returned.

    If a `dict` or :class:`~pandas.Series` is used any unmapped category is
    mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
    will be returned.

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.

    Returns
    -------
    pandas.CategoricalIndex or pandas.Index
        Mapped index.

    See Also
    --------
    Index.map : Apply a mapping correspondence on an
        :class:`~pandas.Index`.
    Series.map : Apply a mapping correspondence on a
        :class:`~pandas.Series`.
    Series.apply : Apply more complex functions on a
        :class:`~pandas.Series`.

    Examples
    --------
    >>> idx = pd.CategoricalIndex(['a', 'b', 'c'])
    >>> idx
    CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
                      ordered=False, dtype='category')
    >>> idx.map(lambda x: x.upper())
    CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'],
                     ordered=False, dtype='category')
    >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'})
    CategoricalIndex(['first', 'second', 'third'], categories=['first',
                     'second', 'third'], ordered=False, dtype='category')

    If the mapping is one-to-one the ordering of the categories is
    preserved:

    >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True)
    >>> idx
    CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
                     ordered=True, dtype='category')
    >>> idx.map({'a': 3, 'b': 2, 'c': 1})
    CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True,
                     dtype='category')

    If the mapping is not one-to-one an :class:`~pandas.Index` is returned:

    >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'})
    Index(['first', 'second', 'first'], dtype='object')

    If a `dict` is used, all unmapped categories are mapped to `NaN` and
    the result is an :class:`~pandas.Index`:

    >>> idx.map({'a': 'first', 'b': 'second'})
    Index(['first', 'second', nan], dtype='object')
    """
    # The underlying Categorical does the mapping; re-wrap the result in an
    # Index that keeps our name.
    return Index(self._values.map(mapper), name=self.name)

472 

def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
    """
    Concatenate ``to_concat`` into one index named ``name``.

    The result is of our own type when every piece is dtype-compatible with
    us; on a TypeError the raw values are concatenated into a plain Index.
    """
    # if calling index is category, don't check dtype of others
    try:
        compatible = [self._is_dtype_compat(piece) for piece in to_concat]
        merged = Categorical._concat_same_type(compatible)
    except TypeError:
        # not all to_concat elements are among our categories (or NA)
        from pandas.core.dtypes.concat import concat_compat

        fallback = concat_compat([piece._values for piece in to_concat])
        return Index(fallback, name=name)

    return type(self)._simple_new(merged, name=name)