Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/category.py: 40%

1from __future__ import annotations

3from typing import (

4 Any,

5 Hashable,

8import numpy as np

10from pandas._libs import index as libindex

11from pandas._typing import (

12 Dtype,

13 DtypeObj,

14 npt,

15)

16from pandas.util._decorators import (

17 cache_readonly,

18 doc,

19)

21from pandas.core.dtypes.common import (

22 is_categorical_dtype,

23 is_scalar,

24)

25from pandas.core.dtypes.missing import (

26 is_valid_na_for_dtype,

27 isna,

28 notna,

29)

31from pandas.core.arrays.categorical import (

32 Categorical,

33 contains,

34)

35from pandas.core.construction import extract_array

36import pandas.core.indexes.base as ibase

37from pandas.core.indexes.base import (

38 Index,

39 maybe_extract_name,

40)

41from pandas.core.indexes.extension import (

42 NDArrayBackedExtensionIndex,

43 inherit_names,

44)

46from pandas.io.formats.printing import pprint_thing

# Docstring-substitution values for this module: a private copy of the base
# Index mapping with ``target_klass`` overridden for CategoricalIndex.
_index_doc_kwargs: dict[str, str] = {
    **ibase._index_doc_kwargs,
    "target_klass": "CategoricalIndex",
}

52@inherit_names(

53 [

54 "argsort",

55 "tolist",

56 "codes",

57 "categories",

58 "ordered",

59 "_reverse_indexer",

60 "searchsorted",

61 "min",

62 "max",

63 ],

64 Categorical,

65)

66@inherit_names(

67 [

68 "rename_categories",

69 "reorder_categories",

70 "add_categories",

71 "remove_categories",

72 "remove_unused_categories",

73 "set_categories",

74 "as_ordered",

75 "as_unordered",

76 ],

77 Categorical,

78 wrap=True,

79)

80class CategoricalIndex(NDArrayBackedExtensionIndex):

81 """

82 Index based on an underlying :class:`Categorical`.

84 CategoricalIndex, like Categorical, can only take on a limited,

85 and usually fixed, number of possible values (`categories`). Also,

86 like Categorical, it might have an order, but numerical operations

87 (additions, divisions, ...) are not possible.

89 Parameters

90 ----------

91 data : array-like (1-dimensional)

92 The values of the categorical. If `categories` are given, values not in

93 `categories` will be replaced with NaN.

94 categories : index-like, optional

95 The categories for the categorical. Items need to be unique.

96 If the categories are not given here (and also not in `dtype`), they

97 will be inferred from the `data`.

98 ordered : bool, optional

99 Whether or not this categorical is treated as an ordered

100 categorical. If not given here or in `dtype`, the resulting

101 categorical will be unordered.

102 dtype : CategoricalDtype or "category", optional

103 If :class:`CategoricalDtype`, cannot be used together with

104 `categories` or `ordered`.

105 copy : bool, default False

106 Make a copy of input ndarray.

107 name : object, optional

108 Name to be stored in the index.

109

110 Attributes

111 ----------

112 codes

113 categories

114 ordered

115

116 Methods

117 -------

118 rename_categories

119 reorder_categories

120 add_categories

121 remove_categories

122 remove_unused_categories

123 set_categories

124 as_ordered

125 as_unordered

126 map

127

128 Raises

129 ------

130 ValueError

131 If the categories do not validate.

132 TypeError

133 If an explicit ``ordered=True`` is given but no `categories` and the

134 `values` are not sortable.

135

136 See Also

137 --------

138 Index : The base pandas Index type.

139 Categorical : A categorical array.

140 CategoricalDtype : Type for categorical data.

141

142 Notes

143 -----

144 See the `user guide

145 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#categoricalindex>`__

146 for more.

147

148 Examples

149 --------

150 >>> pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"])

151 CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],

152 categories=['a', 'b', 'c'], ordered=False, dtype='category')

153

154 ``CategoricalIndex`` can also be instantiated from a ``Categorical``:

155

156 >>> c = pd.Categorical(["a", "b", "c", "a", "b", "c"])

157 >>> pd.CategoricalIndex(c)

158 CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],

159 categories=['a', 'b', 'c'], ordered=False, dtype='category')

160

161 Ordered ``CategoricalIndex`` can have a min and max value.

162

163 >>> ci = pd.CategoricalIndex(

164 ... ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"]

165 ... )

166 >>> ci

167 CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],

168 categories=['c', 'b', 'a'], ordered=True, dtype='category')

169 >>> ci.min()

170 'c'

171 """

172

173 _typ = "categoricalindex"

174 _data_cls = Categorical

175

176 @property

177 def _can_hold_strings(self):

178 return self.categories._can_hold_strings

179

180 @cache_readonly

181 def _should_fallback_to_positional(self) -> bool:

182 return self.categories._should_fallback_to_positional

183

184 codes: np.ndarray

185 categories: Index

186 ordered: bool | None

187 _data: Categorical

188 _values: Categorical

189

190 @property

191 def _engine_type(self) -> type[libindex.IndexEngine]:

192 # self.codes can have dtype int8, int16, int32 or int64, so we need

193 # to return the corresponding engine type (libindex.Int8Engine, etc.).

194 return {

195 np.int8: libindex.Int8Engine,

196 np.int16: libindex.Int16Engine,

197 np.int32: libindex.Int32Engine,

198 np.int64: libindex.Int64Engine,

199 }[self.codes.dtype.type]

200

201 # --------------------------------------------------------------------

202 # Constructors

203

204 def __new__(

205 cls,

206 data=None,

207 categories=None,

208 ordered=None,

209 dtype: Dtype | None = None,

210 copy: bool = False,

211 name: Hashable = None,

212 ) -> CategoricalIndex:

213 name = maybe_extract_name(name, data, cls)

214

215 if is_scalar(data):

216 # GH#38944 include None here, which pre-2.0 subbed in []

217 cls._raise_scalar_data_error(data)

218

219 data = Categorical(

220 data, categories=categories, ordered=ordered, dtype=dtype, copy=copy

221 )

222

223 return cls._simple_new(data, name=name)

224

225 # --------------------------------------------------------------------

226

227 def _is_dtype_compat(self, other) -> Categorical:

228 """

229 *this is an internal non-public method*

230

231 provide a comparison between the dtype of self and other (coercing if

232 needed)

233

234 Parameters

235 ----------

236 other : Index

237

238 Returns

239 -------

240 Categorical

241

242 Raises

243 ------

244 TypeError if the dtypes are not compatible

245 """

246 if is_categorical_dtype(other):

247 other = extract_array(other)

248 if not other._categories_match_up_to_permutation(self):

249 raise TypeError(

250 "categories must match existing categories when appending"

251 )

252

253 elif other._is_multi:

254 # preempt raising NotImplementedError in isna call

255 raise TypeError("MultiIndex is not dtype-compatible with CategoricalIndex")

256 else:

257 values = other

258

259 cat = Categorical(other, dtype=self.dtype)

260 other = CategoricalIndex(cat)

261 if not other.isin(values).all():

262 raise TypeError(

263 "cannot append a non-category item to a CategoricalIndex"

264 )

265 other = other._values

266

267 if not ((other == values) | (isna(other) & isna(values))).all():

268 # GH#37667 see test_equals_non_category

269 raise TypeError(

270 "categories must match existing categories when appending"

271 )

272

273 return other

274

275 def equals(self, other: object) -> bool:

276 """

277 Determine if two CategoricalIndex objects contain the same elements.

278

279 Returns

280 -------

281 bool

282 If two CategoricalIndex objects have equal elements True,

283 otherwise False.

284 """

285 if self.is_(other):

286 return True

287

288 if not isinstance(other, Index):

289 return False

290

291 try:

292 other = self._is_dtype_compat(other)

293 except (TypeError, ValueError):

294 return False

295

296 return self._data.equals(other)

297

298 # --------------------------------------------------------------------

299 # Rendering Methods

300

301 @property

302 def _formatter_func(self):

303 return self.categories._formatter_func

304

305 def _format_attrs(self):

306 """

307 Return a list of tuples of the (attr,formatted_value)

308 """

309 attrs: list[tuple[str, str | int | bool | None]]

310

311 attrs = [

312 (

313 "categories",

314 f"[{', '.join(self._data._repr_categories())}]",

315 ),

316 ("ordered", self.ordered),

317 ]

318 extra = super()._format_attrs()

319 return attrs + extra

320

321 def _format_with_header(self, header: list[str], na_rep: str) -> list[str]:

322 result = [

323 pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep

324 for x in self._values

325 ]

326 return header + result

327

328 # --------------------------------------------------------------------

329

330 @property

331 def inferred_type(self) -> str:

332 return "categorical"

333

334 @doc(Index.__contains__)

335 def __contains__(self, key: Any) -> bool:

336 # if key is a NaN, check if any NaN is in self.

337 if is_valid_na_for_dtype(key, self.categories.dtype):

338 return self.hasnans

339

340 return contains(self, key, container=self._engine)

341

342 def reindex(

343 self, target, method=None, level=None, limit=None, tolerance=None

344 ) -> tuple[Index, npt.NDArray[np.intp] | None]:

345 """

346 Create index with target's values (move/add/delete values as necessary)

347

348 Returns

349 -------

350 new_index : pd.Index

351 Resulting index

352 indexer : np.ndarray[np.intp] or None

353 Indices of output values in original index

354

355 """

356 if method is not None:

357 raise NotImplementedError(

358 "argument method is not implemented for CategoricalIndex.reindex"

359 )

360 if level is not None:

361 raise NotImplementedError(

362 "argument level is not implemented for CategoricalIndex.reindex"

363 )

364 if limit is not None:

365 raise NotImplementedError(

366 "argument limit is not implemented for CategoricalIndex.reindex"

367 )

368 return super().reindex(target)

369

370 # --------------------------------------------------------------------

371 # Indexing Methods

372

373 def _maybe_cast_indexer(self, key) -> int:

374 # GH#41933: we have to do this instead of self._data._validate_scalar

375 # because this will correctly get partial-indexing on Interval categories

376 try:

377 return self._data._unbox_scalar(key)

378 except KeyError:

379 if is_valid_na_for_dtype(key, self.categories.dtype):

380 return -1

381 raise

382

383 def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex:

384 if isinstance(values, CategoricalIndex):

385 values = values._data

386 if isinstance(values, Categorical):

387 # Indexing on codes is more efficient if categories are the same,

388 # so we can apply some optimizations based on the degree of

389 # dtype-matching.

390 cat = self._data._encode_with_my_categories(values)

391 codes = cat._codes

392 else:

393 codes = self.categories.get_indexer(values)

394 codes = codes.astype(self.codes.dtype, copy=False)

395 cat = self._data._from_backing_data(codes)

396 return type(self)._simple_new(cat)

397

398 # --------------------------------------------------------------------

399

400 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:

401 return self.categories._is_comparable_dtype(dtype)

402

403 def map(self, mapper):

404 """

405 Map values using input an input mapping or function.

406

407 Maps the values (their categories, not the codes) of the index to new

408 categories. If the mapping correspondence is one-to-one the result is a

409 :class:`~pandas.CategoricalIndex` which has the same order property as

410 the original, otherwise an :class:`~pandas.Index` is returned.

411

412 If a `dict` or :class:`~pandas.Series` is used any unmapped category is

413 mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`

414 will be returned.

415

416 Parameters

417 ----------

418 mapper : function, dict, or Series

419 Mapping correspondence.

420

421 Returns

422 -------

423 pandas.CategoricalIndex or pandas.Index

424 Mapped index.

425

426 See Also

427 --------

428 Index.map : Apply a mapping correspondence on an

429 :class:`~pandas.Index`.

430 Series.map : Apply a mapping correspondence on a

431 :class:`~pandas.Series`.

432 Series.apply : Apply more complex functions on a

433 :class:`~pandas.Series`.

434

435 Examples

436 --------

437 >>> idx = pd.CategoricalIndex(['a', 'b', 'c'])

438 >>> idx

439 CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],

440 ordered=False, dtype='category')

441 >>> idx.map(lambda x: x.upper())

442 CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'],

443 ordered=False, dtype='category')

444 >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'})

445 CategoricalIndex(['first', 'second', 'third'], categories=['first',

446 'second', 'third'], ordered=False, dtype='category')

447

448 If the mapping is one-to-one the ordering of the categories is

449 preserved:

450

451 >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True)

452 >>> idx

453 CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],

454 ordered=True, dtype='category')

455 >>> idx.map({'a': 3, 'b': 2, 'c': 1})

456 CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True,

457 dtype='category')

458

459 If the mapping is not one-to-one an :class:`~pandas.Index` is returned:

460

461 >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'})

462 Index(['first', 'second', 'first'], dtype='object')

463

464 If a `dict` is used, all unmapped categories are mapped to `NaN` and

465 the result is an :class:`~pandas.Index`:

466

467 >>> idx.map({'a': 'first', 'b': 'second'})

468 Index(['first', 'second', nan], dtype='object')

469 """

470 mapped = self._values.map(mapper)

471 return Index(mapped, name=self.name)

472

473 def _concat(self, to_concat: list[Index], name: Hashable) -> Index:

474 # if calling index is category, don't check dtype of others

475 try:

476 cat = Categorical._concat_same_type(

477 [self._is_dtype_compat(c) for c in to_concat]

478 )

479 except TypeError:

480 # not all to_concat elements are among our categories (or NA)

481 from pandas.core.dtypes.concat import concat_compat

482

483 res = concat_compat([x._values for x in to_concat])

484 return Index(res, name=name)

485 else:

486 return type(self)._simple_new(cat, name=name)