1from __future__ import annotations
2
3import textwrap
4from typing import (
5 TYPE_CHECKING,
6 cast,
7)
8
9import numpy as np
10
11from pandas._libs import (
12 NaT,
13 lib,
14)
15from pandas.errors import InvalidIndexError
16
17from pandas.core.dtypes.cast import find_common_type
18
19from pandas.core.algorithms import safe_sort
20from pandas.core.indexes.base import (
21 Index,
22 _new_Index,
23 ensure_index,
24 ensure_index_from_sequences,
25 get_unanimous_names,
26)
27from pandas.core.indexes.category import CategoricalIndex
28from pandas.core.indexes.datetimes import DatetimeIndex
29from pandas.core.indexes.interval import IntervalIndex
30from pandas.core.indexes.multi import MultiIndex
31from pandas.core.indexes.period import PeriodIndex
32from pandas.core.indexes.range import RangeIndex
33from pandas.core.indexes.timedeltas import TimedeltaIndex
34
35if TYPE_CHECKING:
36 from pandas._typing import Axis
# Message text about sorting a non-aligned non-concatenation axis; dedented
# once at import time.
# NOTE(review): nothing in the visible part of this module references this
# constant -- confirm whether it is still used elsewhere before removing it.
_sort_msg = textwrap.dedent(
    """\
Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.
"""
)
47
48
# Names exported by ``from <this module> import *``: the Index classes and
# helpers imported above, plus the functions defined in this module.
__all__ = [
    "Index",
    "MultiIndex",
    "CategoricalIndex",
    "IntervalIndex",
    "RangeIndex",
    "InvalidIndexError",
    "TimedeltaIndex",
    "PeriodIndex",
    "DatetimeIndex",
    "_new_Index",
    "NaT",
    "ensure_index",
    "ensure_index_from_sequences",
    "get_objs_combined_axis",
    "union_indexes",
    "get_unanimous_names",
    "all_indexes_same",
    "default_index",
    "safe_sort_index",
]
70
71
def get_objs_combined_axis(
    objs,
    intersect: bool = False,
    axis: Axis = 0,
    sort: bool = True,
    copy: bool = False,
) -> Index:
    """
    Combine the indexes found on one axis of several objects.

    The index on ``axis`` is pulled from every object via ``_get_axis`` and
    the collected indexes are then merged into their union or intersection
    (depending on the value of "intersect").

    Parameters
    ----------
    objs : list
        Series or DataFrame objects, may be mix of the two.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    axis : Axis, default 0
        The axis to extract indexes from; handed as-is to each object's
        ``_get_axis``.
    sort : bool, default True
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    return _get_combined_index(
        [obj._get_axis(axis) for obj in objs],
        intersect=intersect,
        sort=sort,
        copy=copy,
    )
104
105
def _get_distinct_objs(objs: list[Index]) -> list[Index]:
    """
    Drop repeated references to the same object from "objs".

    Two entries count as duplicates only when they are the very same object
    (same ``id``); equal-but-separate objects are both kept. The first
    occurrence of each object keeps its position in the output.
    """
    # A dict keyed on id() collapses repeated references while remembering
    # the position at which each key was first inserted.
    by_identity = {id(obj): obj for obj in objs}
    return list(by_identity.values())
118
119
def _get_combined_index(
    indexes: list[Index],
    intersect: bool = False,
    sort: bool = False,
    copy: bool = False,
) -> Index:
    """
    Merge several indexes into their union or their intersection.

    Repeated references to the same index object are collapsed first, so a
    list that only ever mentions one object yields that object itself.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    sort : bool, default False
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    distinct = _get_distinct_objs(indexes)
    n_distinct = len(distinct)

    if n_distinct == 0:
        combined = Index([])
    elif n_distinct == 1:
        combined = distinct[0]
    elif intersect:
        # Fold the intersection left to right, starting from the first index.
        combined, *remaining = distinct
        for other in remaining:
            combined = combined.intersection(other)
    else:
        # Sorting (if requested) is applied once below, not inside the union.
        combined = ensure_index(union_indexes(distinct, sort=False))

    if sort:
        combined = safe_sort_index(combined)
    # GH 29879
    if copy:
        combined = combined.copy()

    return combined
166
167
def safe_sort_index(index: Index) -> Index:
    """
    Return a sorted version of the index, if it can be sorted.

    The dtype and the name attribute(s) are carried over to the result.
    An index that is already monotonic increasing is handed back as-is,
    and so is one for which ``safe_sort`` raises ``TypeError``.

    Parameters
    ----------
    index : an Index

    Returns
    -------
    Index
    """
    if index.is_monotonic_increasing:
        return index

    try:
        sorted_values = safe_sort(index)
    except TypeError:
        # Values cannot be ordered: fall back to the input, unsorted.
        return index

    if isinstance(sorted_values, Index):
        return sorted_values

    # Otherwise a plain array came back; wrap it in the matching Index
    # flavour, restoring the name(s) and dtype of the input.
    sorted_values = cast(np.ndarray, sorted_values)
    if isinstance(index, MultiIndex):
        return MultiIndex.from_tuples(sorted_values, names=index.names)
    return Index(sorted_values, name=index.name, dtype=index.dtype)
200
201
202def union_indexes(indexes, sort: bool | None = True) -> Index:
203 """
204 Return the union of indexes.
205
206 The behavior of sort and names is not consistent.
207
208 Parameters
209 ----------
210 indexes : list of Index or list objects
211 sort : bool, default True
212 Whether the result index should come out sorted or not.
213
214 Returns
215 -------
216 Index
217 """
218 if len(indexes) == 0:
219 raise AssertionError("Must have at least 1 Index to union")
220 if len(indexes) == 1:
221 result = indexes[0]
222 if isinstance(result, list):
223 if not sort:
224 result = Index(result)
225 else:
226 result = Index(sorted(result))
227 return result
228
229 indexes, kind = _sanitize_and_check(indexes)
230
231 def _unique_indices(inds, dtype) -> Index:
232 """
233 Concatenate indices and remove duplicates.
234
235 Parameters
236 ----------
237 inds : list of Index or list objects
238 dtype : dtype to set for the resulting Index
239
240 Returns
241 -------
242 Index
243 """
244 if all(isinstance(ind, Index) for ind in inds):
245 inds = [ind.astype(dtype, copy=False) for ind in inds]
246 result = inds[0].unique()
247 other = inds[1].append(inds[2:])
248 diff = other[result.get_indexer_for(other) == -1]
249 if len(diff):
250 result = result.append(diff.unique())
251 if sort:
252 result = result.sort_values()
253 return result
254
255 def conv(i):
256 if isinstance(i, Index):
257 i = i.tolist()
258 return i
259
260 return Index(
261 lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
262 dtype=dtype,
263 )
264
265 def _find_common_index_dtype(inds):
266 """
267 Finds a common type for the indexes to pass through to resulting index.
268
269 Parameters
270 ----------
271 inds: list of Index or list objects
272
273 Returns
274 -------
275 The common type or None if no indexes were given
276 """
277 dtypes = [idx.dtype for idx in indexes if isinstance(idx, Index)]
278 if dtypes:
279 dtype = find_common_type(dtypes)
280 else:
281 dtype = None
282
283 return dtype
284
285 if kind == "special":
286 result = indexes[0]
287
288 dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
289 dti_tzs = [x for x in dtis if x.tz is not None]
290 if len(dti_tzs) not in [0, len(dtis)]:
291 # TODO: this behavior is not tested (so may not be desired),
292 # but is kept in order to keep behavior the same when
293 # deprecating union_many
294 # test_frame_from_dict_with_mixed_indexes
295 raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
296
297 if len(dtis) == len(indexes):
298 sort = True
299 result = indexes[0]
300
301 elif len(dtis) > 1:
302 # If we have mixed timezones, our casting behavior may depend on
303 # the order of indexes, which we don't want.
304 sort = False
305
306 # TODO: what about Categorical[dt64]?
307 # test_frame_from_dict_with_mixed_indexes
308 indexes = [x.astype(object, copy=False) for x in indexes]
309 result = indexes[0]
310
311 for other in indexes[1:]:
312 result = result.union(other, sort=None if sort else False)
313 return result
314
315 elif kind == "array":
316 dtype = _find_common_index_dtype(indexes)
317 index = indexes[0]
318 if not all(index.equals(other) for other in indexes[1:]):
319 index = _unique_indices(indexes, dtype)
320
321 name = get_unanimous_names(*indexes)[0]
322 if name != index.name:
323 index = index.rename(name)
324 return index
325 else: # kind='list'
326 dtype = _find_common_index_dtype(indexes)
327 return _unique_indices(indexes, dtype)
328
329
def _sanitize_and_check(indexes):
    """
    Classify the inputs and convert stray non-Index entries to Index.

    Cases:

    - [list, list, ...]: Return ([list, list, ...], 'list')
    - [list, Index, ...]: Every entry that is not an Index is converted with
        ``Index(list(x))`` (element order is kept), then the result is
        classified as in the next case.
    - [Index, Index, ...]: Return ([Index, Index, ...], TYPE)
        TYPE = 'array' if every entry is exactly of type Index,
        'special' otherwise.

    Parameters
    ----------
    indexes : list of Index or list objects

    Returns
    -------
    sanitized_indexes : list of Index or list objects
    type : {'list', 'array', 'special'}
    """
    kinds = {type(index) for index in indexes}

    if list in kinds:
        if len(kinds) == 1:
            # Nothing but plain lists: hand them back untouched.
            return indexes, "list"
        indexes = [x if isinstance(x, Index) else Index(list(x)) for x in indexes]
        kinds.discard(list)

    # 'array' requires that the only remaining type is the base Index class.
    if kinds == {Index}:
        return indexes, "array"
    return indexes, "special"
366
367
def all_indexes_same(indexes) -> bool:
    """
    Determine if all indexes contain the same elements.

    Every index after the first is compared against the first one with
    ``equals``.

    Parameters
    ----------
    indexes : iterable of Index objects
        May be any iterable, including a one-shot iterator.

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
        An empty iterable yields True.
    """
    itr = iter(indexes)
    try:
        first = next(itr)
    except StopIteration:
        # Nothing to compare: vacuously true, instead of leaking a bare
        # StopIteration to the caller.
        return True
    return all(first.equals(index) for index in itr)
384
385
def default_index(n: int) -> RangeIndex:
    """
    Build an unnamed RangeIndex over ``range(n)``.

    Parameters
    ----------
    n : int
        Stop value handed to ``range``.

    Returns
    -------
    RangeIndex
    """
    return RangeIndex._simple_new(range(n), name=None)