1from __future__ import annotations
2
3import textwrap
4from typing import cast
5
6import numpy as np
7
8from pandas._libs import (
9 NaT,
10 lib,
11)
12from pandas._typing import Axis
13from pandas.errors import InvalidIndexError
14
15from pandas.core.dtypes.cast import find_common_type
16
17from pandas.core.algorithms import safe_sort
18from pandas.core.indexes.base import (
19 Index,
20 _new_Index,
21 ensure_index,
22 ensure_index_from_sequences,
23 get_unanimous_names,
24)
25from pandas.core.indexes.category import CategoricalIndex
26from pandas.core.indexes.datetimes import DatetimeIndex
27from pandas.core.indexes.interval import IntervalIndex
28from pandas.core.indexes.multi import MultiIndex
29from pandas.core.indexes.period import PeriodIndex
30from pandas.core.indexes.range import RangeIndex
31from pandas.core.indexes.timedeltas import TimedeltaIndex
32
# Warning text telling the user that a non-aligned, non-concatenation axis is
# being sorted and how to opt in to / out of that behavior via ``sort=``.
# NOTE(review): no use of this constant is visible in this file -- confirm it
# is still referenced elsewhere before removing it.
_sort_msg = textwrap.dedent(
    """\
Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.
"""
)
43
44
# Names exported by ``from <this module> import *``: the index classes and
# helpers imported at the top of the file, plus the functions defined below.
__all__ = [
    "Index",
    "MultiIndex",
    "CategoricalIndex",
    "IntervalIndex",
    "RangeIndex",
    "InvalidIndexError",
    "TimedeltaIndex",
    "PeriodIndex",
    "DatetimeIndex",
    "_new_Index",
    "NaT",
    "ensure_index",
    "ensure_index_from_sequences",
    "get_objs_combined_axis",
    "union_indexes",
    "get_unanimous_names",
    "all_indexes_same",
    "default_index",
    "safe_sort_index",
]
66
67
def get_objs_combined_axis(
    objs, intersect: bool = False, axis: Axis = 0, sort: bool = True, copy: bool = False
) -> Index:
    """
    Combine the indexes that several objects carry along one axis.

    The axis index of every object is collected via ``obj._get_axis(axis)``
    and the collected indexes are merged into a single one, either by
    intersection or by union depending on ``intersect``.

    Parameters
    ----------
    objs : list
        Series or DataFrame objects, may be mix of the two.
    intersect : bool, default False
        If True, the result is the intersection of the indexes; otherwise
        it is their union.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to extract indexes from.
    sort : bool, default True
        Whether the combined index should be sorted.
    copy : bool, default False
        If True, a copy of the combined index is returned.

    Returns
    -------
    Index
    """
    axis_indexes = [obj._get_axis(axis) for obj in objs]
    return _get_combined_index(
        axis_indexes,
        intersect=intersect,
        sort=sort,
        copy=copy,
    )
96
97
98def _get_distinct_objs(objs: list[Index]) -> list[Index]:
99 """
100 Return a list with distinct elements of "objs" (different ids).
101 Preserves order.
102 """
103 ids: set[int] = set()
104 res = []
105 for obj in objs:
106 if id(obj) not in ids:
107 ids.add(id(obj))
108 res.append(obj)
109 return res
110
111
def _get_combined_index(
    indexes: list[Index],
    intersect: bool = False,
    sort: bool = False,
    copy: bool = False,
) -> Index:
    """
    Merge several indexes into one by union or intersection.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        If True, the result is the intersection of the indexes; otherwise
        it is their union.
    sort : bool, default False
        Whether the combined index should be sorted.
    copy : bool, default False
        If True, a copy of the combined index is returned.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    distinct = _get_distinct_objs(indexes)

    if not distinct:
        combined = Index([])
    elif len(distinct) == 1:
        combined = distinct[0]
    elif intersect:
        combined, *remaining = distinct
        for other in remaining:
            combined = combined.intersection(other)
    else:
        # Sorting (if requested) is applied once below, not during the union.
        combined = ensure_index(union_indexes(distinct, sort=False))

    if sort:
        combined = safe_sort_index(combined)

    # GH 29879
    return combined.copy() if copy else combined
158
159
def safe_sort_index(index: Index) -> Index:
    """
    Return a sorted version of ``index``, or ``index`` itself if it cannot
    (or need not) be sorted.

    The name(s) and dtype of the input are carried over to the result.

    Parameters
    ----------
    index : an Index

    Returns
    -------
    Index
    """
    # Already in increasing order: nothing to do.
    if index.is_monotonic_increasing:
        return index

    try:
        sorted_values = safe_sort(index)
    except TypeError:
        # The sort helper rejected the values; fall back to the input as-is.
        return index

    if isinstance(sorted_values, Index):
        return sorted_values

    # Otherwise the helper handed back a plain array; rebuild an Index of
    # the same flavour as the input around it.
    sorted_values = cast(np.ndarray, sorted_values)
    if isinstance(index, MultiIndex):
        return MultiIndex.from_tuples(sorted_values, names=index.names)
    return Index(sorted_values, name=index.name, dtype=index.dtype)
192
193
def union_indexes(indexes, sort: bool | None = True) -> Index:
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent.

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool or None, default True
        Whether the result index should come out sorted or not. Wherever
        this function tests ``sort`` itself, ``None`` is handled like
        ``False``.

    Returns
    -------
    Index

    Raises
    ------
    AssertionError
        If ``indexes`` is empty.
    TypeError
        If tz-naive and tz-aware DatetimeIndex objects are mixed.
    """
    if len(indexes) == 0:
        raise AssertionError("Must have at least 1 Index to union")
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            # A lone plain list must respect ``sort`` like every other path;
            # only sort when the caller asked for it.
            result = Index(sorted(result)) if sort else Index(result)
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds, dtype) -> Index:
        """
        Convert indexes to lists and concatenate them, removing duplicates.

        Parameters
        ----------
        inds : list of Index or list objects
        dtype : dtype to set for the resulting Index

        Returns
        -------
        Index
        """

        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        return Index(
            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
            dtype=dtype,
        )

    def _find_common_index_dtype(inds):
        """
        Finds a common type for the indexes to pass through to resulting index.

        Parameters
        ----------
        inds: list of Index or list objects

        Returns
        -------
        The common type or None if no indexes were given
        """
        # Use the argument rather than the enclosing ``indexes`` so the
        # helper actually operates on what it is handed.
        dtypes = [idx.dtype for idx in inds if isinstance(idx, Index)]
        return find_common_type(dtypes) if dtypes else None

    if kind == "special":
        dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
        dti_tzs = [x for x in dtis if x.tz is not None]
        if len(dti_tzs) not in [0, len(dtis)]:
            # TODO: this behavior is not tested (so may not be desired),
            #  but is kept in order to keep behavior the same when
            #  deprecating union_many
            # test_frame_from_dict_with_mixed_indexes
            raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if len(dtis) == len(indexes):
            # Every input is a DatetimeIndex: always sort.
            sort = True
        elif len(dtis) > 1:
            # If we have mixed timezones, our casting behavior may depend on
            #  the order of indexes, which we don't want.
            sort = False

            # TODO: what about Categorical[dt64]?
            # test_frame_from_dict_with_mixed_indexes
            indexes = [x.astype(object, copy=False) for x in indexes]

        # Fold the (possibly object-cast) indexes together pairwise.
        result = indexes[0]
        for other in indexes[1:]:
            result = result.union(other, sort=None if sort else False)
        return result

    elif kind == "array":
        dtype = _find_common_index_dtype(indexes)
        index = indexes[0]
        # Only rebuild when the inputs actually differ from the first one.
        if not all(index.equals(other) for other in indexes[1:]):
            index = _unique_indices(indexes, dtype)

        name = get_unanimous_names(*indexes)[0]
        if name != index.name:
            index = index.rename(name)
        return index
    else:  # kind='list'
        dtype = _find_common_index_dtype(indexes)
        return _unique_indices(indexes, dtype)
309
310
311def _sanitize_and_check(indexes):
312 """
313 Verify the type of indexes and convert lists to Index.
314
315 Cases:
316
317 - [list, list, ...]: Return ([list, list, ...], 'list')
318 - [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...])
319 Lists are sorted and converted to Index.
320 - [Index, Index, ...]: Return ([Index, Index, ...], TYPE)
321 TYPE = 'special' if at least one special type, 'array' otherwise.
322
323 Parameters
324 ----------
325 indexes : list of Index or list objects
326
327 Returns
328 -------
329 sanitized_indexes : list of Index or list objects
330 type : {'list', 'array', 'special'}
331 """
332 kinds = list({type(index) for index in indexes})
333
334 if list in kinds:
335 if len(kinds) > 1:
336 indexes = [
337 Index(list(x)) if not isinstance(x, Index) else x for x in indexes
338 ]
339 kinds.remove(list)
340 else:
341 return indexes, "list"
342
343 if len(kinds) > 1 or Index not in kinds:
344 return indexes, "special"
345 else:
346 return indexes, "array"
347
348
def all_indexes_same(indexes) -> bool:
    """
    Determine if all indexes contain the same elements.

    Every index after the first is compared against the first one with
    ``first.equals(...)``; comparison stops at the first mismatch.

    Parameters
    ----------
    indexes : iterable of Index objects
        May be any iterable, including a one-shot generator.

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
        An empty iterable yields True (there is nothing that differs).
    """
    itr = iter(indexes)
    try:
        first = next(itr)
    except StopIteration:
        # Empty input: don't let StopIteration escape to the caller.
        return True
    return all(first.equals(index) for index in itr)
365
366
def default_index(n: int) -> RangeIndex:
    """
    Build an unnamed RangeIndex wrapping ``range(0, n)``.

    Parameters
    ----------
    n : int
        Stop value of the underlying range.

    Returns
    -------
    RangeIndex
    """
    return RangeIndex._simple_new(range(n), name=None)