Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/api.py: 21%

1from __future__ import annotations

3import textwrap

4from typing import cast

6import numpy as np

8from pandas._libs import (

9 NaT,

10 lib,

11)

12from pandas._typing import Axis

13from pandas.errors import InvalidIndexError

15from pandas.core.dtypes.cast import find_common_type

17from pandas.core.algorithms import safe_sort

18from pandas.core.indexes.base import (

19 Index,

20 _new_Index,

21 ensure_index,

22 ensure_index_from_sequences,

23 get_unanimous_names,

24)

25from pandas.core.indexes.category import CategoricalIndex

26from pandas.core.indexes.datetimes import DatetimeIndex

27from pandas.core.indexes.interval import IntervalIndex

28from pandas.core.indexes.multi import MultiIndex

29from pandas.core.indexes.period import PeriodIndex

30from pandas.core.indexes.range import RangeIndex

31from pandas.core.indexes.timedeltas import TimedeltaIndex

33_sort_msg = textwrap.dedent(

34 """\

35Sorting because non-concatenation axis is not aligned. A future version

36of pandas will change to not sort by default.

38To accept the future behavior, pass 'sort=False'.

40To retain the current behavior and silence the warning, pass 'sort=True'.

41"""

42)

# Names exported by this module: the index classes and helpers imported at
# the top of the file, plus the functions defined further down in this file
# (get_objs_combined_axis, union_indexes, all_indexes_same, default_index,
# safe_sort_index).
__all__ = [
    "Index",
    "MultiIndex",
    "CategoricalIndex",
    "IntervalIndex",
    "RangeIndex",
    "InvalidIndexError",
    "TimedeltaIndex",
    "PeriodIndex",
    "DatetimeIndex",
    "_new_Index",
    "NaT",
    "ensure_index",
    "ensure_index_from_sequences",
    "get_objs_combined_axis",
    "union_indexes",
    "get_unanimous_names",
    "all_indexes_same",
    "default_index",
    "safe_sort_index",
]

def get_objs_combined_axis(
    objs, intersect: bool = False, axis: Axis = 0, sort: bool = True, copy: bool = False
) -> Index:
    """
    Combine the indexes found on one axis of several objects.

    Each object's index for ``axis`` is fetched through its ``_get_axis``
    method and the collected indexes are merged into a single Index, either
    by intersection or by union depending on ``intersect``.

    Parameters
    ----------
    objs : list
        Series or DataFrame objects, may be mix of the two.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    axis : Axis, default 0
        The axis to extract indexes from; forwarded to ``obj._get_axis``.
    sort : bool, default True
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    axes = [obj._get_axis(axis) for obj in objs]
    combined = _get_combined_index(axes, intersect=intersect, sort=sort, copy=copy)
    return combined

98def _get_distinct_objs(objs: list[Index]) -> list[Index]:

99 """

100 Return a list with distinct elements of "objs" (different ids).

101 Preserves order.

102 """

103 ids: set[int] = set()

104 res = []

105 for obj in objs:

106 if id(obj) not in ids:

107 ids.add(id(obj))

108 res.append(obj)

109 return res

110

111

def _get_combined_index(
    indexes: list[Index],
    intersect: bool = False,
    sort: bool = False,
    copy: bool = False,
) -> Index:
    """
    Merge several indexes into one by union or intersection.

    Repeated references to the same object are dropped first. With nothing
    left an empty Index is produced; with exactly one entry left that entry
    is used as is.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    sort : bool, default False
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    candidates = _get_distinct_objs(indexes)

    if not candidates:
        combined = Index([])
    elif len(candidates) == 1:
        combined = candidates[0]
    elif intersect:
        combined, *remaining = candidates
        for other in remaining:
            combined = combined.intersection(other)
    else:
        # The union is built unsorted here; sorting, if requested, is
        # applied once below.
        combined = ensure_index(union_indexes(candidates, sort=False))

    if sort:
        combined = safe_sort_index(combined)
    # GH 29879
    if copy:
        combined = combined.copy()

    return combined

158

159

def safe_sort_index(index: Index) -> Index:
    """
    Return ``index`` in sorted order, keeping its dtype and name(s).

    An index that is already monotonic increasing is returned unchanged.
    If ``safe_sort`` raises ``TypeError`` the input is returned as given.

    Parameters
    ----------
    index : an Index

    Returns
    -------
    Index
    """
    if index.is_monotonic_increasing:
        return index

    try:
        ordered = safe_sort(index)
    except TypeError:
        # Best effort: the values could not be sorted, so give the index
        # back in its original order.
        return index

    if isinstance(ordered, Index):
        return ordered

    # Otherwise rebuild an Index around the sorted array, carrying over the
    # name(s) and, for a flat index, the dtype.
    ordered = cast(np.ndarray, ordered)
    if isinstance(index, MultiIndex):
        return MultiIndex.from_tuples(ordered, names=index.names)
    return Index(ordered, name=index.name, dtype=index.dtype)

192

193

def union_indexes(indexes, sort: bool | None = True) -> Index:
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent.

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool or None, default True
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index

    Raises
    ------
    AssertionError
        If ``indexes`` is empty.
    TypeError
        If tz-naive and tz-aware DatetimeIndex objects are mixed.

    Notes
    -----
    When ``indexes`` holds exactly one element it is returned directly; if
    that element is a plain list it is sorted and wrapped in an Index
    regardless of ``sort``.
    """
    if len(indexes) == 0:
        raise AssertionError("Must have at least 1 Index to union")
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            result = Index(sorted(result))
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds, dtype) -> Index:
        """
        Convert indexes to lists and concatenate them, removing duplicates.

        Parameters
        ----------
        inds : list of Index or list objects
        dtype : dtype to set for the resulting Index

        Returns
        -------
        Index
        """

        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        # ``sort`` is read from the enclosing call.
        return Index(
            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
            dtype=dtype,
        )

    def _find_common_index_dtype(inds):
        """
        Finds a common type for the indexes to pass through to resulting index.

        Parameters
        ----------
        inds: list of Index or list objects

        Returns
        -------
        The common type or None if no indexes were given
        """
        # Use the argument rather than the enclosing ``indexes`` so the
        # helper actually honours what it is passed.
        dtypes = [idx.dtype for idx in inds if isinstance(idx, Index)]
        if dtypes:
            return find_common_type(dtypes)
        return None

    if kind == "special":
        result = indexes[0]

        dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
        dti_tzs = [x for x in dtis if x.tz is not None]
        if len(dti_tzs) not in [0, len(dtis)]:
            # TODO: this behavior is not tested (so may not be desired),
            #  but is kept in order to keep behavior the same when
            #  deprecating union_many
            # test_frame_from_dict_with_mixed_indexes
            raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if len(dtis) == len(indexes):
            # Every input is a DatetimeIndex: always sort the union.
            sort = True

        elif len(dtis) > 1:
            # If we have mixed timezones, our casting behavior may depend on
            #  the order of indexes, which we don't want.
            sort = False

            # TODO: what about Categorical[dt64]?
            # test_frame_from_dict_with_mixed_indexes
            indexes = [x.astype(object, copy=False) for x in indexes]
            # ``indexes`` was rebound, so the starting point must be too.
            result = indexes[0]

        for other in indexes[1:]:
            result = result.union(other, sort=None if sort else False)
        return result

    elif kind == "array":
        dtype = _find_common_index_dtype(indexes)
        index = indexes[0]
        # Only build a new Index when the inputs actually differ.
        if not all(index.equals(other) for other in indexes[1:]):
            index = _unique_indices(indexes, dtype)

        name = get_unanimous_names(*indexes)[0]
        if name != index.name:
            index = index.rename(name)
        return index
    else:  # kind='list'
        dtype = _find_common_index_dtype(indexes)
        return _unique_indices(indexes, dtype)

309

310

311def _sanitize_and_check(indexes):

312 """

313 Verify the type of indexes and convert lists to Index.

314

315 Cases:

316

317 - [list, list, ...]: Return ([list, list, ...], 'list')

318 - [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...])

319 Lists are sorted and converted to Index.

320 - [Index, Index, ...]: Return ([Index, Index, ...], TYPE)

321 TYPE = 'special' if at least one special type, 'array' otherwise.

322

323 Parameters

324 ----------

325 indexes : list of Index or list objects

326

327 Returns

328 -------

329 sanitized_indexes : list of Index or list objects

330 type : {'list', 'array', 'special'}

331 """

332 kinds = list({type(index) for index in indexes})

333

334 if list in kinds:

335 if len(kinds) > 1:

336 indexes = [

337 Index(list(x)) if not isinstance(x, Index) else x for x in indexes

338 ]

339 kinds.remove(list)

340 else:

341 return indexes, "list"

342

343 if len(kinds) > 1 or Index not in kinds:

344 return indexes, "special"

345 else:

346 return indexes, "array"

347

348

def all_indexes_same(indexes) -> bool:
    """
    Determine if all indexes contain the same elements.

    Parameters
    ----------
    indexes : iterable of Index objects

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
        An empty iterable yields True.
    """
    itr = iter(indexes)
    try:
        first = next(itr)
    except StopIteration:
        # Nothing to compare; do not let StopIteration escape to the caller.
        return True
    # Every remaining index is compared against the first one.
    return all(first.equals(index) for index in itr)

365

366

def default_index(n: int) -> RangeIndex:
    """
    Build an unnamed RangeIndex over ``range(0, n)``.

    Parameters
    ----------
    n : int
        Stop value of the underlying range.

    Returns
    -------
    RangeIndex
    """
    return RangeIndex._simple_new(range(n), name=None)