Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/api.py: 48%

1from __future__ import annotations

3import textwrap

4from typing import (

5 TYPE_CHECKING,

6 cast,

9import numpy as np

11from pandas._libs import (

12 NaT,

13 lib,

14)

15from pandas.errors import InvalidIndexError

17from pandas.core.dtypes.cast import find_common_type

19from pandas.core.algorithms import safe_sort

20from pandas.core.indexes.base import (

21 Index,

22 _new_Index,

23 ensure_index,

24 ensure_index_from_sequences,

25 get_unanimous_names,

26)

27from pandas.core.indexes.category import CategoricalIndex

28from pandas.core.indexes.datetimes import DatetimeIndex

29from pandas.core.indexes.interval import IntervalIndex

30from pandas.core.indexes.multi import MultiIndex

31from pandas.core.indexes.period import PeriodIndex

32from pandas.core.indexes.range import RangeIndex

33from pandas.core.indexes.timedeltas import TimedeltaIndex

35if TYPE_CHECKING:

36 from pandas._typing import Axis

# Warning text explaining that a non-aligned, non-concatenation axis gets
# sorted by default and how to opt in or out via the ``sort`` keyword.
# NOTE(review): nothing in the visible part of this module reads this
# constant -- confirm whether it is still used elsewhere before removing.
_sort_msg = textwrap.dedent(
    """\
Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.
"""
)

# Public names of this module: the set bound by ``from <module> import *``.
# The first group is re-exported from other pandas modules (see the imports
# at the top of the file); the helpers from ``get_objs_combined_axis``
# onwards, except ``get_unanimous_names``, are defined in this file.
__all__ = [
    "Index",
    "MultiIndex",
    "CategoricalIndex",
    "IntervalIndex",
    "RangeIndex",
    "InvalidIndexError",
    "TimedeltaIndex",
    "PeriodIndex",
    "DatetimeIndex",
    "_new_Index",
    "NaT",
    "ensure_index",
    "ensure_index_from_sequences",
    "get_objs_combined_axis",
    "union_indexes",
    "get_unanimous_names",
    "all_indexes_same",
    "default_index",
    "safe_sort_index",
]

def get_objs_combined_axis(
    objs,
    intersect: bool = False,
    axis: Axis = 0,
    sort: bool = True,
    copy: bool = False,
) -> Index:
    """
    Combine the indexes found along one axis of several objects.

    The index of every object is looked up with ``obj._get_axis(axis)`` and
    the collected indexes are handed to ``_get_combined_index``, which
    returns their intersection or union depending on ``intersect``.

    Parameters
    ----------
    objs : list
        Series or DataFrame objects, may be mix of the two. Every element
        must provide ``_get_axis``.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    axis : Axis, default 0
        The axis to extract indexes from; forwarded to ``_get_axis``.
    sort : bool, default True
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    return _get_combined_index(
        [obj._get_axis(axis) for obj in objs],
        intersect=intersect,
        sort=sort,
        copy=copy,
    )

104

105

106def _get_distinct_objs(objs: list[Index]) -> list[Index]:

107 """

108 Return a list with distinct elements of "objs" (different ids).

109 Preserves order.

110 """

111 ids: set[int] = set()

112 res = []

113 for obj in objs:

114 if id(obj) not in ids:

115 ids.add(id(obj))

116 res.append(obj)

117 return res

118

119

def _get_combined_index(
    indexes: list[Index],
    intersect: bool = False,
    sort: bool = False,
    copy: bool = False,
) -> Index:
    """
    Return the union or intersection of indexes.

    Repeated references to the same object are dropped first (see
    ``_get_distinct_objs``). With nothing left an empty ``Index([])`` is
    produced; a single remaining entry is used as-is.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    sort : bool, default False
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    distinct = _get_distinct_objs(indexes)

    if not distinct:
        combined = Index([])
    elif len(distinct) == 1 or intersect:
        # With a single entry the loop body never runs, so that entry is
        # returned untouched whether or not ``intersect`` was requested.
        combined = distinct[0]
        for other in distinct[1:]:
            combined = combined.intersection(other)
    else:
        combined = ensure_index(union_indexes(distinct, sort=False))

    if sort:
        combined = safe_sort_index(combined)

    # GH 29879
    return combined.copy() if copy else combined

166

167

def safe_sort_index(index: Index) -> Index:
    """
    Return ``index`` in sorted order, falling back to the input on failure.

    The dtype and the name attributes are kept.

    Parameters
    ----------
    index : an Index

    Returns
    -------
    Index
        ``index`` itself when it is already monotonic increasing or when
        ``safe_sort`` raises ``TypeError``; otherwise the sorted result.
    """
    if index.is_monotonic_increasing:
        return index

    try:
        sorted_values = safe_sort(index)
    except TypeError:
        # Sorting was not possible; hand back the input unsorted.
        return index

    if isinstance(sorted_values, Index):
        return sorted_values

    # Anything else is treated as a plain ndarray and re-wrapped so that the
    # names (and, for a flat Index, the dtype) of the input are carried over.
    sorted_values = cast(np.ndarray, sorted_values)
    if isinstance(index, MultiIndex):
        return MultiIndex.from_tuples(sorted_values, names=index.names)
    return Index(sorted_values, name=index.name, dtype=index.dtype)

200

201

def union_indexes(indexes, sort: bool | None = True) -> Index:
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent.

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool or None, default True
        Whether the result index should come out sorted or not. The checks
        in this function only test truthiness, so ``None`` acts like
        ``False`` there; the value is also forwarded unchanged to
        ``lib.fast_unique_multiple_list``. When every input is a
        DatetimeIndex the argument is overridden and the union is sorted.

    Returns
    -------
    Index
        For a single-element input that element is returned as-is unless it
        is a list, in which case it is wrapped in an Index.

    Raises
    ------
    AssertionError
        If ``indexes`` is empty.
    TypeError
        If tz-naive and tz-aware DatetimeIndex objects are mixed.
    """
    if len(indexes) == 0:
        raise AssertionError("Must have at least 1 Index to union")
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            result = Index(sorted(result) if sort else result)
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds, dtype) -> Index:
        """
        Concatenate indices and remove duplicates.

        Parameters
        ----------
        inds : list of Index or list objects
        dtype : dtype to set for the resulting Index

        Returns
        -------
        Index
        """
        if all(isinstance(ind, Index) for ind in inds):
            inds = [ind.astype(dtype, copy=False) for ind in inds]
            result = inds[0].unique()
            other = inds[1].append(inds[2:])
            # Keep only the entries of ``other`` that are not already
            # present in ``result`` (indexer value -1 means "not found").
            diff = other[result.get_indexer_for(other) == -1]
            if len(diff):
                result = result.append(diff.unique())
            if sort:
                result = result.sort_values()
            return result

        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        return Index(
            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
            dtype=dtype,
        )

    def _find_common_index_dtype(inds):
        """
        Finds a common type for the indexes to pass through to resulting index.

        Parameters
        ----------
        inds: list of Index or list objects

        Returns
        -------
        The common type or None if no indexes were given
        """
        # Use the argument rather than the enclosing ``indexes`` variable so
        # the helper really describes what it was handed.
        dtypes = [idx.dtype for idx in inds if isinstance(idx, Index)]
        return find_common_type(dtypes) if dtypes else None

    if kind == "special":
        dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
        dti_tzs = [x for x in dtis if x.tz is not None]
        if len(dti_tzs) not in [0, len(dtis)]:
            # TODO: this behavior is not tested (so may not be desired),
            #  but is kept in order to keep behavior the same when
            #  deprecating union_many
            # test_frame_from_dict_with_mixed_indexes
            raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if len(dtis) == len(indexes):
            # Only DatetimeIndex inputs: always sort, whatever was requested.
            sort = True
        elif len(dtis) > 1:
            # If we have mixed timezones, our casting behavior may depend on
            #  the order of indexes, which we don't want.
            sort = False

            # TODO: what about Categorical[dt64]?
            # test_frame_from_dict_with_mixed_indexes
            indexes = [x.astype(object, copy=False) for x in indexes]

        # Taken after the optional object cast so the first operand matches
        # the (possibly converted) remaining ones.
        result = indexes[0]
        for other in indexes[1:]:
            result = result.union(other, sort=None if sort else False)
        return result

    dtype = _find_common_index_dtype(indexes)

    if kind == "array":
        index = indexes[0]
        if not all(index.equals(other) for other in indexes[1:]):
            index = _unique_indices(indexes, dtype)

        name = get_unanimous_names(*indexes)[0]
        if name != index.name:
            index = index.rename(name)
        return index

    # kind='list'
    return _unique_indices(indexes, dtype)

328

329

330def _sanitize_and_check(indexes):

331 """

332 Verify the type of indexes and convert lists to Index.

333

334 Cases:

335

336 - [list, list, ...]: Return ([list, list, ...], 'list')

337 - [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...])

338 Lists are sorted and converted to Index.

339 - [Index, Index, ...]: Return ([Index, Index, ...], TYPE)

340 TYPE = 'special' if at least one special type, 'array' otherwise.

341

342 Parameters

343 ----------

344 indexes : list of Index or list objects

345

346 Returns

347 -------

348 sanitized_indexes : list of Index or list objects

349 type : {'list', 'array', 'special'}

350 """

351 kinds = list({type(index) for index in indexes})

352

353 if list in kinds:

354 if len(kinds) > 1:

355 indexes = [

356 Index(list(x)) if not isinstance(x, Index) else x for x in indexes

357 ]

358 kinds.remove(list)

359 else:

360 return indexes, "list"

361

362 if len(kinds) > 1 or Index not in kinds:

363 return indexes, "special"

364 else:

365 return indexes, "array"

366

367

def all_indexes_same(indexes) -> bool:
    """
    Determine if all indexes contain the same elements.

    Every index after the first is compared against the first one with
    ``Index.equals``.

    Parameters
    ----------
    indexes : iterable of Index objects
        May be any iterable, including a one-shot iterator.

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
        An empty iterable yields True, since there is no pair that differs.
    """
    itr = iter(indexes)
    try:
        first = next(itr)
    except StopIteration:
        # Do not let StopIteration escape: inside a generator it would be
        # turned into a RuntimeError (PEP 479) and it is a confusing error
        # for callers in any case.
        return True
    return all(first.equals(index) for index in itr)

384

385

def default_index(n: int) -> RangeIndex:
    """
    Build an unnamed RangeIndex over ``range(n)``.

    Parameters
    ----------
    n : int
        Stop value handed to ``range``.

    Returns
    -------
    RangeIndex
        Created through ``RangeIndex._simple_new`` with ``name=None``.
    """
    return RangeIndex._simple_new(range(n), name=None)