Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/api.py: 48%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

139 statements  

1from __future__ import annotations 

2 

3import textwrap 

4from typing import ( 

5 TYPE_CHECKING, 

6 cast, 

7) 

8 

9import numpy as np 

10 

11from pandas._libs import ( 

12 NaT, 

13 lib, 

14) 

15from pandas.errors import InvalidIndexError 

16 

17from pandas.core.dtypes.cast import find_common_type 

18 

19from pandas.core.algorithms import safe_sort 

20from pandas.core.indexes.base import ( 

21 Index, 

22 _new_Index, 

23 ensure_index, 

24 ensure_index_from_sequences, 

25 get_unanimous_names, 

26) 

27from pandas.core.indexes.category import CategoricalIndex 

28from pandas.core.indexes.datetimes import DatetimeIndex 

29from pandas.core.indexes.interval import IntervalIndex 

30from pandas.core.indexes.multi import MultiIndex 

31from pandas.core.indexes.period import PeriodIndex 

32from pandas.core.indexes.range import RangeIndex 

33from pandas.core.indexes.timedeltas import TimedeltaIndex 

34 

35if TYPE_CHECKING: 

36 from pandas._typing import Axis 

# Warning text telling the user that a non-aligned, non-concatenation axis is
# being sorted, and how to opt in to or out of that with ``sort=``.
# ``textwrap.dedent`` is a no-op here because no line of the literal is
# indented; the leading backslash suppresses the initial newline.
# NOTE(review): nothing in the visible part of this module references this
# constant -- confirm it is still used elsewhere before relying on it.
_sort_msg = textwrap.dedent(
    """\
Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.
"""
)

47 

48 

# Names exported by ``from <this module> import *``: the Index classes and
# helpers imported at the top of this file, plus the functions defined below.
__all__ = [
    "Index",
    "MultiIndex",
    "CategoricalIndex",
    "IntervalIndex",
    "RangeIndex",
    "InvalidIndexError",
    "TimedeltaIndex",
    "PeriodIndex",
    "DatetimeIndex",
    "_new_Index",
    "NaT",
    "ensure_index",
    "ensure_index_from_sequences",
    "get_objs_combined_axis",
    "union_indexes",
    "get_unanimous_names",
    "all_indexes_same",
    "default_index",
    "safe_sort_index",
]

70 

71 

def get_objs_combined_axis(
    objs,
    intersect: bool = False,
    axis: Axis = 0,
    sort: bool = True,
    copy: bool = False,
) -> Index:
    """
    Combine the axis labels of several objects into a single Index.

    Each object's labels along ``axis`` are fetched with ``_get_axis`` and
    the collected indexes are merged by ``_get_combined_index``.

    Parameters
    ----------
    objs : list
        Objects exposing ``_get_axis`` (Series or DataFrame, possibly mixed).
    intersect : bool, default False
        Combine by intersection when True, by union otherwise.
    axis : Axis, default 0
        Axis identifier forwarded to every object's ``_get_axis``.
    sort : bool, default True
        Whether the combined index should be sorted.
    copy : bool, default False
        Whether a copy of the combined index should be returned.

    Returns
    -------
    Index
    """
    per_object_axes = [member._get_axis(axis) for member in objs]
    return _get_combined_index(
        per_object_axes,
        intersect=intersect,
        sort=sort,
        copy=copy,
    )

104 

105 

106def _get_distinct_objs(objs: list[Index]) -> list[Index]: 

107 """ 

108 Return a list with distinct elements of "objs" (different ids). 

109 Preserves order. 

110 """ 

111 ids: set[int] = set() 

112 res = [] 

113 for obj in objs: 

114 if id(obj) not in ids: 

115 ids.add(id(obj)) 

116 res.append(obj) 

117 return res 

118 

119 

def _get_combined_index(
    indexes: list[Index],
    intersect: bool = False,
    sort: bool = False,
    copy: bool = False,
) -> Index:
    """
    Merge a list of indexes into one by union or intersection.

    Repeated references to the same object are dropped first. With nothing
    left an empty Index is produced; with a single entry that entry is used
    as is.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        Combine by intersection when True, by union otherwise.
    sort : bool, default False
        Whether to pass the combined index through ``safe_sort_index``.
    copy : bool, default False
        Whether to return a copy of the combined index.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    distinct = _get_distinct_objs(indexes)

    if not distinct:
        combined = Index([])
    elif len(distinct) == 1:
        combined = distinct[0]
    elif intersect:
        combined, *remaining = distinct
        for candidate in remaining:
            combined = combined.intersection(candidate)
    else:
        # the union is left unsorted here; sorting happens once, below
        combined = ensure_index(union_indexes(distinct, sort=False))

    if sort:
        combined = safe_sort_index(combined)

    # GH 29879
    return combined.copy() if copy else combined

166 

167 

def safe_sort_index(index: Index) -> Index:
    """
    Sort an index when possible, keeping its dtype and name(s).

    An index that is already monotonic increasing is returned untouched, and
    so is one whose values make ``safe_sort`` raise ``TypeError``.

    Parameters
    ----------
    index : an Index

    Returns
    -------
    Index
    """
    if index.is_monotonic_increasing:
        return index

    try:
        ordered = safe_sort(index)
    except TypeError:
        # values cannot be ordered: hand the input back unchanged
        return index

    if isinstance(ordered, Index):
        return ordered

    # a bare array came back, so rebuild an index carrying the original labels
    ordered = cast(np.ndarray, ordered)
    if isinstance(index, MultiIndex):
        return MultiIndex.from_tuples(ordered, names=index.names)
    return Index(ordered, name=index.name, dtype=index.dtype)

200 

201 

def union_indexes(indexes, sort: bool | None = True) -> Index:
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent.

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool or None, default True
        Whether the result index should come out sorted or not. ``None`` is
        treated like ``False`` by the truthiness checks in this function.

    Returns
    -------
    Index
        With exactly one input, that input is returned; only a plain list is
        wrapped in an Index (and sorted when ``sort`` is truthy).

    Raises
    ------
    AssertionError
        If ``indexes`` is empty.
    TypeError
        If tz-naive and tz-aware DatetimeIndex objects are mixed.
    """
    if len(indexes) == 0:
        raise AssertionError("Must have at least 1 Index to union")
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            if not sort:
                result = Index(result)
            else:
                result = Index(sorted(result))
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds, dtype) -> Index:
        """
        Concatenate indices and remove duplicates.

        Parameters
        ----------
        inds : list of Index or list objects
        dtype : dtype to set for the resulting Index

        Returns
        -------
        Index
        """
        if all(isinstance(ind, Index) for ind in inds):
            inds = [ind.astype(dtype, copy=False) for ind in inds]
            result = inds[0].unique()
            other = inds[1].append(inds[2:])
            # keep only the labels of the remaining indexes that the first
            # one does not already contain
            diff = other[result.get_indexer_for(other) == -1]
            if len(diff):
                result = result.append(diff.unique())
            if sort:
                result = result.sort_values()
            return result

        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        return Index(
            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
            dtype=dtype,
        )

    def _find_common_index_dtype(inds):
        """
        Finds a common type for the indexes to pass through to resulting index.

        Parameters
        ----------
        inds: list of Index or list objects

        Returns
        -------
        The common type or None if no indexes were given
        """
        # Read the argument rather than the enclosing ``indexes`` so the
        # helper honours whatever list it is handed.
        dtypes = [idx.dtype for idx in inds if isinstance(idx, Index)]
        if dtypes:
            return find_common_type(dtypes)
        return None

    if kind == "special":
        dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
        dti_tzs = [x for x in dtis if x.tz is not None]
        if len(dti_tzs) not in [0, len(dtis)]:
            # TODO: this behavior is not tested (so may not be desired),
            #  but is kept in order to keep behavior the same when
            #  deprecating union_many
            # test_frame_from_dict_with_mixed_indexes
            raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if len(dtis) == len(indexes):
            sort = True
        elif len(dtis) > 1:
            # If we have mixed timezones, our casting behavior may depend on
            #  the order of indexes, which we don't want.
            sort = False

            # TODO: what about Categorical[dt64]?
            # test_frame_from_dict_with_mixed_indexes
            indexes = [x.astype(object, copy=False) for x in indexes]

        # Pick the seed only after the optional object cast above so it always
        # comes from the list that is actually being folded.
        result = indexes[0]
        for other in indexes[1:]:
            result = result.union(other, sort=None if sort else False)
        return result

    elif kind == "array":
        dtype = _find_common_index_dtype(indexes)
        index = indexes[0]
        if not all(index.equals(other) for other in indexes[1:]):
            index = _unique_indices(indexes, dtype)

        name = get_unanimous_names(*indexes)[0]
        if name != index.name:
            index = index.rename(name)
        return index
    else:  # kind='list'
        dtype = _find_common_index_dtype(indexes)
        return _unique_indices(indexes, dtype)

328 

329 

330def _sanitize_and_check(indexes): 

331 """ 

332 Verify the type of indexes and convert lists to Index. 

333 

334 Cases: 

335 

336 - [list, list, ...]: Return ([list, list, ...], 'list') 

337 - [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...]) 

338 Lists are sorted and converted to Index. 

339 - [Index, Index, ...]: Return ([Index, Index, ...], TYPE) 

340 TYPE = 'special' if at least one special type, 'array' otherwise. 

341 

342 Parameters 

343 ---------- 

344 indexes : list of Index or list objects 

345 

346 Returns 

347 ------- 

348 sanitized_indexes : list of Index or list objects 

349 type : {'list', 'array', 'special'} 

350 """ 

351 kinds = list({type(index) for index in indexes}) 

352 

353 if list in kinds: 

354 if len(kinds) > 1: 

355 indexes = [ 

356 Index(list(x)) if not isinstance(x, Index) else x for x in indexes 

357 ] 

358 kinds.remove(list) 

359 else: 

360 return indexes, "list" 

361 

362 if len(kinds) > 1 or Index not in kinds: 

363 return indexes, "special" 

364 else: 

365 return indexes, "array" 

366 

367 

def all_indexes_same(indexes) -> bool:
    """
    Determine if all indexes contain the same elements.

    Parameters
    ----------
    indexes : iterable of Index objects

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
        An empty iterable yields True.
    """
    itr = iter(indexes)
    try:
        first = next(itr)
    except StopIteration:
        # Nothing to compare. Answer True instead of letting StopIteration
        # escape, which would be misread as exhaustion by an enclosing
        # iteration construct.
        return True
    return all(first.equals(index) for index in itr)

384 

385 

def default_index(n: int) -> RangeIndex:
    """Build an unnamed RangeIndex wrapping ``range(n)``."""
    return RangeIndex._simple_new(range(n), name=None)
388 return RangeIndex._simple_new(rng, name=None)