Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/api.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

126 statements  

1from __future__ import annotations 

2 

3import textwrap 

4from typing import cast 

5 

6import numpy as np 

7 

8from pandas._libs import ( 

9 NaT, 

10 lib, 

11) 

12from pandas._typing import Axis 

13from pandas.errors import InvalidIndexError 

14 

15from pandas.core.dtypes.cast import find_common_type 

16 

17from pandas.core.algorithms import safe_sort 

18from pandas.core.indexes.base import ( 

19 Index, 

20 _new_Index, 

21 ensure_index, 

22 ensure_index_from_sequences, 

23 get_unanimous_names, 

24) 

25from pandas.core.indexes.category import CategoricalIndex 

26from pandas.core.indexes.datetimes import DatetimeIndex 

27from pandas.core.indexes.interval import IntervalIndex 

28from pandas.core.indexes.multi import MultiIndex 

29from pandas.core.indexes.period import PeriodIndex 

30from pandas.core.indexes.range import RangeIndex 

31from pandas.core.indexes.timedeltas import TimedeltaIndex 

32 

# Warning text about the default sort on a non-aligned, non-concatenation axis.
# The literal is indented for readability; textwrap.dedent strips the common
# margin, so the resulting message has no leading whitespace on any line.
_sort_msg = textwrap.dedent(
    """\
    Sorting because non-concatenation axis is not aligned. A future version
    of pandas will change to not sort by default.

    To accept the future behavior, pass 'sort=False'.

    To retain the current behavior and silence the warning, pass 'sort=True'.
    """
)

43 

44 

# Names exported by this module: the Index classes and helpers imported
# above, plus the functions defined further down in this file.
__all__ = [
    "Index",
    "MultiIndex",
    "CategoricalIndex",
    "IntervalIndex",
    "RangeIndex",
    "InvalidIndexError",
    "TimedeltaIndex",
    "PeriodIndex",
    "DatetimeIndex",
    "_new_Index",
    "NaT",
    "ensure_index",
    "ensure_index_from_sequences",
    "get_objs_combined_axis",
    "union_indexes",
    "get_unanimous_names",
    "all_indexes_same",
    "default_index",
    "safe_sort_index",
]

66 

67 

def get_objs_combined_axis(
    objs, intersect: bool = False, axis: Axis = 0, sort: bool = True, copy: bool = False
) -> Index:
    """
    Combine the indexes found along one axis of several objects.

    Each object's index is fetched with ``obj._get_axis(axis)`` and the
    collected indexes are merged by ``_get_combined_index``.

    Parameters
    ----------
    objs : list
        Series or DataFrame objects, may be mix of the two.
    intersect : bool, default False
        If True, combine by intersection; otherwise combine by union.
    axis : Axis, default 0
        The axis to extract indexes from; forwarded to ``_get_axis``.
    sort : bool, default True
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    axis_indexes = [member._get_axis(axis) for member in objs]
    return _get_combined_index(
        axis_indexes,
        intersect=intersect,
        sort=sort,
        copy=copy,
    )

96 

97 

def _get_distinct_objs(objs: list[Index]) -> list[Index]:
    """
    Drop repeated references from "objs", keeping first occurrences in order.

    Entries are compared by identity (``id``), not by equality, so two
    separate objects with equal contents are both kept.
    """
    seen_ids: set[int] = set()
    distinct = []
    for candidate in objs:
        key = id(candidate)
        if key in seen_ids:
            continue
        seen_ids.add(key)
        distinct.append(candidate)
    return distinct

110 

111 

def _get_combined_index(
    indexes: list[Index],
    intersect: bool = False,
    sort: bool = False,
    copy: bool = False,
) -> Index:
    """
    Merge several indexes into one, by union or by intersection.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    sort : bool, default False
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    unique_indexes = _get_distinct_objs(indexes)
    count = len(unique_indexes)

    if count == 0:
        combined = Index([])
    elif count == 1:
        combined = unique_indexes[0]
    elif not intersect:
        # Sorting, if requested, is applied once below rather than here.
        combined = union_indexes(unique_indexes, sort=False)
    else:
        first, *rest = unique_indexes
        combined = first
        for other in rest:
            combined = combined.intersection(other)
    combined = ensure_index(combined)

    if sort:
        combined = safe_sort_index(combined)
    # GH 29879
    return combined.copy() if copy else combined

158 

159 

def safe_sort_index(index: Index) -> Index:
    """
    Return ``index`` in sorted order, preserving dtype and name(s).

    An index that is already monotonic increasing is returned as is, and so
    is one whose values make ``safe_sort`` raise ``TypeError``.

    Parameters
    ----------
    index : an Index

    Returns
    -------
    Index
    """
    if index.is_monotonic_increasing:
        return index

    try:
        sorted_values = safe_sort(index)
    except TypeError:
        # Values cannot be ordered: hand the input back unsorted.
        return index

    if isinstance(sorted_values, Index):
        return sorted_values

    # safe_sort returned a raw array; rebuild an index of the same flavour.
    sorted_values = cast(np.ndarray, sorted_values)
    if isinstance(index, MultiIndex):
        return MultiIndex.from_tuples(sorted_values, names=index.names)
    return Index(sorted_values, name=index.name, dtype=index.dtype)

192 

193 

def union_indexes(indexes, sort: bool | None = True) -> Index:
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent.

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool, default True
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index

    Raises
    ------
    AssertionError
        If ``indexes`` is empty.
    TypeError
        If tz-naive and tz-aware DatetimeIndex objects are mixed.

    Notes
    -----
    With exactly one entry, that entry is returned as is, except that a
    plain list is always sorted and wrapped in an Index, whatever ``sort``
    says.
    """
    if len(indexes) == 0:
        raise AssertionError("Must have at least 1 Index to union")
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            result = Index(sorted(result))
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds, dtype) -> Index:
        """
        Convert indexes to lists and concatenate them, removing duplicates.

        Parameters
        ----------
        inds : list of Index or list objects
        dtype : dtype to set for the resulting Index

        Returns
        -------
        Index
        """

        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        # ``sort`` is read from the enclosing function at call time.
        return Index(
            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
            dtype=dtype,
        )

    def _find_common_index_dtype(inds):
        """
        Finds a common type for the indexes to pass through to resulting index.

        Parameters
        ----------
        inds: list of Index or list objects

        Returns
        -------
        The common type or None if no indexes were given
        """
        # Use the argument itself, not the enclosing ``indexes`` variable.
        dtypes = [idx.dtype for idx in inds if isinstance(idx, Index)]
        return find_common_type(dtypes) if dtypes else None

    if kind == "special":
        result = indexes[0]

        dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
        dti_tzs = [x for x in dtis if x.tz is not None]
        if len(dti_tzs) not in [0, len(dtis)]:
            # TODO: this behavior is not tested (so may not be desired),
            #  but is kept in order to keep behavior the same when
            #  deprecating union_many
            # test_frame_from_dict_with_mixed_indexes
            raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if len(dtis) == len(indexes):
            sort = True

        elif len(dtis) > 1:
            # If we have mixed timezones, our casting behavior may depend on
            #  the order of indexes, which we don't want.
            sort = False

            # TODO: what about Categorical[dt64]?
            # test_frame_from_dict_with_mixed_indexes
            indexes = [x.astype(object, copy=False) for x in indexes]
            result = indexes[0]

        for other in indexes[1:]:
            result = result.union(other, sort=None if sort else False)
        return result

    elif kind == "array":
        dtype = _find_common_index_dtype(indexes)
        index = indexes[0]
        if not all(index.equals(other) for other in indexes[1:]):
            index = _unique_indices(indexes, dtype)

        name = get_unanimous_names(*indexes)[0]
        if name != index.name:
            index = index.rename(name)
        return index
    else:  # kind='list'
        dtype = _find_common_index_dtype(indexes)
        return _unique_indices(indexes, dtype)

309 

310 

def _sanitize_and_check(indexes):
    """
    Classify a collection of indexes and normalise mixed list/Index input.

    Cases:

    - only lists: the input is returned untouched with kind 'list'.
    - lists mixed with other types: every entry that is not an Index is
      converted with ``Index(list(entry))`` before classification.
    - otherwise: kind is 'array' when every entry's type is exactly
      ``Index``, and 'special' in all other cases.

    Parameters
    ----------
    indexes : list of Index or list objects

    Returns
    -------
    sanitized_indexes : list of Index or list objects
    type : {'list', 'array', 'special'}
    """
    entry_types = {type(entry) for entry in indexes}

    if list in entry_types:
        if len(entry_types) == 1:
            return indexes, "list"
        indexes = [
            entry if isinstance(entry, Index) else Index(list(entry))
            for entry in indexes
        ]
        entry_types.discard(list)

    if entry_types == {Index}:
        return indexes, "array"
    return indexes, "special"

347 

348 

def all_indexes_same(indexes) -> bool:
    """
    Determine if all indexes contain the same elements.

    Every index after the first is compared with the first one through
    ``Index.equals``.

    Parameters
    ----------
    indexes : iterable of Index objects
        May be any iterable, including a one-shot iterator.

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
        An empty iterable yields True, as there is nothing to differ.
    """
    itr = iter(indexes)
    try:
        first = next(itr)
    except StopIteration:
        # Avoid leaking StopIteration to the caller on empty input; inside
        # a generator it would surface as RuntimeError (PEP 479).
        return True
    return all(first.equals(index) for index in itr)

365 

366 

def default_index(n: int) -> RangeIndex:
    """
    Build an unnamed RangeIndex over ``range(0, n)``.

    Parameters
    ----------
    n : int
        Stop value of the range; the range starts at 0.

    Returns
    -------
    RangeIndex
    """
    return RangeIndex._simple_new(range(n), name=None)