Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/reshape/concat.py: 40%

306 statements  

1""" 

2Concat routines. 

3""" 

4from __future__ import annotations 

5 

6from collections import abc 

7from typing import ( 

8 TYPE_CHECKING, 

9 Callable, 

10 Literal, 

11 cast, 

12 overload, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas._config import using_copy_on_write 

19 

20from pandas.util._decorators import cache_readonly 

21from pandas.util._exceptions import find_stack_level 

22 

23from pandas.core.dtypes.common import ( 

24 is_bool, 

25 is_iterator, 

26) 

27from pandas.core.dtypes.concat import concat_compat 

28from pandas.core.dtypes.generic import ( 

29 ABCDataFrame, 

30 ABCSeries, 

31) 

32from pandas.core.dtypes.missing import isna 

33 

34from pandas.core.arrays.categorical import ( 

35 factorize_from_iterable, 

36 factorize_from_iterables, 

37) 

38import pandas.core.common as com 

39from pandas.core.indexes.api import ( 

40 Index, 

41 MultiIndex, 

42 all_indexes_same, 

43 default_index, 

44 ensure_index, 

45 get_objs_combined_axis, 

46 get_unanimous_names, 

47) 

48from pandas.core.internals import concatenate_managers 

49 

50if TYPE_CHECKING: 

51 from collections.abc import ( 

52 Hashable, 

53 Iterable, 

54 Mapping, 

55 ) 

56 

57 from pandas._typing import ( 

58 Axis, 

59 AxisInt, 

60 HashableT, 

61 ) 

62 

63 from pandas import ( 

64 DataFrame, 

65 Series, 

66 ) 

67 

68# --------------------------------------------------------------------- 

69# Concatenate DataFrame objects 

70 

71 

@overload
def concat(
    objs: Iterable[DataFrame] | Mapping[HashableT, DataFrame],
    *,
    axis: Literal[0, "index"] = ...,
    join: str = ...,
    ignore_index: bool = ...,
    keys: Iterable[Hashable] | None = ...,
    levels=...,
    names: list[HashableT] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> DataFrame:
    ...


@overload
def concat(
    objs: Iterable[Series] | Mapping[HashableT, Series],
    *,
    axis: Literal[0, "index"] = ...,
    join: str = ...,
    ignore_index: bool = ...,
    keys: Iterable[Hashable] | None = ...,
    levels=...,
    names: list[HashableT] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> Series:
    ...


@overload
def concat(
    objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
    *,
    axis: Literal[0, "index"] = ...,
    join: str = ...,
    ignore_index: bool = ...,
    keys: Iterable[Hashable] | None = ...,
    levels=...,
    names: list[HashableT] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> DataFrame | Series:
    ...


@overload
def concat(
    objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
    *,
    axis: Literal[1, "columns"],
    join: str = ...,
    ignore_index: bool = ...,
    keys: Iterable[Hashable] | None = ...,
    levels=...,
    names: list[HashableT] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> DataFrame:
    ...


@overload
def concat(
    objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
    *,
    axis: Axis = ...,
    join: str = ...,
    ignore_index: bool = ...,
    keys: Iterable[Hashable] | None = ...,
    levels=...,
    names: list[HashableT] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> DataFrame | Series:
    ...

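# Note: taken together, the overloads above encode the return-type rules:
# axis=1 always yields a DataFrame, while axis=0 yields a Series only when
# every input is a Series.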

def concat(
    objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
    *,
    axis: Axis = 0,
    join: str = "outer",
    ignore_index: bool = False,
    keys: Iterable[Hashable] | None = None,
    levels=None,
    names: list[HashableT] | None = None,
    verify_integrity: bool = False,
    sort: bool = False,
    copy: bool | None = None,
) -> DataFrame | Series:
    """
    Concatenate pandas objects along a particular axis.

    Allows optional set logic along the other axes.

    Can also add a layer of hierarchical indexing on the concatenation axis,
    which may be useful if the labels are the same (or overlapping) on
    the passed axis number.

    Parameters
    ----------
    objs : a sequence or mapping of Series or DataFrame objects
        If a mapping is passed, its keys will be used as the `keys`
        argument, unless `keys` is passed explicitly, in which case the
        mapping's values will be selected (see below). Any None objects
        will be dropped silently unless they are all None, in which case
        a ValueError will be raised.
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    join : {'inner', 'outer'}, default 'outer'
        How to handle indexes on other axis (or axes).
    ignore_index : bool, default False
        If True, do not use the index values along the concatenation axis. The
        resulting axis will be labeled 0, ..., n - 1. This is useful if you are
        concatenating objects where the concatenation axis does not have
        meaningful indexing information. Note the index values on the other
        axes are still respected in the join.
    keys : sequence, default None
        If multiple levels passed, should contain tuples. Construct
        hierarchical index using the passed keys as the outermost level.
    levels : list of sequences, default None
        Specific levels (unique values) to use for constructing a
        MultiIndex. Otherwise they will be inferred from the keys.
    names : list, default None
        Names for the levels in the resulting hierarchical index.
    verify_integrity : bool, default False
        Check whether the new concatenated axis contains duplicates. This can
        be very expensive relative to the actual data concatenation.
    sort : bool, default False
        Sort non-concatenation axis if it is not already aligned. One exception
        to this is when the non-concatenation axis is a DatetimeIndex and
        join='outer' and the axis is not already aligned. In that case, the
        non-concatenation axis is always sorted lexicographically.
    copy : bool, default True
        If False, do not copy data unnecessarily.

    Returns
    -------
    object, type of objs
        When concatenating all ``Series`` along the index (axis=0), a
        ``Series`` is returned. When ``objs`` contains at least one
        ``DataFrame``, a ``DataFrame`` is returned. When concatenating along
        the columns (axis=1), a ``DataFrame`` is returned.

    See Also
    --------
    DataFrame.join : Join DataFrames using indexes.
    DataFrame.merge : Merge DataFrames by indexes or columns.

    Notes
    -----
    The keys, levels, and names arguments are all optional.

    A walkthrough of how this method fits in with other tools for combining
    pandas objects can be found `here
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__.

    It is not recommended to build DataFrames by adding single rows in a
    for loop. Build a list of rows and make a DataFrame in a single concat
    (see the final example below).

    Examples
    --------
    Combine two ``Series``.

    >>> s1 = pd.Series(['a', 'b'])
    >>> s2 = pd.Series(['c', 'd'])
    >>> pd.concat([s1, s2])
    0    a
    1    b
    0    c
    1    d
    dtype: object

    Clear the existing index and reset it in the result
    by setting the ``ignore_index`` option to ``True``.

    >>> pd.concat([s1, s2], ignore_index=True)
    0    a
    1    b
    2    c
    3    d
    dtype: object

    Add a hierarchical index at the outermost level of
    the data with the ``keys`` option.

    >>> pd.concat([s1, s2], keys=['s1', 's2'])
    s1  0    a
        1    b
    s2  0    c
        1    d
    dtype: object

    Label the index keys you create with the ``names`` option.

    >>> pd.concat([s1, s2], keys=['s1', 's2'],
    ...           names=['Series name', 'Row ID'])
    Series name  Row ID
    s1           0         a
                 1         b
    s2           0         c
                 1         d
    dtype: object

    Combine two ``DataFrame`` objects with identical columns.

    >>> df1 = pd.DataFrame([['a', 1], ['b', 2]],
    ...                    columns=['letter', 'number'])
    >>> df1
      letter  number
    0      a       1
    1      b       2
    >>> df2 = pd.DataFrame([['c', 3], ['d', 4]],
    ...                    columns=['letter', 'number'])
    >>> df2
      letter  number
    0      c       3
    1      d       4
    >>> pd.concat([df1, df2])
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects with overlapping columns
    and return everything. Columns outside the intersection will
    be filled with ``NaN`` values.

    >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
    ...                    columns=['letter', 'number', 'animal'])
    >>> df3
      letter  number animal
    0      c       3    cat
    1      d       4    dog
    >>> pd.concat([df1, df3], sort=False)
      letter  number animal
    0      a       1    NaN
    1      b       2    NaN
    0      c       3    cat
    1      d       4    dog

    Combine ``DataFrame`` objects with overlapping columns
    and return only those that are shared by passing ``inner`` to
    the ``join`` keyword argument.

    >>> pd.concat([df1, df3], join="inner")
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects horizontally along the x axis by
    passing in ``axis=1``.

    >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],
    ...                    columns=['animal', 'name'])
    >>> pd.concat([df1, df4], axis=1)
      letter  number  animal    name
    0      a       1    bird   polly
    1      b       2  monkey  george

    Prevent the result from including duplicate index values with the
    ``verify_integrity`` option.

    >>> df5 = pd.DataFrame([1], index=['a'])
    >>> df5
       0
    a  1
    >>> df6 = pd.DataFrame([2], index=['a'])
    >>> df6
       0
    a  2
    >>> pd.concat([df5, df6], verify_integrity=True)
    Traceback (most recent call last):
        ...
    ValueError: Indexes have overlapping values: ['a']

    Append a single row to the end of a ``DataFrame`` object.

    >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0])
    >>> df7
       a  b
    0  1  2
    >>> new_row = pd.Series({'a': 3, 'b': 4})
    >>> new_row
    a    3
    b    4
    dtype: int64
    >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
       a  b
    0  1  2
    1  3  4
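
    As noted above, collecting the pieces first and concatenating once is
    much faster than appending rows in a loop:

    >>> rows = [pd.DataFrame({'a': [i]}) for i in range(3)]
    >>> pd.concat(rows, ignore_index=True)
       a
    0  0
    1  1
    2  2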

373 """ 

    if copy is None:
        if using_copy_on_write():
            copy = False
        else:
            copy = True
    elif copy and using_copy_on_write():
        copy = False

    op = _Concatenator(
        objs,
        axis=axis,
        ignore_index=ignore_index,
        join=join,
        keys=keys,
        levels=levels,
        names=names,
        verify_integrity=verify_integrity,
        copy=copy,
        sort=sort,
    )

    return op.get_result()


class _Concatenator:
    """
    Orchestrates a concatenation operation for BlockManagers
    """

    sort: bool

    def __init__(
        self,
        objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
        axis: Axis = 0,
        join: str = "outer",
        keys: Iterable[Hashable] | None = None,
        levels=None,
        names: list[HashableT] | None = None,
        ignore_index: bool = False,
        verify_integrity: bool = False,
        copy: bool = True,
        sort: bool = False,
    ) -> None:
        if isinstance(objs, (ABCSeries, ABCDataFrame, str)):
            raise TypeError(
                "first argument must be an iterable of pandas "
                f'objects, you passed an object of type "{type(objs).__name__}"'
            )

        if join == "outer":
            self.intersect = False
        elif join == "inner":
            self.intersect = True
        else:  # pragma: no cover
            raise ValueError(
                "Only can inner (intersect) or outer (union) join the other axis"
            )

        if not is_bool(sort):
            raise ValueError(
                f"The 'sort' keyword only accepts boolean values; {sort} was passed."
            )
        # Incompatible types in assignment (expression has type "Union[bool, bool_]",
        # variable has type "bool")
        self.sort = sort  # type: ignore[assignment]

        self.ignore_index = ignore_index
        self.verify_integrity = verify_integrity
        self.copy = copy

        objs, keys = self._clean_keys_and_objs(objs, keys)

        # figure out what our result ndim is going to be
        ndims = self._get_ndims(objs)
        sample, objs = self._get_sample_object(objs, ndims, keys, names, levels)

        # Standardize axis parameter to int
        if sample.ndim == 1:
            from pandas import DataFrame

            axis = DataFrame._get_axis_number(axis)
            self._is_frame = False
            self._is_series = True
        else:
            axis = sample._get_axis_number(axis)
            self._is_frame = True
            self._is_series = False

            # Need to flip BlockManager axis in the DataFrame special case
            axis = sample._get_block_manager_axis(axis)

        # if we have mixed ndims, then convert to highest ndim
        # creating column numbers as needed
        if len(ndims) > 1:
            objs = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis)

        self.objs = objs

        # note: this is the BlockManager axis (since DataFrame is transposed)
        self.bm_axis = axis
        self.axis = 1 - self.bm_axis if self._is_frame else 0
        self.keys = keys
        self.names = names or getattr(keys, "names", None)
        self.levels = levels

    def _get_ndims(self, objs: list[Series | DataFrame]) -> set[int]:
        # figure out what our result ndim is going to be
        ndims = set()
        for obj in objs:
            if not isinstance(obj, (ABCSeries, ABCDataFrame)):
                msg = (
                    f"cannot concatenate object of type '{type(obj)}'; "
                    "only Series and DataFrame objs are valid"
                )
                raise TypeError(msg)

            ndims.add(obj.ndim)
        return ndims

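    # Illustration: _clean_keys_and_objs({"x": s1, "y": s2}, None) returns
    # ([s1, s2], Index(["x", "y"])); None entries are dropped along with
    # their keys.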

    def _clean_keys_and_objs(
        self,
        objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
        keys,
    ) -> tuple[list[Series | DataFrame], Index | None]:
        if isinstance(objs, abc.Mapping):
            if keys is None:
                keys = list(objs.keys())
            objs_list = [objs[k] for k in keys]
        else:
            objs_list = list(objs)

        if len(objs_list) == 0:
            raise ValueError("No objects to concatenate")

        if keys is None:
            objs_list = list(com.not_none(*objs_list))
        else:
            # GH#1649
            clean_keys = []
            clean_objs = []
            if is_iterator(keys):
                keys = list(keys)
            if len(keys) != len(objs_list):
                # GH#43485
                warnings.warn(
                    "The behavior of pd.concat with len(keys) != len(objs) is "
                    "deprecated. In a future version this will raise instead of "
                    "truncating to the smaller of the two sequences",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            for k, v in zip(keys, objs_list):
                if v is None:
                    continue
                clean_keys.append(k)
                clean_objs.append(v)
            objs_list = clean_objs

            if isinstance(keys, MultiIndex):
                # TODO: retain levels?
                keys = type(keys).from_tuples(clean_keys, names=keys.names)
            else:
                name = getattr(keys, "name", None)
                keys = Index(clean_keys, name=name, dtype=getattr(keys, "dtype", None))

        if len(objs_list) == 0:
            raise ValueError("All objects passed were None")

        return objs_list, keys

    def _get_sample_object(
        self,
        objs: list[Series | DataFrame],
        ndims: set[int],
        keys,
        names,
        levels,
    ) -> tuple[Series | DataFrame, list[Series | DataFrame]]:
        # get the sample
        # want the highest ndim that we have, and must be non-empty
        # unless all objs are empty
        sample: Series | DataFrame | None = None
        if len(ndims) > 1:
            max_ndim = max(ndims)
            for obj in objs:
                if obj.ndim == max_ndim and np.sum(obj.shape):
                    sample = obj
                    break

        else:
            # filter out the empties if we have no multi-index possibilities
            # note: keep empty Series, as they affect the result columns / name
            non_empties = [obj for obj in objs if sum(obj.shape) > 0 or obj.ndim == 1]

            if len(non_empties) and (
                keys is None and names is None and levels is None and not self.intersect
            ):
                objs = non_empties
                sample = objs[0]

        if sample is None:
            sample = objs[0]
        return sample, objs

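    # Illustration: pd.concat([df, some_series], axis=1) has mixed ndims
    # (1 and 2), so _sanitize_mixed_ndim below promotes the Series to a
    # one-column DataFrame before the managers are concatenated.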

    def _sanitize_mixed_ndim(
        self,
        objs: list[Series | DataFrame],
        sample: Series | DataFrame,
        ignore_index: bool,
        axis: AxisInt,
    ) -> list[Series | DataFrame]:
        # if we have mixed ndims, then convert to highest ndim
        # creating column numbers as needed

        new_objs = []

        current_column = 0
        max_ndim = sample.ndim
        for obj in objs:
            ndim = obj.ndim
            if ndim == max_ndim:
                pass

            elif ndim != max_ndim - 1:
                raise ValueError(
                    "cannot concatenate unaligned mixed dimensional NDFrame objects"
                )

            else:
                name = getattr(obj, "name", None)
                if ignore_index or name is None:
                    if axis == 1:
                        # doing a row-wise concatenation so need everything
                        # to line up
                        name = 0
                    else:
                        # doing a column-wise concatenation so need series
                        # to have unique names
                        name = current_column
                        current_column += 1

                obj = sample._constructor({name: obj}, copy=False)

            new_objs.append(obj)

        return new_objs


    def get_result(self):
        cons: Callable[..., DataFrame | Series]
        sample: DataFrame | Series

        # series only
        if self._is_series:
            sample = cast("Series", self.objs[0])

            # stack blocks
            if self.bm_axis == 0:
                name = com.consensus_name_attr(self.objs)
                cons = sample._constructor

                arrs = [ser._values for ser in self.objs]

                res = concat_compat(arrs, axis=0)

                new_index: Index
                if self.ignore_index:
                    # We can avoid surprisingly-expensive _get_concat_axis
                    new_index = default_index(len(res))
                else:
                    new_index = self.new_axes[0]

                mgr = type(sample._mgr).from_array(res, index=new_index)

                result = sample._constructor_from_mgr(mgr, axes=mgr.axes)
                result._name = name
                return result.__finalize__(self, method="concat")

            # combine as columns in a frame
            else:
                data = dict(zip(range(len(self.objs)), self.objs))

                # GH28330 Preserves subclassed objects through concat
                cons = sample._constructor_expanddim

                index, columns = self.new_axes
                df = cons(data, index=index, copy=self.copy)
                df.columns = columns
                return df.__finalize__(self, method="concat")

        # combine block managers
        else:
            sample = cast("DataFrame", self.objs[0])

            mgrs_indexers = []
            for obj in self.objs:
                indexers = {}
                for ax, new_labels in enumerate(self.new_axes):
                    if ax == self.bm_axis:
                        # Suppress reindexing on concat axis
                        continue

                    # 1 - ax converts the BlockManager axis to a DataFrame axis
                    obj_labels = obj.axes[1 - ax]
                    if not new_labels.equals(obj_labels):
                        indexers[ax] = obj_labels.get_indexer(new_labels)

                mgrs_indexers.append((obj._mgr, indexers))

            new_data = concatenate_managers(
                mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy
            )
            if not self.copy and not using_copy_on_write():
                new_data._consolidate_inplace()

            out = sample._constructor_from_mgr(new_data, axes=new_data.axes)
            return out.__finalize__(self, method="concat")

    def _get_result_dim(self) -> int:
        if self._is_series and self.bm_axis == 1:
            return 2
        else:
            return self.objs[0].ndim

    @cache_readonly
    def new_axes(self) -> list[Index]:
        ndim = self._get_result_dim()
        return [
            self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
            for i in range(ndim)
        ]

    def _get_comb_axis(self, i: AxisInt) -> Index:
        data_axis = self.objs[0]._get_block_manager_axis(i)
        return get_objs_combined_axis(
            self.objs,
            axis=data_axis,
            intersect=self.intersect,
            sort=self.sort,
            copy=self.copy,
        )

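    # Illustration: for pd.concat([s1, s2], axis=1) with named Series, the
    # concat axis below becomes Index([s1.name, s2.name]); unnamed Series
    # fall back to positional integers.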

    @cache_readonly
    def _get_concat_axis(self) -> Index:
        """
        Return index to be used along concatenation axis.
        """
        if self._is_series:
            if self.bm_axis == 0:
                indexes = [x.index for x in self.objs]
            elif self.ignore_index:
                idx = default_index(len(self.objs))
                return idx
            elif self.keys is None:
                names: list[Hashable] = [None] * len(self.objs)
                num = 0
                has_names = False
                for i, x in enumerate(self.objs):
                    if x.ndim != 1:
                        raise TypeError(
                            f"Cannot concatenate type 'Series' with "
                            f"object of type '{type(x).__name__}'"
                        )
                    if x.name is not None:
                        names[i] = x.name
                        has_names = True
                    else:
                        names[i] = num
                        num += 1
                if has_names:
                    return Index(names)
                else:
                    return default_index(len(self.objs))
            else:
                return ensure_index(self.keys).set_names(self.names)
        else:
            indexes = [x.axes[self.axis] for x in self.objs]

        if self.ignore_index:
            idx = default_index(sum(len(i) for i in indexes))
            return idx

        if self.keys is None:
            if self.levels is not None:
                raise ValueError("levels supported only when keys is not None")
            concat_axis = _concat_indexes(indexes)
        else:
            concat_axis = _make_concat_multiindex(
                indexes, self.keys, self.levels, self.names
            )

        self._maybe_check_integrity(concat_axis)

        return concat_axis

    def _maybe_check_integrity(self, concat_index: Index):
        if self.verify_integrity:
            if not concat_index.is_unique:
                overlap = concat_index[concat_index.duplicated()].unique()
                raise ValueError(f"Indexes have overlapping values: {overlap}")

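# e.g. _concat_indexes([Index([0, 1]), Index([0, 1])]) -> Index([0, 1, 0, 1])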

def _concat_indexes(indexes) -> Index:
    return indexes[0].append(indexes[1:])


def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex:
    if (levels is None and isinstance(keys[0], tuple)) or (
        levels is not None and len(levels) > 1
    ):
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            _, levels = factorize_from_iterables(zipped)
        else:
            levels = [ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [ensure_index(keys).unique()]
        else:
            levels = [ensure_index(x) for x in levels]

    for level in levels:
        if not level.is_unique:
            raise ValueError(f"Level values not unique: {level.tolist()}")

    if not all_indexes_same(indexes) or not all(level.is_unique for level in levels):
        codes_list = []

        # things are potentially different sizes, so compute the exact codes
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            if isinstance(hlevel, Index) and hlevel.equals(level):
                lens = [len(idx) for idx in indexes]
                codes_list.append(np.repeat(np.arange(len(hlevel)), lens))
            else:
                for key, index in zip(hlevel, indexes):
                    # Find matching codes, include matching nan values as equal.
                    mask = (isna(level) & isna(key)) | (level == key)
                    if not mask.any():
                        raise ValueError(f"Key {key} not in level {level}")
                    i = np.nonzero(mask)[0][0]

                    to_concat.append(np.repeat(i, len(index)))
                codes_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            codes_list.extend(concat_index.codes)
        else:
            codes, categories = factorize_from_iterable(concat_index)
            levels.append(categories)
            codes_list.append(codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len({idx.nlevels for idx in indexes}) == 1:
                raise AssertionError(
                    "Cannot concat indices that do not have the same number of levels"
                )

            # also copies
            names = list(names) + list(get_unanimous_names(*indexes))

        return MultiIndex(
            levels=levels, codes=codes_list, names=names, verify_integrity=False
        )

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct codes
    new_codes = []

    # all indexes are identical here, so take the fast path: build codes by
    # repeating/tiling rather than recomputing them per piece

    for hlevel, level in zip(zipped, levels):
        hlevel_index = ensure_index(hlevel)
        mapped = level.get_indexer(hlevel_index)

        mask = mapped == -1
        if mask.any():
            raise ValueError(
                f"Values not found in passed level: {hlevel_index[mask]!s}"
            )

        new_codes.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes])
    else:
        new_levels.append(new_index.unique())
        single_codes = new_index.unique().get_indexer(new_index)
        new_codes.append(np.tile(single_codes, kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(
        levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
    )