Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/reshape/concat.py: 13%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

284 statements  

1""" 

2Concat routines. 

3""" 

4from __future__ import annotations 

5 

6from collections import abc 

7from typing import ( 

8 TYPE_CHECKING, 

9 Callable, 

10 Hashable, 

11 Iterable, 

12 Literal, 

13 Mapping, 

14 cast, 

15 overload, 

16) 

17 

18import numpy as np 

19 

20from pandas._config import using_copy_on_write 

21 

22from pandas._typing import ( 

23 Axis, 

24 AxisInt, 

25 HashableT, 

26) 

27from pandas.util._decorators import cache_readonly 

28 

29from pandas.core.dtypes.concat import concat_compat 

30from pandas.core.dtypes.generic import ( 

31 ABCDataFrame, 

32 ABCSeries, 

33) 

34from pandas.core.dtypes.inference import is_bool 

35from pandas.core.dtypes.missing import isna 

36 

37from pandas.core.arrays.categorical import ( 

38 factorize_from_iterable, 

39 factorize_from_iterables, 

40) 

41import pandas.core.common as com 

42from pandas.core.indexes.api import ( 

43 Index, 

44 MultiIndex, 

45 all_indexes_same, 

46 default_index, 

47 ensure_index, 

48 get_objs_combined_axis, 

49 get_unanimous_names, 

50) 

51from pandas.core.internals import concatenate_managers 

52 

53if TYPE_CHECKING: 

54 from pandas import ( 

55 DataFrame, 

56 Series, 

57 ) 

58 from pandas.core.generic import NDFrame 

59 

60# --------------------------------------------------------------------- 

61# Concatenate DataFrame objects 

62 

63 

# ---------------------------------------------------------------------
# @overload stubs for ``concat``.  These exist only for static type
# checkers: they narrow the return type based on the element type of
# ``objs`` and the ``axis`` argument (all-DataFrame input -> DataFrame,
# all-Series input along the index -> Series, axis=1/"columns" ->
# DataFrame).  They have no runtime behavior; the real implementation
# follows below.
@overload
def concat(
    objs: Iterable[DataFrame] | Mapping[HashableT, DataFrame],
    *,
    axis: Literal[0, "index"] = ...,
    join: str = ...,
    ignore_index: bool = ...,
    keys=...,
    levels=...,
    names=...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> DataFrame:
    ...


@overload
def concat(
    objs: Iterable[Series] | Mapping[HashableT, Series],
    *,
    axis: Literal[0, "index"] = ...,
    join: str = ...,
    ignore_index: bool = ...,
    keys=...,
    levels=...,
    names=...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> Series:
    ...


@overload
def concat(
    objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame],
    *,
    axis: Literal[0, "index"] = ...,
    join: str = ...,
    ignore_index: bool = ...,
    keys=...,
    levels=...,
    names=...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> DataFrame | Series:
    ...


@overload
def concat(
    objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame],
    *,
    axis: Literal[1, "columns"],
    join: str = ...,
    ignore_index: bool = ...,
    keys=...,
    levels=...,
    names=...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> DataFrame:
    ...


@overload
def concat(
    objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame],
    *,
    axis: Axis = ...,
    join: str = ...,
    ignore_index: bool = ...,
    keys=...,
    levels=...,
    names=...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool | None = ...,
) -> DataFrame | Series:
    ...

147 

148 

def concat(
    objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame],
    *,
    axis: Axis = 0,
    join: str = "outer",
    ignore_index: bool = False,
    keys=None,
    levels=None,
    names=None,
    verify_integrity: bool = False,
    sort: bool = False,
    copy: bool | None = None,
) -> DataFrame | Series:
    """
    Concatenate pandas objects along a particular axis.

    Allows optional set logic along the other axes.

    Can also add a layer of hierarchical indexing on the concatenation axis,
    which may be useful if the labels are the same (or overlapping) on
    the passed axis number.

    Parameters
    ----------
    objs : a sequence or mapping of Series or DataFrame objects
        If a mapping is passed, the sorted keys will be used as the `keys`
        argument, unless it is passed, in which case the values will be
        selected (see below). Any None objects will be dropped silently unless
        they are all None in which case a ValueError will be raised.
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    join : {'inner', 'outer'}, default 'outer'
        How to handle indexes on other axis (or axes).
    ignore_index : bool, default False
        If True, do not use the index values along the concatenation axis. The
        resulting axis will be labeled 0, ..., n - 1. This is useful if you are
        concatenating objects where the concatenation axis does not have
        meaningful indexing information. Note the index values on the other
        axes are still respected in the join.
    keys : sequence, default None
        If multiple levels passed, should contain tuples. Construct
        hierarchical index using the passed keys as the outermost level.
    levels : list of sequences, default None
        Specific levels (unique values) to use for constructing a
        MultiIndex. Otherwise they will be inferred from the keys.
    names : list, default None
        Names for the levels in the resulting hierarchical index.
    verify_integrity : bool, default False
        Check whether the new concatenated axis contains duplicates. This can
        be very expensive relative to the actual data concatenation.
    sort : bool, default False
        Sort non-concatenation axis if it is not already aligned.

    copy : bool or None, default None
        If False, do not copy data unnecessarily. When ``None``, behaves like
        ``True`` unless copy-on-write is enabled, in which case data is never
        copied unnecessarily.

    Returns
    -------
    object, type of objs
        When concatenating all ``Series`` along the index (axis=0), a
        ``Series`` is returned. When ``objs`` contains at least one
        ``DataFrame``, a ``DataFrame`` is returned. When concatenating along
        the columns (axis=1), a ``DataFrame`` is returned.

    See Also
    --------
    DataFrame.join : Join DataFrames using indexes.
    DataFrame.merge : Merge DataFrames by indexes or columns.

    Notes
    -----
    The keys, levels, and names arguments are all optional.

    A walkthrough of how this method fits in with other tools for combining
    pandas objects can be found `here
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__.

    It is not recommended to build DataFrames by adding single rows in a
    for loop. Build a list of rows and make a DataFrame in a single concat.

    Examples
    --------
    Combine two ``Series``.

    >>> s1 = pd.Series(['a', 'b'])
    >>> s2 = pd.Series(['c', 'd'])
    >>> pd.concat([s1, s2])
    0    a
    1    b
    0    c
    1    d
    dtype: object

    Clear the existing index and reset it in the result
    by setting the ``ignore_index`` option to ``True``.

    >>> pd.concat([s1, s2], ignore_index=True)
    0    a
    1    b
    2    c
    3    d
    dtype: object

    Add a hierarchical index at the outermost level of
    the data with the ``keys`` option.

    >>> pd.concat([s1, s2], keys=['s1', 's2'])
    s1  0    a
        1    b
    s2  0    c
        1    d
    dtype: object

    Label the index keys you create with the ``names`` option.

    >>> pd.concat([s1, s2], keys=['s1', 's2'],
    ...           names=['Series name', 'Row ID'])
    Series name  Row ID
    s1           0         a
                 1         b
    s2           0         c
                 1         d
    dtype: object

    Combine two ``DataFrame`` objects with identical columns.

    >>> df1 = pd.DataFrame([['a', 1], ['b', 2]],
    ...                    columns=['letter', 'number'])
    >>> df1
      letter  number
    0      a       1
    1      b       2
    >>> df2 = pd.DataFrame([['c', 3], ['d', 4]],
    ...                    columns=['letter', 'number'])
    >>> df2
      letter  number
    0      c       3
    1      d       4
    >>> pd.concat([df1, df2])
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects with overlapping columns
    and return everything. Columns outside the intersection will
    be filled with ``NaN`` values.

    >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
    ...                    columns=['letter', 'number', 'animal'])
    >>> df3
      letter  number animal
    0      c       3    cat
    1      d       4    dog
    >>> pd.concat([df1, df3], sort=False)
      letter  number animal
    0      a       1    NaN
    1      b       2    NaN
    0      c       3    cat
    1      d       4    dog

    Combine ``DataFrame`` objects with overlapping columns
    and return only those that are shared by passing ``inner`` to
    the ``join`` keyword argument.

    >>> pd.concat([df1, df3], join="inner")
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects horizontally along the x axis by
    passing in ``axis=1``.

    >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],
    ...                    columns=['animal', 'name'])
    >>> pd.concat([df1, df4], axis=1)
      letter  number  animal    name
    0      a       1    bird   polly
    1      b       2  monkey  george

    Prevent the result from including duplicate index values with the
    ``verify_integrity`` option.

    >>> df5 = pd.DataFrame([1], index=['a'])
    >>> df5
       0
    a  1
    >>> df6 = pd.DataFrame([2], index=['a'])
    >>> df6
       0
    a  2
    >>> pd.concat([df5, df6], verify_integrity=True)
    Traceback (most recent call last):
        ...
    ValueError: Indexes have overlapping values: ['a']

    Append a single row to the end of a ``DataFrame`` object.

    >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0])
    >>> df7
        a   b
    0   1   2
    >>> new_row = pd.Series({'a': 3, 'b': 4})
    >>> new_row
    a    3
    b    4
    dtype: int64
    >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
        a   b
    0   1   2
    1   3   4
    """
    # Resolve the ``copy`` default: under copy-on-write an explicit copy is
    # never needed, so both the ``None`` default and an explicit ``True``
    # collapse to ``False`` in that mode.
    if copy is None:
        copy = not using_copy_on_write()
    elif copy and using_copy_on_write():
        copy = False

    op = _Concatenator(
        objs,
        axis=axis,
        ignore_index=ignore_index,
        join=join,
        keys=keys,
        levels=levels,
        names=names,
        verify_integrity=verify_integrity,
        copy=copy,
        sort=sort,
    )

    return op.get_result()

386 

387 

class _Concatenator:
    """
    Orchestrates a concatenation operation for BlockManagers
    """

    def __init__(
        self,
        objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame],
        axis: Axis = 0,
        join: str = "outer",
        keys=None,
        levels=None,
        names=None,
        ignore_index: bool = False,
        verify_integrity: bool = False,
        copy: bool = True,
        sort: bool = False,
    ) -> None:
        # Validate inputs, drop Nones, standardize the axis to a BlockManager
        # axis, upcast mixed-ndim inputs (Series among DataFrames) to frames,
        # and precompute the result axes (``self.new_axes``).
        if isinstance(objs, (ABCSeries, ABCDataFrame, str)):
            raise TypeError(
                "first argument must be an iterable of pandas "
                f'objects, you passed an object of type "{type(objs).__name__}"'
            )

        if join == "outer":
            self.intersect = False
        elif join == "inner":
            self.intersect = True
        else:  # pragma: no cover
            raise ValueError(
                "Only can inner (intersect) or outer (union) join the other axis"
            )

        # A mapping supplies both the objects and (unless overridden) the keys.
        if isinstance(objs, abc.Mapping):
            if keys is None:
                keys = list(objs.keys())
            objs = [objs[k] for k in keys]
        else:
            objs = list(objs)

        if len(objs) == 0:
            raise ValueError("No objects to concatenate")

        if keys is None:
            objs = list(com.not_none(*objs))
        else:
            # #1649
            # Drop None objects together with their keys so the two stay aligned.
            clean_keys = []
            clean_objs = []
            for k, v in zip(keys, objs):
                if v is None:
                    continue
                clean_keys.append(k)
                clean_objs.append(v)
            objs = clean_objs

            if isinstance(keys, MultiIndex):
                # TODO: retain levels?
                keys = type(keys).from_tuples(clean_keys, names=keys.names)
            else:
                name = getattr(keys, "name", None)
                keys = Index(clean_keys, name=name, dtype=getattr(keys, "dtype", None))

        if len(objs) == 0:
            raise ValueError("All objects passed were None")

        # figure out what our result ndim is going to be
        ndims = set()
        for obj in objs:
            if not isinstance(obj, (ABCSeries, ABCDataFrame)):
                msg = (
                    f"cannot concatenate object of type '{type(obj)}'; "
                    "only Series and DataFrame objs are valid"
                )
                raise TypeError(msg)

            ndims.add(obj.ndim)

        # get the sample
        # want the highest ndim that we have, and must be non-empty
        # unless all objs are empty
        sample: NDFrame | None = None
        if len(ndims) > 1:
            max_ndim = max(ndims)
            for obj in objs:
                if obj.ndim == max_ndim and np.sum(obj.shape):
                    sample = obj
                    break

        else:
            # filter out the empties if we have not multi-index possibilities
            # note to keep empty Series as it affect to result columns / name
            non_empties = [
                obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries)
            ]

            if len(non_empties) and (
                keys is None and names is None and levels is None and not self.intersect
            ):
                objs = non_empties
                sample = objs[0]

        if sample is None:
            # all objs were empty: fall back to the first one as the sample
            sample = objs[0]
        self.objs = objs

        # Standardize axis parameter to int
        if isinstance(sample, ABCSeries):
            from pandas import DataFrame

            axis = DataFrame._get_axis_number(axis)
        else:
            axis = sample._get_axis_number(axis)

        # Need to flip BlockManager axis in the DataFrame special case
        self._is_frame = isinstance(sample, ABCDataFrame)
        if self._is_frame:
            axis = sample._get_block_manager_axis(axis)

        self._is_series = isinstance(sample, ABCSeries)
        if not 0 <= axis <= sample.ndim:
            raise AssertionError(
                f"axis must be between 0 and {sample.ndim}, input was {axis}"
            )

        # if we have mixed ndims, then convert to highest ndim
        # creating column numbers as needed
        if len(ndims) > 1:
            current_column = 0
            max_ndim = sample.ndim
            self.objs, objs = [], self.objs
            for obj in objs:
                ndim = obj.ndim
                if ndim == max_ndim:
                    pass

                elif ndim != max_ndim - 1:
                    raise ValueError(
                        "cannot concatenate unaligned mixed "
                        "dimensional NDFrame objects"
                    )

                else:
                    # Series being concatenated with frames: wrap it as a
                    # one-column frame.  Unnamed (or ignored) Series get
                    # sequential integer column labels.
                    name = getattr(obj, "name", None)
                    if ignore_index or name is None:
                        name = current_column
                        current_column += 1

                    # doing a row-wise concatenation so need everything
                    # to line up
                    if self._is_frame and axis == 1:
                        name = 0
                    # mypy needs to know sample is not an NDFrame
                    sample = cast("DataFrame | Series", sample)
                    obj = sample._constructor({name: obj}, copy=False)

                self.objs.append(obj)

        # note: this is the BlockManager axis (since DataFrame is transposed)
        self.bm_axis = axis
        self.axis = 1 - self.bm_axis if self._is_frame else 0
        self.keys = keys
        self.names = names or getattr(keys, "names", None)
        self.levels = levels

        if not is_bool(sort):
            raise ValueError(
                f"The 'sort' keyword only accepts boolean values; {sort} was passed."
            )
        self.sort = sort

        self.ignore_index = ignore_index
        self.verify_integrity = verify_integrity
        self.copy = copy

        self.new_axes = self._get_new_axes()

    def get_result(self):
        """
        Produce the concatenated result: a Series when concatenating Series
        along the index, otherwise a DataFrame built via the BlockManagers.
        """
        cons: Callable[..., DataFrame | Series]
        sample: DataFrame | Series

        # series only
        if self._is_series:
            sample = cast("Series", self.objs[0])

            # stack blocks
            if self.bm_axis == 0:
                name = com.consensus_name_attr(self.objs)
                cons = sample._constructor

                arrs = [ser._values for ser in self.objs]

                res = concat_compat(arrs, axis=0)
                result = cons(res, index=self.new_axes[0], name=name, dtype=res.dtype)
                return result.__finalize__(self, method="concat")

            # combine as columns in a frame
            else:
                data = dict(zip(range(len(self.objs)), self.objs))

                # GH28330 Preserves subclassed objects through concat
                cons = sample._constructor_expanddim

                index, columns = self.new_axes
                df = cons(data, index=index, copy=self.copy)
                df.columns = columns
                return df.__finalize__(self, method="concat")

        # combine block managers
        else:
            sample = cast("DataFrame", self.objs[0])

            # For each object, compute the indexers needed to reindex its
            # non-concat axes onto the result axes.
            mgrs_indexers = []
            for obj in self.objs:
                indexers = {}
                for ax, new_labels in enumerate(self.new_axes):
                    # ::-1 to convert BlockManager ax to DataFrame ax
                    if ax == self.bm_axis:
                        # Suppress reindexing on concat axis
                        continue

                    # 1-ax to convert BlockManager axis to DataFrame axis
                    obj_labels = obj.axes[1 - ax]
                    if not new_labels.equals(obj_labels):
                        indexers[ax] = obj_labels.get_indexer(new_labels)

                mgrs_indexers.append((obj._mgr, indexers))

            new_data = concatenate_managers(
                mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy
            )
            if not self.copy and not using_copy_on_write():
                new_data._consolidate_inplace()

            cons = sample._constructor
            return cons(new_data).__finalize__(self, method="concat")

    def _get_result_dim(self) -> int:
        # Series concatenated along axis 1 (bm_axis == 1) become a 2-D frame.
        if self._is_series and self.bm_axis == 1:
            return 2
        else:
            return self.objs[0].ndim

    def _get_new_axes(self) -> list[Index]:
        # One Index per result dimension: the concat axis gets the combined
        # concatenation index, every other axis gets the joined/combined axis.
        ndim = self._get_result_dim()
        return [
            self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
            for i in range(ndim)
        ]

    def _get_comb_axis(self, i: AxisInt) -> Index:
        # Union (outer) or intersection (inner) of the non-concat axis labels.
        data_axis = self.objs[0]._get_block_manager_axis(i)
        return get_objs_combined_axis(
            self.objs,
            axis=data_axis,
            intersect=self.intersect,
            sort=self.sort,
            copy=self.copy,
        )

    @cache_readonly
    def _get_concat_axis(self) -> Index:
        """
        Return index to be used along concatenation axis.
        """
        if self._is_series:
            if self.bm_axis == 0:
                indexes = [x.index for x in self.objs]
            elif self.ignore_index:
                idx = default_index(len(self.objs))
                return idx
            elif self.keys is None:
                # Series stacked as columns: build column labels from the
                # Series names, numbering the unnamed ones sequentially.
                names: list[Hashable] = [None] * len(self.objs)
                num = 0
                has_names = False
                for i, x in enumerate(self.objs):
                    if not isinstance(x, ABCSeries):
                        raise TypeError(
                            f"Cannot concatenate type 'Series' with "
                            f"object of type '{type(x).__name__}'"
                        )
                    if x.name is not None:
                        names[i] = x.name
                        has_names = True
                    else:
                        names[i] = num
                        num += 1
                if has_names:
                    return Index(names)
                else:
                    return default_index(len(self.objs))
            else:
                return ensure_index(self.keys).set_names(self.names)
        else:
            indexes = [x.axes[self.axis] for x in self.objs]

        if self.ignore_index:
            idx = default_index(sum(len(i) for i in indexes))
            return idx

        if self.keys is None:
            if self.levels is not None:
                raise ValueError("levels supported only when keys is not None")
            concat_axis = _concat_indexes(indexes)
        else:
            concat_axis = _make_concat_multiindex(
                indexes, self.keys, self.levels, self.names
            )

        self._maybe_check_integrity(concat_axis)

        return concat_axis

    def _maybe_check_integrity(self, concat_index: Index):
        # Only enforced when the caller asked for verify_integrity.
        if self.verify_integrity:
            if not concat_index.is_unique:
                overlap = concat_index[concat_index.duplicated()].unique()
                raise ValueError(f"Indexes have overlapping values: {overlap}")

706 

707 

708def _concat_indexes(indexes) -> Index: 

709 return indexes[0].append(indexes[1:]) 

710 

711 

def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex:
    """
    Build the MultiIndex for the concatenation axis from the per-object
    ``keys`` (outer level(s)) plus the objects' own indexes (inner level(s)).

    ``levels``/``names`` follow the semantics of :func:`concat`.  Raises
    ValueError for non-unique levels or keys missing from a passed level.
    """
    # Multiple outer levels when the keys are tuples (and levels were not
    # given) or when more than one explicit level was passed.
    if (levels is None and isinstance(keys[0], tuple)) or (
        levels is not None and len(levels) > 1
    ):
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            _, levels = factorize_from_iterables(zipped)
        else:
            levels = [ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [ensure_index(keys).unique()]
        else:
            levels = [ensure_index(x) for x in levels]

    for level in levels:
        if not level.is_unique:
            raise ValueError(f"Level values not unique: {level.tolist()}")

    # General (slow) path: the objects' indexes differ, so codes must be
    # computed per level rather than tiled.
    if not all_indexes_same(indexes) or not all(level.is_unique for level in levels):
        codes_list = []

        # things are potentially different sizes, so compute the exact codes
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            if isinstance(hlevel, Index) and hlevel.equals(level):
                lens = [len(idx) for idx in indexes]
                codes_list.append(np.repeat(np.arange(len(hlevel)), lens))
            else:
                for key, index in zip(hlevel, indexes):
                    # Find matching codes, include matching nan values as equal.
                    mask = (isna(level) & isna(key)) | (level == key)
                    if not mask.any():
                        raise ValueError(f"Key {key} not in level {level}")
                    i = np.nonzero(mask)[0][0]

                    to_concat.append(np.repeat(i, len(index)))
                codes_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            codes_list.extend(concat_index.codes)
        else:
            codes, categories = factorize_from_iterable(concat_index)
            levels.append(categories)
            codes_list.append(codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len({idx.nlevels for idx in indexes}) == 1:
                raise AssertionError(
                    "Cannot concat indices that do not have the same number of levels"
                )

            # also copies
            names = list(names) + list(get_unanimous_names(*indexes))

        return MultiIndex(
            levels=levels, codes=codes_list, names=names, verify_integrity=False
        )

    # Fast path: every object shares the same index, so each outer-level code
    # repeats n times and the inner-level codes tile across the pieces.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct codes
    new_codes = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        hlevel = ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        mask = mapped == -1
        if mask.any():
            raise ValueError(f"Values not found in passed level: {hlevel[mask]!s}")

        new_codes.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes])
    else:
        new_levels.append(new_index.unique())
        single_codes = new_index.unique().get_indexer(new_index)
        new_codes.append(np.tile(single_codes, kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(
        levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
    )