1"""
2Concat routines.
3"""
4from __future__ import annotations
5
6from collections import abc
7from typing import (
8 TYPE_CHECKING,
9 Callable,
10 Literal,
11 cast,
12 overload,
13)
14import warnings
15
16import numpy as np
17
18from pandas._config import using_copy_on_write
19
20from pandas.util._decorators import cache_readonly
21from pandas.util._exceptions import find_stack_level
22
23from pandas.core.dtypes.common import (
24 is_bool,
25 is_iterator,
26)
27from pandas.core.dtypes.concat import concat_compat
28from pandas.core.dtypes.generic import (
29 ABCDataFrame,
30 ABCSeries,
31)
32from pandas.core.dtypes.missing import isna
33
34from pandas.core.arrays.categorical import (
35 factorize_from_iterable,
36 factorize_from_iterables,
37)
38import pandas.core.common as com
39from pandas.core.indexes.api import (
40 Index,
41 MultiIndex,
42 all_indexes_same,
43 default_index,
44 ensure_index,
45 get_objs_combined_axis,
46 get_unanimous_names,
47)
48from pandas.core.internals import concatenate_managers
49
50if TYPE_CHECKING:
51 from collections.abc import (
52 Hashable,
53 Iterable,
54 Mapping,
55 )
56
57 from pandas._typing import (
58 Axis,
59 AxisInt,
60 HashableT,
61 )
62
63 from pandas import (
64 DataFrame,
65 Series,
66 )
67
68# ---------------------------------------------------------------------
69# Concatenate DataFrame objects
70
71
72@overload
73def concat(
74 objs: Iterable[DataFrame] | Mapping[HashableT, DataFrame],
75 *,
76 axis: Literal[0, "index"] = ...,
77 join: str = ...,
78 ignore_index: bool = ...,
79 keys: Iterable[Hashable] | None = ...,
80 levels=...,
81 names: list[HashableT] | None = ...,
82 verify_integrity: bool = ...,
83 sort: bool = ...,
84 copy: bool | None = ...,
85) -> DataFrame:
86 ...
87
88
89@overload
90def concat(
91 objs: Iterable[Series] | Mapping[HashableT, Series],
92 *,
93 axis: Literal[0, "index"] = ...,
94 join: str = ...,
95 ignore_index: bool = ...,
96 keys: Iterable[Hashable] | None = ...,
97 levels=...,
98 names: list[HashableT] | None = ...,
99 verify_integrity: bool = ...,
100 sort: bool = ...,
101 copy: bool | None = ...,
102) -> Series:
103 ...
104
105
106@overload
107def concat(
108 objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
109 *,
110 axis: Literal[0, "index"] = ...,
111 join: str = ...,
112 ignore_index: bool = ...,
113 keys: Iterable[Hashable] | None = ...,
114 levels=...,
115 names: list[HashableT] | None = ...,
116 verify_integrity: bool = ...,
117 sort: bool = ...,
118 copy: bool | None = ...,
119) -> DataFrame | Series:
120 ...
121
122
123@overload
124def concat(
125 objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
126 *,
127 axis: Literal[1, "columns"],
128 join: str = ...,
129 ignore_index: bool = ...,
130 keys: Iterable[Hashable] | None = ...,
131 levels=...,
132 names: list[HashableT] | None = ...,
133 verify_integrity: bool = ...,
134 sort: bool = ...,
135 copy: bool | None = ...,
136) -> DataFrame:
137 ...
138
139
140@overload
141def concat(
142 objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
143 *,
144 axis: Axis = ...,
145 join: str = ...,
146 ignore_index: bool = ...,
147 keys: Iterable[Hashable] | None = ...,
148 levels=...,
149 names: list[HashableT] | None = ...,
150 verify_integrity: bool = ...,
151 sort: bool = ...,
152 copy: bool | None = ...,
153) -> DataFrame | Series:
154 ...
155
156
def concat(
    objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
    *,
    axis: Axis = 0,
    join: str = "outer",
    ignore_index: bool = False,
    keys: Iterable[Hashable] | None = None,
    levels=None,
    names: list[HashableT] | None = None,
    verify_integrity: bool = False,
    sort: bool = False,
    copy: bool | None = None,
) -> DataFrame | Series:
    """
    Concatenate pandas objects along a particular axis.

    Allows optional set logic along the other axes.

    Can also add a layer of hierarchical indexing on the concatenation axis,
    which may be useful if the labels are the same (or overlapping) on
    the passed axis number.

    Parameters
    ----------
    objs : a sequence or mapping of Series or DataFrame objects
        If a mapping is passed, its keys will be used as the `keys`
        argument, unless `keys` is passed explicitly, in which case the
        values will be selected (see below). Any None objects will be
        dropped silently unless they are all None, in which case a
        ValueError will be raised.
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    join : {'inner', 'outer'}, default 'outer'
        How to handle indexes on other axis (or axes).
    ignore_index : bool, default False
        If True, do not use the index values along the concatenation axis. The
        resulting axis will be labeled 0, ..., n - 1. This is useful if you are
        concatenating objects where the concatenation axis does not have
        meaningful indexing information. Note the index values on the other
        axes are still respected in the join.
    keys : sequence, default None
        If multiple levels passed, should contain tuples. Construct
        hierarchical index using the passed keys as the outermost level.
    levels : list of sequences, default None
        Specific levels (unique values) to use for constructing a
        MultiIndex. Otherwise they will be inferred from the keys.
    names : list, default None
        Names for the levels in the resulting hierarchical index.
    verify_integrity : bool, default False
        Check whether the new concatenated axis contains duplicates. This can
        be very expensive relative to the actual data concatenation.
    sort : bool, default False
        Sort non-concatenation axis if it is not already aligned. One exception
        to this is when the non-concatenation axis is a DatetimeIndex and
        join='outer' and the axis is not already aligned. In that case, the
        non-concatenation axis is always sorted lexicographically.
    copy : bool, default True
        If False, do not copy data unnecessarily.

    Returns
    -------
    object, type of objs
        When concatenating all ``Series`` along the index (axis=0), a
        ``Series`` is returned. When ``objs`` contains at least one
        ``DataFrame``, a ``DataFrame`` is returned. When concatenating along
        the columns (axis=1), a ``DataFrame`` is returned.

    See Also
    --------
    DataFrame.join : Join DataFrames using indexes.
    DataFrame.merge : Merge DataFrames by indexes or columns.

    Notes
    -----
    The keys, levels, and names arguments are all optional.

    A walkthrough of how this method fits in with other tools for combining
    pandas objects can be found `here
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__.

    It is not recommended to build DataFrames by adding single rows in a
    for loop. Build a list of rows and make a DataFrame in a single concat.

    Examples
    --------
    Combine two ``Series``.

    >>> s1 = pd.Series(['a', 'b'])
    >>> s2 = pd.Series(['c', 'd'])
    >>> pd.concat([s1, s2])
    0    a
    1    b
    0    c
    1    d
    dtype: object

    Clear the existing index and reset it in the result
    by setting the ``ignore_index`` option to ``True``.

    >>> pd.concat([s1, s2], ignore_index=True)
    0    a
    1    b
    2    c
    3    d
    dtype: object

    Add a hierarchical index at the outermost level of
    the data with the ``keys`` option.

    >>> pd.concat([s1, s2], keys=['s1', 's2'])
    s1  0    a
        1    b
    s2  0    c
        1    d
    dtype: object

    Label the index keys you create with the ``names`` option.

    >>> pd.concat([s1, s2], keys=['s1', 's2'],
    ...           names=['Series name', 'Row ID'])
    Series name  Row ID
    s1           0         a
                 1         b
    s2           0         c
                 1         d
    dtype: object

    Combine two ``DataFrame`` objects with identical columns.

    >>> df1 = pd.DataFrame([['a', 1], ['b', 2]],
    ...                    columns=['letter', 'number'])
    >>> df1
      letter  number
    0      a       1
    1      b       2
    >>> df2 = pd.DataFrame([['c', 3], ['d', 4]],
    ...                    columns=['letter', 'number'])
    >>> df2
      letter  number
    0      c       3
    1      d       4
    >>> pd.concat([df1, df2])
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects with overlapping columns
    and return everything. Columns outside the intersection will
    be filled with ``NaN`` values.

    >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
    ...                    columns=['letter', 'number', 'animal'])
    >>> df3
      letter  number animal
    0      c       3    cat
    1      d       4    dog
    >>> pd.concat([df1, df3], sort=False)
      letter  number animal
    0      a       1    NaN
    1      b       2    NaN
    0      c       3    cat
    1      d       4    dog

    Combine ``DataFrame`` objects with overlapping columns
    and return only those that are shared by passing ``inner`` to
    the ``join`` keyword argument.

    >>> pd.concat([df1, df3], join="inner")
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects horizontally along the x axis by
    passing in ``axis=1``.

    >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],
    ...                    columns=['animal', 'name'])
    >>> pd.concat([df1, df4], axis=1)
      letter  number  animal    name
    0      a       1    bird   polly
    1      b       2  monkey  george

    Prevent the result from including duplicate index values with the
    ``verify_integrity`` option.

    >>> df5 = pd.DataFrame([1], index=['a'])
    >>> df5
       0
    a  1
    >>> df6 = pd.DataFrame([2], index=['a'])
    >>> df6
       0
    a  2
    >>> pd.concat([df5, df6], verify_integrity=True)
    Traceback (most recent call last):
        ...
    ValueError: Indexes have overlapping values: ['a']

    Append a single row to the end of a ``DataFrame`` object.

    >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0])
    >>> df7
       a  b
    0  1  2
    >>> new_row = pd.Series({'a': 3, 'b': 4})
    >>> new_row
    a    3
    b    4
    dtype: int64
    >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
       a  b
    0  1  2
    1  3  4
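
    Instead of appending rows one at a time in a loop, build the pieces in
    a list and concatenate them once.

    >>> rows = [pd.DataFrame({'a': [i], 'b': [i * 2]}) for i in range(3)]
    >>> pd.concat(rows, ignore_index=True)
       a  b
    0  0  0
    1  1  2
    2  2  4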
373 """
    if copy is None:
        if using_copy_on_write():
            copy = False
        else:
            copy = True
    elif copy and using_copy_on_write():
        copy = False

    op = _Concatenator(
        objs,
        axis=axis,
        ignore_index=ignore_index,
        join=join,
        keys=keys,
        levels=levels,
        names=names,
        verify_integrity=verify_integrity,
        copy=copy,
        sort=sort,
    )

    return op.get_result()


class _Concatenator:
    """
    Orchestrates a concatenation operation for BlockManagers
    """

    sort: bool

    def __init__(
        self,
        objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
        axis: Axis = 0,
        join: str = "outer",
        keys: Iterable[Hashable] | None = None,
        levels=None,
        names: list[HashableT] | None = None,
        ignore_index: bool = False,
        verify_integrity: bool = False,
        copy: bool = True,
        sort: bool = False,
    ) -> None:
        if isinstance(objs, (ABCSeries, ABCDataFrame, str)):
            raise TypeError(
                "first argument must be an iterable of pandas "
                f'objects, you passed an object of type "{type(objs).__name__}"'
            )

        if join == "outer":
            self.intersect = False
        elif join == "inner":
            self.intersect = True
        else:  # pragma: no cover
            raise ValueError(
                "Only can inner (intersect) or outer (union) join the other axis"
            )

        if not is_bool(sort):
            raise ValueError(
                f"The 'sort' keyword only accepts boolean values; {sort} was passed."
            )
        # Incompatible types in assignment (expression has type "Union[bool, bool_]",
        # variable has type "bool")
        self.sort = sort  # type: ignore[assignment]

        self.ignore_index = ignore_index
        self.verify_integrity = verify_integrity
        self.copy = copy

        objs, keys = self._clean_keys_and_objs(objs, keys)

        # figure out what our result ndim is going to be
        ndims = self._get_ndims(objs)
        sample, objs = self._get_sample_object(objs, ndims, keys, names, levels)

        # Standardize axis parameter to int
        if sample.ndim == 1:
            from pandas import DataFrame

            axis = DataFrame._get_axis_number(axis)
            self._is_frame = False
            self._is_series = True
        else:
            axis = sample._get_axis_number(axis)
            self._is_frame = True
            self._is_series = False

        # Need to flip BlockManager axis in the DataFrame special case
        axis = sample._get_block_manager_axis(axis)
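        # (A DataFrame's blocks are stored transposed, so the user-facing
        # axis 0/"index" maps to BlockManager axis 1 and vice versa.)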

        # if we have mixed ndims, then convert to highest ndim
        # creating column numbers as needed
        if len(ndims) > 1:
            objs = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis)

        self.objs = objs

        # note: this is the BlockManager axis (since DataFrame is transposed)
        self.bm_axis = axis
        self.axis = 1 - self.bm_axis if self._is_frame else 0
        self.keys = keys
        self.names = names or getattr(keys, "names", None)
        self.levels = levels

    def _get_ndims(self, objs: list[Series | DataFrame]) -> set[int]:
        # figure out what our result ndim is going to be
        ndims = set()
        for obj in objs:
            if not isinstance(obj, (ABCSeries, ABCDataFrame)):
                msg = (
                    f"cannot concatenate object of type '{type(obj)}'; "
                    "only Series and DataFrame objs are valid"
                )
                raise TypeError(msg)

            ndims.add(obj.ndim)
        return ndims

    def _clean_keys_and_objs(
        self,
        objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
        keys,
    ) -> tuple[list[Series | DataFrame], Index | None]:
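        """
        Materialize ``objs`` as a list, resolving mappings and silently
        dropping any None entries; return it along with ``keys`` coerced
        to an Index (or None).
        """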
        if isinstance(objs, abc.Mapping):
            if keys is None:
                keys = list(objs.keys())
            objs_list = [objs[k] for k in keys]
        else:
            objs_list = list(objs)

        if len(objs_list) == 0:
            raise ValueError("No objects to concatenate")

        if keys is None:
            objs_list = list(com.not_none(*objs_list))
        else:
            # GH#1649
            clean_keys = []
            clean_objs = []
            if is_iterator(keys):
                keys = list(keys)
            if len(keys) != len(objs_list):
                # GH#43485
                warnings.warn(
                    "The behavior of pd.concat with len(keys) != len(objs) is "
                    "deprecated. In a future version this will raise instead of "
                    "truncating to the smaller of the two sequences",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            for k, v in zip(keys, objs_list):
                if v is None:
                    continue
                clean_keys.append(k)
                clean_objs.append(v)
            objs_list = clean_objs

            if isinstance(keys, MultiIndex):
                # TODO: retain levels?
                keys = type(keys).from_tuples(clean_keys, names=keys.names)
            else:
                name = getattr(keys, "name", None)
                keys = Index(clean_keys, name=name, dtype=getattr(keys, "dtype", None))

        if len(objs_list) == 0:
            raise ValueError("All objects passed were None")

        return objs_list, keys

    def _get_sample_object(
        self,
        objs: list[Series | DataFrame],
        ndims: set[int],
        keys,
        names,
        levels,
    ) -> tuple[Series | DataFrame, list[Series | DataFrame]]:
        # get the sample
        # want the highest ndim that we have, and must be non-empty
        # unless all objs are empty
        sample: Series | DataFrame | None = None
        if len(ndims) > 1:
            max_ndim = max(ndims)
            for obj in objs:
                if obj.ndim == max_ndim and np.sum(obj.shape):
                    sample = obj
                    break

        else:
            # filter out the empties if we don't have multi-index possibilities
            # note: keep empty Series, as they affect the result columns / name
            non_empties = [obj for obj in objs if sum(obj.shape) > 0 or obj.ndim == 1]

            if len(non_empties) and (
                keys is None and names is None and levels is None and not self.intersect
            ):
                objs = non_empties
                sample = objs[0]

        if sample is None:
            sample = objs[0]
        return sample, objs

    def _sanitize_mixed_ndim(
        self,
        objs: list[Series | DataFrame],
        sample: Series | DataFrame,
        ignore_index: bool,
        axis: AxisInt,
    ) -> list[Series | DataFrame]:
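        """
        Upgrade lower-dimensional objs (Series) to the highest ndim present
        (DataFrame), assigning positional column names where needed.
        """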
        # if we have mixed ndims, then convert to highest ndim
        # creating column numbers as needed

        new_objs = []

        current_column = 0
        max_ndim = sample.ndim
        for obj in objs:
            ndim = obj.ndim
            if ndim == max_ndim:
                pass

            elif ndim != max_ndim - 1:
                raise ValueError(
                    "cannot concatenate unaligned mixed dimensional NDFrame objects"
                )

            else:
                name = getattr(obj, "name", None)
                if ignore_index or name is None:
                    if axis == 1:
                        # doing a row-wise concatenation so need everything
                        # to line up
                        name = 0
                    else:
                        # doing a column-wise concatenation so need series
                        # to have unique names
                        name = current_column
                        current_column += 1

                obj = sample._constructor({name: obj}, copy=False)

            new_objs.append(obj)

        return new_objs

    def get_result(self):
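        """
        Produce the concatenated result, dispatching on whether the objs
        are Series or DataFrames and on the concatenation axis.
        """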
        cons: Callable[..., DataFrame | Series]
        sample: DataFrame | Series

        # series only
        if self._is_series:
            sample = cast("Series", self.objs[0])

            # stack blocks
            if self.bm_axis == 0:
                name = com.consensus_name_attr(self.objs)
                cons = sample._constructor

                arrs = [ser._values for ser in self.objs]

                res = concat_compat(arrs, axis=0)

                new_index: Index
                if self.ignore_index:
                    # We can avoid surprisingly-expensive _get_concat_axis
                    new_index = default_index(len(res))
                else:
                    new_index = self.new_axes[0]

                mgr = type(sample._mgr).from_array(res, index=new_index)

                result = sample._constructor_from_mgr(mgr, axes=mgr.axes)
                result._name = name
                return result.__finalize__(self, method="concat")

            # combine as columns in a frame
            else:
                data = dict(zip(range(len(self.objs)), self.objs))

                # GH28330 Preserves subclassed objects through concat
                cons = sample._constructor_expanddim

                index, columns = self.new_axes
                df = cons(data, index=index, copy=self.copy)
                df.columns = columns
                return df.__finalize__(self, method="concat")

        # combine block managers
        else:
            sample = cast("DataFrame", self.objs[0])

            mgrs_indexers = []
            for obj in self.objs:
                indexers = {}
                for ax, new_labels in enumerate(self.new_axes):
                    if ax == self.bm_axis:
                        # Suppress reindexing on concat axis
                        continue

                    # 1-ax to convert BlockManager axis to DataFrame axis
                    obj_labels = obj.axes[1 - ax]
                    if not new_labels.equals(obj_labels):
                        indexers[ax] = obj_labels.get_indexer(new_labels)

                mgrs_indexers.append((obj._mgr, indexers))

            new_data = concatenate_managers(
                mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy
            )
            if not self.copy and not using_copy_on_write():
                new_data._consolidate_inplace()

            out = sample._constructor_from_mgr(new_data, axes=new_data.axes)
            return out.__finalize__(self, method="concat")

    def _get_result_dim(self) -> int:
        if self._is_series and self.bm_axis == 1:
            return 2
        else:
            return self.objs[0].ndim

    @cache_readonly
    def new_axes(self) -> list[Index]:
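        """
        Return the Index objects for the result's axes; position
        ``bm_axis`` holds the concatenation axis.
        """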
        ndim = self._get_result_dim()
        return [
            self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
            for i in range(ndim)
        ]

    def _get_comb_axis(self, i: AxisInt) -> Index:
        data_axis = self.objs[0]._get_block_manager_axis(i)
        return get_objs_combined_axis(
            self.objs,
            axis=data_axis,
            intersect=self.intersect,
            sort=self.sort,
            copy=self.copy,
        )

    @cache_readonly
    def _get_concat_axis(self) -> Index:
        """
        Return index to be used along concatenation axis.
        """
        if self._is_series:
            if self.bm_axis == 0:
                indexes = [x.index for x in self.objs]
            elif self.ignore_index:
                idx = default_index(len(self.objs))
                return idx
            elif self.keys is None:
                names: list[Hashable] = [None] * len(self.objs)
                num = 0
                has_names = False
                for i, x in enumerate(self.objs):
                    if x.ndim != 1:
                        raise TypeError(
                            f"Cannot concatenate type 'Series' with "
                            f"object of type '{type(x).__name__}'"
                        )
                    if x.name is not None:
                        names[i] = x.name
                        has_names = True
                    else:
                        names[i] = num
                        num += 1
                if has_names:
                    return Index(names)
                else:
                    return default_index(len(self.objs))
            else:
                return ensure_index(self.keys).set_names(self.names)
        else:
            indexes = [x.axes[self.axis] for x in self.objs]

        if self.ignore_index:
            idx = default_index(sum(len(i) for i in indexes))
            return idx

        if self.keys is None:
            if self.levels is not None:
                raise ValueError("levels supported only when keys is not None")
            concat_axis = _concat_indexes(indexes)
        else:
            concat_axis = _make_concat_multiindex(
                indexes, self.keys, self.levels, self.names
            )

        self._maybe_check_integrity(concat_axis)

        return concat_axis

    def _maybe_check_integrity(self, concat_index: Index) -> None:
        if self.verify_integrity:
            if not concat_index.is_unique:
                overlap = concat_index[concat_index.duplicated()].unique()
                raise ValueError(f"Indexes have overlapping values: {overlap}")


def _concat_indexes(indexes) -> Index:
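    """
    Chain the remaining indexes onto the first one, preserving order.
    """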
    return indexes[0].append(indexes[1:])


def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex:
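    """
    Build the MultiIndex for the concatenation axis from the per-object
    ``keys`` (the outer level or levels) and the objects' existing
    indexes (the inner levels).
    """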
    if (levels is None and isinstance(keys[0], tuple)) or (
        levels is not None and len(levels) > 1
    ):
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            _, levels = factorize_from_iterables(zipped)
        else:
            levels = [ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [ensure_index(keys).unique()]
        else:
            levels = [ensure_index(x) for x in levels]

    for level in levels:
        if not level.is_unique:
            raise ValueError(f"Level values not unique: {level.tolist()}")

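    # If the objects' indexes are not all identical (or a level contains
    # duplicates), the codes for each level must be computed explicitly;
    # otherwise the fast path further below can build them with cheap
    # repeat/tile operations.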
    if not all_indexes_same(indexes) or not all(level.is_unique for level in levels):
        codes_list = []

        # things are potentially different sizes, so compute the exact codes
        # for each level and pass those to the MultiIndex constructor

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            if isinstance(hlevel, Index) and hlevel.equals(level):
                lens = [len(idx) for idx in indexes]
                codes_list.append(np.repeat(np.arange(len(hlevel)), lens))
            else:
                for key, index in zip(hlevel, indexes):
                    # Find matching codes, include matching nan values as equal.
                    mask = (isna(level) & isna(key)) | (level == key)
                    if not mask.any():
                        raise ValueError(f"Key {key} not in level {level}")
                    i = np.nonzero(mask)[0][0]

                    to_concat.append(np.repeat(i, len(index)))
                codes_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            codes_list.extend(concat_index.codes)
        else:
            codes, categories = factorize_from_iterable(concat_index)
            levels.append(categories)
            codes_list.append(codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len({idx.nlevels for idx in indexes}) == 1:
                raise AssertionError(
                    "Cannot concat indices that do not have the same number of levels"
                )

            # also copies
            names = list(names) + list(get_unanimous_names(*indexes))

        return MultiIndex(
            levels=levels, codes=codes_list, names=names, verify_integrity=False
        )

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct codes
    new_codes = []

    # Fast path: all indexes are identical, so each key's code can simply
    # be repeated len(new_index) times and the existing codes tiled.

    for hlevel, level in zip(zipped, levels):
        hlevel_index = ensure_index(hlevel)
        mapped = level.get_indexer(hlevel_index)

        mask = mapped == -1
        if mask.any():
            raise ValueError(
                f"Values not found in passed level: {hlevel_index[mask]!s}"
            )

        new_codes.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes])
    else:
        new_levels.append(new_index.unique())
        single_codes = new_index.unique().get_indexer(new_index)
        new_codes.append(np.tile(single_codes, kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(
        levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
    )