1"""
2Concat routines.
3"""
4from __future__ import annotations
5
6from collections import abc
7from typing import (
8 TYPE_CHECKING,
9 Callable,
10 Literal,
11 cast,
12 overload,
13)
14import warnings
15
16import numpy as np
17
18from pandas._config import using_copy_on_write
19
20from pandas.util._decorators import cache_readonly
21from pandas.util._exceptions import find_stack_level
22
23from pandas.core.dtypes.common import (
24 is_bool,
25 is_iterator,
26)
27from pandas.core.dtypes.concat import concat_compat
28from pandas.core.dtypes.generic import (
29 ABCDataFrame,
30 ABCSeries,
31)
32from pandas.core.dtypes.missing import isna
33
34from pandas.core.arrays.categorical import (
35 factorize_from_iterable,
36 factorize_from_iterables,
37)
38import pandas.core.common as com
39from pandas.core.indexes.api import (
40 Index,
41 MultiIndex,
42 all_indexes_same,
43 default_index,
44 ensure_index,
45 get_objs_combined_axis,
46 get_unanimous_names,
47)
48from pandas.core.internals import concatenate_managers
49
50if TYPE_CHECKING:
51 from collections.abc import (
52 Hashable,
53 Iterable,
54 Mapping,
55 )
56
57 from pandas._typing import (
58 Axis,
59 AxisInt,
60 HashableT,
61 )
62
63 from pandas import (
64 DataFrame,
65 Series,
66 )
67
68# ---------------------------------------------------------------------
69# Concatenate DataFrame objects
70
71
72@overload
73def concat(
74 objs: Iterable[DataFrame] | Mapping[HashableT, DataFrame],
75 *,
76 axis: Literal[0, "index"] = ...,
77 join: str = ...,
78 ignore_index: bool = ...,
79 keys: Iterable[Hashable] | None = ...,
80 levels=...,
81 names: list[HashableT] | None = ...,
82 verify_integrity: bool = ...,
83 sort: bool = ...,
84 copy: bool | None = ...,
85) -> DataFrame:
86 ...
87
88
89@overload
90def concat(
91 objs: Iterable[Series] | Mapping[HashableT, Series],
92 *,
93 axis: Literal[0, "index"] = ...,
94 join: str = ...,
95 ignore_index: bool = ...,
96 keys: Iterable[Hashable] | None = ...,
97 levels=...,
98 names: list[HashableT] | None = ...,
99 verify_integrity: bool = ...,
100 sort: bool = ...,
101 copy: bool | None = ...,
102) -> Series:
103 ...
104
105
106@overload
107def concat(
108 objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
109 *,
110 axis: Literal[0, "index"] = ...,
111 join: str = ...,
112 ignore_index: bool = ...,
113 keys: Iterable[Hashable] | None = ...,
114 levels=...,
115 names: list[HashableT] | None = ...,
116 verify_integrity: bool = ...,
117 sort: bool = ...,
118 copy: bool | None = ...,
119) -> DataFrame | Series:
120 ...
121
122
123@overload
124def concat(
125 objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
126 *,
127 axis: Literal[1, "columns"],
128 join: str = ...,
129 ignore_index: bool = ...,
130 keys: Iterable[Hashable] | None = ...,
131 levels=...,
132 names: list[HashableT] | None = ...,
133 verify_integrity: bool = ...,
134 sort: bool = ...,
135 copy: bool | None = ...,
136) -> DataFrame:
137 ...
138
139
140@overload
141def concat(
142 objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
143 *,
144 axis: Axis = ...,
145 join: str = ...,
146 ignore_index: bool = ...,
147 keys: Iterable[Hashable] | None = ...,
148 levels=...,
149 names: list[HashableT] | None = ...,
150 verify_integrity: bool = ...,
151 sort: bool = ...,
152 copy: bool | None = ...,
153) -> DataFrame | Series:
154 ...
155
156
def concat(
    objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
    *,
    axis: Axis = 0,
    join: str = "outer",
    ignore_index: bool = False,
    keys: Iterable[Hashable] | None = None,
    levels=None,
    names: list[HashableT] | None = None,
    verify_integrity: bool = False,
    sort: bool = False,
    copy: bool | None = None,
) -> DataFrame | Series:
    """
    Concatenate pandas objects along a particular axis.

    Allows optional set logic along the other axes.

    Can also add a layer of hierarchical indexing on the concatenation axis,
    which may be useful if the labels are the same (or overlapping) on
    the passed axis number.

    Parameters
    ----------
    objs : a sequence or mapping of Series or DataFrame objects
        If a mapping is passed, its keys will be used as the `keys`
        argument, unless `keys` is passed explicitly, in which case the
        values will be selected (see below). Any None objects will be
        dropped silently unless they are all None, in which case a
        ValueError will be raised.
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    join : {'inner', 'outer'}, default 'outer'
        How to handle indexes on other axis (or axes).
    ignore_index : bool, default False
        If True, do not use the index values along the concatenation axis. The
        resulting axis will be labeled 0, ..., n - 1. This is useful if you are
        concatenating objects where the concatenation axis does not have
        meaningful indexing information. Note the index values on the other
        axes are still respected in the join.
    keys : sequence, default None
        If multiple levels passed, should contain tuples. Construct
        hierarchical index using the passed keys as the outermost level.
    levels : list of sequences, default None
        Specific levels (unique values) to use for constructing a
        MultiIndex. Otherwise they will be inferred from the keys.
    names : list, default None
        Names for the levels in the resulting hierarchical index.
    verify_integrity : bool, default False
        Check whether the new concatenated axis contains duplicates. This can
        be very expensive relative to the actual data concatenation.
    sort : bool, default False
        Sort non-concatenation axis if it is not already aligned. One exception
        to this is when the non-concatenation axis is a DatetimeIndex and
        join='outer' and the axis is not already aligned. In that case, the
        non-concatenation axis is always sorted lexicographically.
    copy : bool, default True
        If False, do not copy data unnecessarily.

    Returns
    -------
    object, type of objs
        When concatenating all ``Series`` along the index (axis=0), a
        ``Series`` is returned. When ``objs`` contains at least one
        ``DataFrame``, a ``DataFrame`` is returned. When concatenating along
        the columns (axis=1), a ``DataFrame`` is returned.

    See Also
    --------
    DataFrame.join : Join DataFrames using indexes.
    DataFrame.merge : Merge DataFrames by indexes or columns.

    Notes
    -----
    The keys, levels, and names arguments are all optional.

    A walkthrough of how this method fits in with other tools for combining
    pandas objects can be found `here
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__.

    It is not recommended to build DataFrames by adding single rows in a
    for loop. Build a list of rows and make a DataFrame in a single concat.

    Examples
    --------
    Combine two ``Series``.

    >>> s1 = pd.Series(['a', 'b'])
    >>> s2 = pd.Series(['c', 'd'])
    >>> pd.concat([s1, s2])
    0    a
    1    b
    0    c
    1    d
    dtype: object

    Clear the existing index and reset it in the result
    by setting the ``ignore_index`` option to ``True``.

    >>> pd.concat([s1, s2], ignore_index=True)
    0    a
    1    b
    2    c
    3    d
    dtype: object

    Add a hierarchical index at the outermost level of
    the data with the ``keys`` option.

    >>> pd.concat([s1, s2], keys=['s1', 's2'])
    s1  0    a
        1    b
    s2  0    c
        1    d
    dtype: object

    Label the index keys you create with the ``names`` option.

    >>> pd.concat([s1, s2], keys=['s1', 's2'],
    ...           names=['Series name', 'Row ID'])
    Series name  Row ID
    s1           0         a
                 1         b
    s2           0         c
                 1         d
    dtype: object

    Combine two ``DataFrame`` objects with identical columns.

    >>> df1 = pd.DataFrame([['a', 1], ['b', 2]],
    ...                    columns=['letter', 'number'])
    >>> df1
      letter  number
    0      a       1
    1      b       2
    >>> df2 = pd.DataFrame([['c', 3], ['d', 4]],
    ...                    columns=['letter', 'number'])
    >>> df2
      letter  number
    0      c       3
    1      d       4
    >>> pd.concat([df1, df2])
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects with overlapping columns
    and return everything. Columns outside the intersection will
    be filled with ``NaN`` values.

    >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
    ...                    columns=['letter', 'number', 'animal'])
    >>> df3
      letter  number animal
    0      c       3    cat
    1      d       4    dog
    >>> pd.concat([df1, df3], sort=False)
      letter  number animal
    0      a       1    NaN
    1      b       2    NaN
    0      c       3    cat
    1      d       4    dog

    Combine ``DataFrame`` objects with overlapping columns
    and return only those that are shared by passing ``inner`` to
    the ``join`` keyword argument.

    >>> pd.concat([df1, df3], join="inner")
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects horizontally along the x axis by
    passing in ``axis=1``.

    >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],
    ...                    columns=['animal', 'name'])
    >>> pd.concat([df1, df4], axis=1)
      letter  number  animal    name
    0      a       1    bird   polly
    1      b       2  monkey  george

    Prevent the result from including duplicate index values with the
    ``verify_integrity`` option.

    >>> df5 = pd.DataFrame([1], index=['a'])
    >>> df5
       0
    a  1
    >>> df6 = pd.DataFrame([2], index=['a'])
    >>> df6
       0
    a  2
    >>> pd.concat([df5, df6], verify_integrity=True)
    Traceback (most recent call last):
        ...
    ValueError: Indexes have overlapping values: ['a']

    Append a single row to the end of a ``DataFrame`` object.

    >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0])
    >>> df7
       a  b
    0  1  2
    >>> new_row = pd.Series({'a': 3, 'b': 4})
    >>> new_row
    a    3
    b    4
    dtype: int64
    >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
       a  b
    0  1  2
    1  3  4
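
    Instead of appending rows one at a time in a loop, build the pieces in
    a list and concatenate them once.

    >>> rows = [pd.DataFrame({'a': [i], 'b': [i * 2]}) for i in range(3)]
    >>> pd.concat(rows, ignore_index=True)
       a  b
    0  0  0
    1  1  2
    2  2  4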
373 """
    if copy is None:
        if using_copy_on_write():
            copy = False
        else:
            copy = True
    elif copy and using_copy_on_write():
        copy = False

    op = _Concatenator(
        objs,
        axis=axis,
        ignore_index=ignore_index,
        join=join,
        keys=keys,
        levels=levels,
        names=names,
        verify_integrity=verify_integrity,
        copy=copy,
        sort=sort,
    )

    return op.get_result()


class _Concatenator:
    """
    Orchestrates a concatenation operation for BlockManagers
    """

    sort: bool

    def __init__(
        self,
        objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
        axis: Axis = 0,
        join: str = "outer",
        keys: Iterable[Hashable] | None = None,
        levels=None,
        names: list[HashableT] | None = None,
        ignore_index: bool = False,
        verify_integrity: bool = False,
        copy: bool = True,
        sort: bool = False,
    ) -> None:
        if isinstance(objs, (ABCSeries, ABCDataFrame, str)):
            raise TypeError(
                "first argument must be an iterable of pandas "
                f'objects, you passed an object of type "{type(objs).__name__}"'
            )

        if join == "outer":
            self.intersect = False
        elif join == "inner":
            self.intersect = True
        else:  # pragma: no cover
            raise ValueError(
                "Only can inner (intersect) or outer (union) join the other axis"
            )

        if not is_bool(sort):
            raise ValueError(
                f"The 'sort' keyword only accepts boolean values; {sort} was passed."
            )
        # Incompatible types in assignment (expression has type "Union[bool, bool_]",
        # variable has type "bool")
        self.sort = sort  # type: ignore[assignment]

        self.ignore_index = ignore_index
        self.verify_integrity = verify_integrity
        self.copy = copy

        objs, keys = self._clean_keys_and_objs(objs, keys)

        # figure out what our result ndim is going to be
        ndims = self._get_ndims(objs)
        sample, objs = self._get_sample_object(objs, ndims, keys, names, levels)

        # Standardize axis parameter to int
        if sample.ndim == 1:
            from pandas import DataFrame

            axis = DataFrame._get_axis_number(axis)
            self._is_frame = False
            self._is_series = True
        else:
            axis = sample._get_axis_number(axis)
            self._is_frame = True
            self._is_series = False

        # Need to flip BlockManager axis in the DataFrame special case
        axis = sample._get_block_manager_axis(axis)
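        # (A DataFrame's blocks are stored transposed, so the user-facing
        # axis 0/"index" maps to BlockManager axis 1 and vice versa.)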

        # if we have mixed ndims, then convert to highest ndim
        # creating column numbers as needed
        if len(ndims) > 1:
            objs = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis)

        self.objs = objs

        # note: this is the BlockManager axis (since DataFrame is transposed)
        self.bm_axis = axis
        self.axis = 1 - self.bm_axis if self._is_frame else 0
        self.keys = keys
        self.names = names or getattr(keys, "names", None)
        self.levels = levels

    def _get_ndims(self, objs: list[Series | DataFrame]) -> set[int]:
        # figure out what our result ndim is going to be
        ndims = set()
        for obj in objs:
            if not isinstance(obj, (ABCSeries, ABCDataFrame)):
                msg = (
                    f"cannot concatenate object of type '{type(obj)}'; "
                    "only Series and DataFrame objs are valid"
                )
                raise TypeError(msg)

            ndims.add(obj.ndim)
        return ndims

    def _clean_keys_and_objs(
        self,
        objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
        keys,
    ) -> tuple[list[Series | DataFrame], Index | None]:
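        """
        Materialize ``objs`` as a list, resolving mappings and silently
        dropping any None entries; return it along with ``keys`` coerced
        to an Index (or None).
        """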
        if isinstance(objs, abc.Mapping):
            if keys is None:
                keys = list(objs.keys())
            objs_list = [objs[k] for k in keys]
        else:
            objs_list = list(objs)

        if len(objs_list) == 0:
            raise ValueError("No objects to concatenate")

        if keys is None:
            objs_list = list(com.not_none(*objs_list))
        else:
            # GH#1649
            clean_keys = []
            clean_objs = []
            if is_iterator(keys):
                keys = list(keys)
            if len(keys) != len(objs_list):
                # GH#43485
                warnings.warn(
                    "The behavior of pd.concat with len(keys) != len(objs) is "
                    "deprecated. In a future version this will raise instead of "
                    "truncating to the smaller of the two sequences",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            for k, v in zip(keys, objs_list):
                if v is None:
                    continue
                clean_keys.append(k)
                clean_objs.append(v)
            objs_list = clean_objs

            if isinstance(keys, MultiIndex):
                # TODO: retain levels?
                keys = type(keys).from_tuples(clean_keys, names=keys.names)
            else:
                name = getattr(keys, "name", None)
                keys = Index(clean_keys, name=name, dtype=getattr(keys, "dtype", None))

        if len(objs_list) == 0:
            raise ValueError("All objects passed were None")

        return objs_list, keys

    def _get_sample_object(
        self,
        objs: list[Series | DataFrame],
        ndims: set[int],
        keys,
        names,
        levels,
    ) -> tuple[Series | DataFrame, list[Series | DataFrame]]:
        # get the sample
        # want the highest ndim that we have, and must be non-empty
        # unless all objs are empty
        sample: Series | DataFrame | None = None
        if len(ndims) > 1:
            max_ndim = max(ndims)
            for obj in objs:
                if obj.ndim == max_ndim and np.sum(obj.shape):
                    sample = obj
                    break

        else:
            # filter out the empties if we don't have multi-index possibilities
            # note: keep empty Series, as they affect the result columns / name
            non_empties = [obj for obj in objs if sum(obj.shape) > 0 or obj.ndim == 1]

            if len(non_empties) and (
                keys is None and names is None and levels is None and not self.intersect
            ):
                objs = non_empties
                sample = objs[0]

        if sample is None:
            sample = objs[0]
        return sample, objs

    def _sanitize_mixed_ndim(
        self,
        objs: list[Series | DataFrame],
        sample: Series | DataFrame,
        ignore_index: bool,
        axis: AxisInt,
    ) -> list[Series | DataFrame]:
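        """
        Upgrade lower-dimensional objs (Series) to the highest ndim present
        (DataFrame), assigning positional column names where needed.
        """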
        # if we have mixed ndims, then convert to highest ndim
        # creating column numbers as needed

        new_objs = []

        current_column = 0
        max_ndim = sample.ndim
        for obj in objs:
            ndim = obj.ndim
            if ndim == max_ndim:
                pass

            elif ndim != max_ndim - 1:
                raise ValueError(
                    "cannot concatenate unaligned mixed dimensional NDFrame objects"
                )

            else:
                name = getattr(obj, "name", None)
                if ignore_index or name is None:
                    if axis == 1:
                        # doing a row-wise concatenation so need everything
                        # to line up
                        name = 0
                    else:
                        # doing a column-wise concatenation so need series
                        # to have unique names
                        name = current_column
                        current_column += 1

                obj = sample._constructor({name: obj}, copy=False)

            new_objs.append(obj)

        return new_objs

    def get_result(self):
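        """
        Produce the concatenated result, dispatching on whether the objs
        are Series or DataFrames and on the concatenation axis.
        """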
        cons: Callable[..., DataFrame | Series]
        sample: DataFrame | Series

        # series only
        if self._is_series:
            sample = cast("Series", self.objs[0])

            # stack blocks
            if self.bm_axis == 0:
                name = com.consensus_name_attr(self.objs)
                cons = sample._constructor

                arrs = [ser._values for ser in self.objs]

                res = concat_compat(arrs, axis=0)

                new_index: Index
                if self.ignore_index:
                    # We can avoid surprisingly-expensive _get_concat_axis
                    new_index = default_index(len(res))
                else:
                    new_index = self.new_axes[0]

                mgr = type(sample._mgr).from_array(res, index=new_index)

                result = sample._constructor_from_mgr(mgr, axes=mgr.axes)
                result._name = name
                return result.__finalize__(self, method="concat")

            # combine as columns in a frame
            else:
                data = dict(zip(range(len(self.objs)), self.objs))

                # GH28330 Preserves subclassed objects through concat
                cons = sample._constructor_expanddim

                index, columns = self.new_axes
                df = cons(data, index=index, copy=self.copy)
                df.columns = columns
                return df.__finalize__(self, method="concat")

        # combine block managers
        else:
            sample = cast("DataFrame", self.objs[0])

            mgrs_indexers = []
            for obj in self.objs:
                indexers = {}
                for ax, new_labels in enumerate(self.new_axes):
                    if ax == self.bm_axis:
                        # Suppress reindexing on concat axis
                        continue

                    # 1-ax to convert BlockManager axis to DataFrame axis
                    obj_labels = obj.axes[1 - ax]
                    if not new_labels.equals(obj_labels):
                        indexers[ax] = obj_labels.get_indexer(new_labels)

                mgrs_indexers.append((obj._mgr, indexers))

            new_data = concatenate_managers(
                mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy
            )
            if not self.copy and not using_copy_on_write():
                new_data._consolidate_inplace()

            out = sample._constructor_from_mgr(new_data, axes=new_data.axes)
            return out.__finalize__(self, method="concat")

    def _get_result_dim(self) -> int:
        if self._is_series and self.bm_axis == 1:
            return 2
        else:
            return self.objs[0].ndim

    @cache_readonly
    def new_axes(self) -> list[Index]:
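        """
        Return the Index objects for the result's axes; position
        ``bm_axis`` holds the concatenation axis.
        """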
        ndim = self._get_result_dim()
        return [
            self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
            for i in range(ndim)
        ]

    def _get_comb_axis(self, i: AxisInt) -> Index:
        data_axis = self.objs[0]._get_block_manager_axis(i)
        return get_objs_combined_axis(
            self.objs,
            axis=data_axis,
            intersect=self.intersect,
            sort=self.sort,
            copy=self.copy,
        )

    @cache_readonly
    def _get_concat_axis(self) -> Index:
        """
        Return index to be used along concatenation axis.
        """
        if self._is_series:
            if self.bm_axis == 0:
                indexes = [x.index for x in self.objs]
            elif self.ignore_index:
                idx = default_index(len(self.objs))
                return idx
            elif self.keys is None:
                names: list[Hashable] = [None] * len(self.objs)
                num = 0
                has_names = False
                for i, x in enumerate(self.objs):
                    if x.ndim != 1:
                        raise TypeError(
                            f"Cannot concatenate type 'Series' with "
                            f"object of type '{type(x).__name__}'"
                        )
                    if x.name is not None:
                        names[i] = x.name
                        has_names = True
                    else:
                        names[i] = num
                        num += 1
                if has_names:
                    return Index(names)
                else:
                    return default_index(len(self.objs))
            else:
                return ensure_index(self.keys).set_names(self.names)
        else:
            indexes = [x.axes[self.axis] for x in self.objs]

        if self.ignore_index:
            idx = default_index(sum(len(i) for i in indexes))
            return idx

        if self.keys is None:
            if self.levels is not None:
                raise ValueError("levels supported only when keys is not None")
            concat_axis = _concat_indexes(indexes)
        else:
            concat_axis = _make_concat_multiindex(
                indexes, self.keys, self.levels, self.names
            )

        self._maybe_check_integrity(concat_axis)

        return concat_axis

    def _maybe_check_integrity(self, concat_index: Index) -> None:
        if self.verify_integrity:
            if not concat_index.is_unique:
                overlap = concat_index[concat_index.duplicated()].unique()
                raise ValueError(f"Indexes have overlapping values: {overlap}")


def _concat_indexes(indexes) -> Index:
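    """
    Chain the remaining indexes onto the first one, preserving order.
    """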
    return indexes[0].append(indexes[1:])


def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex:
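    """
    Build the MultiIndex for the concatenation axis from the per-object
    ``keys`` (the outer level or levels) and the objects' existing
    indexes (the inner levels).
    """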
    if (levels is None and isinstance(keys[0], tuple)) or (
        levels is not None and len(levels) > 1
    ):
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            _, levels = factorize_from_iterables(zipped)
        else:
            levels = [ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [ensure_index(keys).unique()]
        else:
            levels = [ensure_index(x) for x in levels]

    for level in levels:
        if not level.is_unique:
            raise ValueError(f"Level values not unique: {level.tolist()}")

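    # If the objects' indexes are not all identical (or a level contains
    # duplicates), the codes for each level must be computed explicitly;
    # otherwise the fast path further below can build them with cheap
    # repeat/tile operations.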
    if not all_indexes_same(indexes) or not all(level.is_unique for level in levels):
        codes_list = []

        # things are potentially different sizes, so compute the exact codes
        # for each level and pass those to the MultiIndex constructor

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            if isinstance(hlevel, Index) and hlevel.equals(level):
                lens = [len(idx) for idx in indexes]
                codes_list.append(np.repeat(np.arange(len(hlevel)), lens))
            else:
                for key, index in zip(hlevel, indexes):
                    # Find matching codes, include matching nan values as equal.
                    mask = (isna(level) & isna(key)) | (level == key)
                    if not mask.any():
                        raise ValueError(f"Key {key} not in level {level}")
                    i = np.nonzero(mask)[0][0]

                    to_concat.append(np.repeat(i, len(index)))
                codes_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            codes_list.extend(concat_index.codes)
        else:
            codes, categories = factorize_from_iterable(concat_index)
            levels.append(categories)
            codes_list.append(codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len({idx.nlevels for idx in indexes}) == 1:
                raise AssertionError(
                    "Cannot concat indices that do not have the same number of levels"
                )

            # also copies
            names = list(names) + list(get_unanimous_names(*indexes))

        return MultiIndex(
            levels=levels, codes=codes_list, names=names, verify_integrity=False
        )

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct codes
    new_codes = []

    # Fast path: all indexes are identical, so each key's code can simply
    # be repeated len(new_index) times and the existing codes tiled.

    for hlevel, level in zip(zipped, levels):
        hlevel_index = ensure_index(hlevel)
        mapped = level.get_indexer(hlevel_index)

        mask = mapped == -1
        if mask.any():
            raise ValueError(
                f"Values not found in passed level: {hlevel_index[mask]!s}"
            )

        new_codes.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes])
    else:
        new_levels.append(new_index.unique())
        single_codes = new_index.unique().get_indexer(new_index)
        new_codes.append(np.tile(single_codes, kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(
        levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
    )