from __future__ import annotations

import copy as cp
import itertools
from typing import (
    TYPE_CHECKING,
    Sequence,
    cast,
)

import numpy as np

from pandas._libs import (
    NaT,
    internals as libinternals,
)
from pandas._libs.missing import NA
from pandas._typing import (
    ArrayLike,
    AxisInt,
    DtypeObj,
    Manager,
    Shape,
)
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.astype import astype_array
from pandas.core.dtypes.cast import (
    ensure_dtype_can_hold_na,
    find_common_type,
    np_find_common_type,
)
from pandas.core.dtypes.common import (
    is_1d_only_ea_dtype,
    is_dtype_equal,
    is_scalar,
    needs_i8_conversion,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype,
    ExtensionDtype,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
    isna_all,
)

import pandas.core.algorithms as algos
from pandas.core.arrays import (
    DatetimeArray,
    ExtensionArray,
)
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.internals.array_manager import (
    ArrayManager,
    NullArrayProxy,
)
from pandas.core.internals.blocks import (
    ensure_block_shape,
    new_block_2d,
)
from pandas.core.internals.managers import BlockManager

if TYPE_CHECKING:
    from pandas import Index
    from pandas.core.internals.blocks import Block


def _concatenate_array_managers(
    mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool
) -> Manager:
    """
    Concatenate array managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (ArrayManager, {axis: indexer, ...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    ArrayManager
    """
    # reindex all arrays
    mgrs = []
    for mgr, indexers in mgrs_indexers:
        axis1_made_copy = False
        for ax, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[ax], indexer, axis=ax, allow_dups=True, use_na_proxy=True
            )
            if ax == 1 and indexer is not None:
                axis1_made_copy = True
        if copy and concat_axis == 0 and not axis1_made_copy:
            # for concat_axis 1 we will always get a copy through concat_arrays
            mgr = mgr.copy()
        mgrs.append(mgr)

    if concat_axis == 1:
        # concatting along the rows -> concat the reindexed arrays
        # TODO(ArrayManager) doesn't yet preserve the correct dtype
        arrays = [
            concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))])
            for j in range(len(mgrs[0].arrays))
        ]
    else:
        # concatting along the columns -> combine reindexed arrays in a single manager
        assert concat_axis == 0
        arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))

    new_mgr = ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False)
    return new_mgr
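
# Input-shape sketch for _concatenate_array_managers above (illustrative only;
# `df1`/`df2` are hypothetical ArrayManager-backed frames). The caller is
# assumed to build something like
#
#     mgrs_indexers = [(df1._mgr, {}), (df2._mgr, {1: indexer})]
#
# where each dict maps an axis to the integer indexer that aligns that
# manager's axis with the joint `axes`; an empty dict means no reindexing is
# needed for that manager.
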
def concat_arrays(to_concat: list) -> ArrayLike:
    """
    Alternative for concat_compat but specialized for use in the ArrayManager.

    Differences: only deals with 1D arrays (no axis keyword); assumes the
    inputs have already been passed through ensure_wrapped_if_datetimelike;
    and does not skip empty arrays when determining the result dtype.
    In addition, ensures that all NullArrayProxies get replaced with actual
    arrays.

    Parameters
    ----------
    to_concat : list of arrays

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # ignore the all-NA proxies to determine the resulting dtype
    to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)]

    dtypes = {x.dtype for x in to_concat_no_proxy}
    single_dtype = len(dtypes) == 1

    if single_dtype:
        target_dtype = to_concat_no_proxy[0].dtype
    elif all(x.kind in ["i", "u", "b"] and isinstance(x, np.dtype) for x in dtypes):
        # GH#42092
        target_dtype = np_find_common_type(*dtypes)
    else:
        target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy])

    to_concat = [
        arr.to_array(target_dtype)
        if isinstance(arr, NullArrayProxy)
        else astype_array(arr, target_dtype, copy=False)
        for arr in to_concat
    ]

    if isinstance(to_concat[0], ExtensionArray):
        cls = type(to_concat[0])
        return cls._concat_same_type(to_concat)

    result = np.concatenate(to_concat)

    # TODO decide on exact behaviour (we shouldn't do this only for empty result)
    # see https://github.com/pandas-dev/pandas/issues/39817
    if len(result) == 0:
        # all empties -> check for bool to not coerce to float
        kinds = {obj.dtype.kind for obj in to_concat_no_proxy}
        if len(kinds) != 1:
            if "b" in kinds:
                result = result.astype(object)
    return result
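
# Dtype-resolution sketch for concat_arrays (hedged; behaviour inferred from
# the code above, not quoted from the test suite): int64 + bool ndarrays hit
# the np_find_common_type branch (GH#42092), so one would expect
#
#     >>> concat_arrays([np.array([1, 2]), np.array([True])])
#     array([1, 2, 1])
#
# NullArrayProxy entries are excluded from `to_concat_no_proxy`, so they are
# materialized via to_array(target_dtype) and contribute only length, never a
# dtype.
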
def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool
) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer, ...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    # Assertions disabled for performance
    # for tup in mgrs_indexers:
    #    # caller is responsible for ensuring this
    #    indexers = tup[1]
    #    assert concat_axis not in indexers

    if concat_axis == 0:
        return _concat_managers_axis0(mgrs_indexers, axes, copy)

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans)
    blocks = []

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block

        if len(join_units) == 1 and not join_units[0].indexers:
            values = blk.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(vals, axis=1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=1)
                values = ensure_block_shape(values, ndim=2)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block_2d(values, placement=placement)

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
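
# Call-path sketch (hedged; details vary across pandas versions): pd.concat
# on block-backed DataFrames reaches concatenate_managers with one
# (BlockManager, indexers) tuple per input. Note the axis transposition:
# BlockManager axes are [items/columns, rows], so pd.concat(..., axis=0)
# (stacking rows) corresponds to concat_axis=1 here, while concat_axis=0
# (combining columns) takes the _concat_managers_axis0 fastpath below.
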
def _concat_managers_axis0(
    mgrs_indexers, axes: list[Index], copy: bool
) -> BlockManager:
    """
    concat_managers specialized to concat_axis=0; the column reindexing
    (with NA proxies) is done here via _maybe_reindex_columns_na_proxy.
    """
    had_reindexers = {
        i: len(mgrs_indexers[i][1]) > 0 for i in range(len(mgrs_indexers))
    }
    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    mgrs = [x[0] for x in mgrs_indexers]

    offset = 0
    blocks = []
    for i, mgr in enumerate(mgrs):
        # If we already reindexed, then we definitely don't need another copy
        made_copy = had_reindexers[i]

        for blk in mgr.blocks:
            if made_copy:
                nb = blk.copy(deep=False)
            elif copy:
                nb = blk.copy()
            else:
                # by slicing instead of copy(deep=False), we get a new array
                #  object, see test_concat_copy
                nb = blk.getitem_block(slice(None))
            nb._mgr_locs = nb._mgr_locs.add(offset)
            blocks.append(nb)

        offset += len(mgr.items)

    result = BlockManager(tuple(blocks), axes)
    return result
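
# mgr_locs bookkeeping sketch (illustrative numbers): if the first manager
# holds 3 items and the second holds 2, the second manager's blocks are
# shifted by offset=3, so a block at locs [0, 1] in its source manager lands
# at locs [3, 4] in the concatenated result.
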
def _maybe_reindex_columns_na_proxy(
    axes: list[Index], mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]]
) -> list[tuple[BlockManager, dict[int, np.ndarray]]]:
    """
    Reindex along columns so that all of the BlockManagers being concatenated
    have matching columns.

    Columns added in this reindexing have dtype=np.void, indicating they
    should be ignored when choosing a column's final dtype.
    """
    new_mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]] = []

    for mgr, indexers in mgrs_indexers:
        # For axis=0 (i.e. columns) we use_na_proxy and only_slice, so this
        #  is a cheap reindexing.
        for i, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[i],
                indexers[i],
                axis=i,
                copy=False,
                only_slice=True,  # only relevant for i==0
                allow_dups=True,
                use_na_proxy=True,  # only relevant for i==0
            )
        new_mgrs_indexers.append((mgr, {}))
    return new_mgrs_indexers
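
# NA-proxy sketch (hedged): after this reindexing every manager exposes the
# full joint set of columns. A column missing from some input becomes an
# all-NA block whose dtype has kind "V" (np.void); downstream, JoinUnit and
# _get_empty_dtype treat kind == "V" as "no opinion" when choosing the final
# dtype:
#
#     >>> np.dtype(np.void).kind
#     'V'
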
def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarray]):
    """
    Construct concatenation plan for given block manager and indexers.

    Parameters
    ----------
    mgr : BlockManager
    indexers : dict of {axis: indexer}

    Returns
    -------
    plan : list of (BlockPlacement, JoinUnit) tuples
    """
    assert len(indexers) == 0

    # Calculate post-reindex shape, save for item axis which will be separate
    #  for each block anyway.
    mgr_shape_list = list(mgr.shape)
    for ax, indexer in indexers.items():
        mgr_shape_list[ax] = len(indexer)
    mgr_shape = tuple(mgr_shape_list)

    assert 0 not in indexers

    if mgr.is_single_block:
        blk = mgr.blocks[0]
        return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))]

    blknos = mgr.blknos
    blklocs = mgr.blklocs

    plan = []
    for blkno, placements in libinternals.get_blkno_placements(blknos, group=False):
        assert placements.is_slice_like
        assert blkno != -1

        join_unit_indexers = indexers.copy()

        shape_list = list(mgr_shape)
        shape_list[0] = len(placements)
        shape = tuple(shape_list)

        blk = mgr.blocks[blkno]
        ax0_blk_indexer = blklocs[placements.indexer]

        unit_no_ax0_reindexing = (
            len(placements) == len(blk.mgr_locs)
            and
            # Fastpath detection of join unit not
            #  needing to reindex its block: no ax0
            #  reindexing took place and block
            #  placement was sequential before.
            (
                (blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1)
                or
                # Slow-ish detection: all indexer locs
                #  are sequential (and length match is
                #  checked above).
                (np.diff(ax0_blk_indexer) == 1).all()
            )
        )

        # Omit indexer if no item reindexing is required.
        if unit_no_ax0_reindexing:
            join_unit_indexers.pop(0, None)
        else:
            join_unit_indexers[0] = ax0_blk_indexer

        unit = JoinUnit(blk, shape, join_unit_indexers)

        plan.append((placements, unit))

    return plan
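
# Plan-shape sketch (hypothetical manager with a contiguous int64 block on
# items 0-1 and a float64 block on item 2); group=False yields one slice-like
# placement per run, so the plan looks roughly like
#
#     [(BlockPlacement(slice(0, 2, 1)), JoinUnit(<int64 block>, shape, {})),
#      (BlockPlacement(slice(2, 3, 1)), JoinUnit(<float64 block>, shape, {}))]
#
# with the ax0 indexer omitted from a JoinUnit whenever the fastpath above
# determines the block needs no item reindexing.
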
class JoinUnit:
    def __init__(self, block: Block, shape: Shape, indexers=None) -> None:
        # Passing shape explicitly is required for cases when block is None.
        # Note: block is None implies indexers is None, but not vice-versa
        if indexers is None:
            indexers = {}
        self.block = block
        self.indexers = indexers
        self.shape = shape

    def __repr__(self) -> str:
        return f"{type(self).__name__}({repr(self.block)}, {self.indexers})"

    @cache_readonly
    def needs_filling(self) -> bool:
        for indexer in self.indexers.values():
            # FIXME: cache results of indexer == -1 checks.
            if (indexer == -1).any():
                return True

        return False

    @cache_readonly
    def dtype(self) -> DtypeObj:
        blk = self.block
        if blk.values.dtype.kind == "V":
            raise AssertionError("Block is None, no dtype")

        if not self.needs_filling:
            return blk.dtype
        return ensure_dtype_can_hold_na(blk.dtype)

    def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
        """
        Check that we are all-NA of a type/dtype that is compatible with this dtype.
        Augments `self.is_na` with an additional check of the type of NA values.
        """
        if not self.is_na:
            return False
        if self.block.dtype.kind == "V":
            return True

        if self.dtype == object:
            values = self.block.values
            return all(
                is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K")
            )

        na_value = self.block.fill_value
        if na_value is NaT and not is_dtype_equal(self.dtype, dtype):
            # e.g. we are dt64 and other is td64
            #  fill_values match but we should not cast self.block.values to dtype
            # TODO: this will need updating if we ever have non-nano dt64/td64
            return False

        if na_value is NA and needs_i8_conversion(dtype):
            # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
            #  e.g. self.dtype == "Int64" and dtype is td64, we don't want
            #  to consider these as matching
            return False

        # TODO: better to use can_hold_element?
        return is_valid_na_for_dtype(na_value, dtype)

    @cache_readonly
    def is_na(self) -> bool:
        blk = self.block
        if blk.dtype.kind == "V":
            return True

        if not blk._can_hold_na:
            return False

        values = blk.values
        if values.size == 0:
            return True
        if isinstance(values.dtype, SparseDtype):
            return False

        if values.ndim == 1:
            # TODO(EA2D): no need for special case with 2D EAs
            val = values[0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return isna_all(values)
        else:
            val = values[0][0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return all(isna_all(row) for row in values)

    def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
        values: ArrayLike

        if upcasted_na is None and self.block.dtype.kind != "V":
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.values
        else:
            fill_value = upcasted_na

            if self._is_valid_na_for(empty_dtype):
                # note: always holds when self.block.dtype.kind == "V"
                blk_dtype = self.block.dtype

                if blk_dtype == np.dtype("object"):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order="K")
                    if len(values) and values[0] is None:
                        fill_value = None

                if isinstance(empty_dtype, DatetimeTZDtype):
                    # NB: exclude e.g. pyarrow[dt64tz] dtypes
                    i8values = np.full(self.shape, fill_value._value)
                    return DatetimeArray(i8values, dtype=empty_dtype)

                elif is_1d_only_ea_dtype(empty_dtype):
                    if is_dtype_equal(blk_dtype, empty_dtype) and self.indexers:
                        # avoid creating new empty array if we already have an array
                        # with correct dtype that can be reindexed
                        pass
                    else:
                        empty_dtype = cast(ExtensionDtype, empty_dtype)
                        cls = empty_dtype.construct_array_type()

                        missing_arr = cls._from_sequence([], dtype=empty_dtype)
                        ncols, nrows = self.shape
                        assert ncols == 1, ncols
                        empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
                        return missing_arr.take(
                            empty_arr, allow_fill=True, fill_value=fill_value
                        )
                elif isinstance(empty_dtype, ExtensionDtype):
                    # TODO: no tests get here, a handful would if we disabled
                    #  the dt64tz special-case above (which is faster)
                    cls = empty_dtype.construct_array_type()
                    missing_arr = cls._empty(shape=self.shape, dtype=empty_dtype)
                    missing_arr[:] = fill_value
                    return missing_arr
                else:
                    # NB: we should never get here with empty_dtype integer or bool;
                    #  if we did, the missing_arr.fill would cast to gibberish
                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if (not self.indexers) and (not self.block._can_consolidate):
                # preserve these for validation in concat_compat
                return self.block.values

            if self.block.is_bool:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.dtype("object")).values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.values

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly. This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values, indexer, axis=ax)

        return values
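
# Fill-behaviour sketch for JoinUnit.get_reindexed_values (hedged): for an
# all-NA unit being upcast to float64, empty_dtype=np.dtype("float64") with
# upcasted_na=np.nan lands in the final np.empty(...).fill(...) branch and
# yields a shape-matched array of NaNs; a unit that already has the target
# dtype and no indexers instead returns a view, so callers can check
# `retval.base` to see whether an explicit copy is still required.
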
def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike:
    """
    Concatenate values from several join units along axis=1.
    """
    empty_dtype = _get_empty_dtype(join_units)

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)

    to_concat = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if len(to_concat) == 1:
        # Only one block, nothing to concatenate.
        concat_values = to_concat[0]
        if copy:
            if isinstance(concat_values, np.ndarray):
                # non-reindexed (=not yet copied) arrays are made into a view
                #  in JoinUnit.get_reindexed_values
                if concat_values.base is not None:
                    concat_values = concat_values.copy()
            else:
                concat_values = concat_values.copy()

    elif any(is_1d_only_ea_dtype(t.dtype) for t in to_concat):
        # TODO(EA2D): special case not needed if all EAs used HybridBlocks

        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        #  argument type "Tuple[int, slice]"
        to_concat = [
            t
            if is_1d_only_ea_dtype(t.dtype)
            else t[0, :]  # type: ignore[call-overload]
            for t in to_concat
        ]
        concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
        concat_values = ensure_block_shape(concat_values, 2)

    else:
        concat_values = concat_compat(to_concat, axis=1)

    return concat_values
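
# Shape-handling sketch (hedged): if any unit resolves to a 1D-only extension
# dtype (e.g. Categorical), the 2D values from the other units are squeezed
# to their single row via t[0, :], everything is concatenated along axis=0,
# and ensure_block_shape re-expands the result to 2D so it fits in a Block.
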
def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool):
    """
    Find the NA value to go with this dtype.
    """
    if isinstance(dtype, ExtensionDtype):
        return dtype.na_value
    elif dtype.kind in ["m", "M"]:
        return dtype.type("NaT")
    elif dtype.kind in ["f", "c"]:
        return dtype.type("NaN")
    elif dtype.kind == "b":
        # different from missing.na_value_for_dtype
        return None
    elif dtype.kind in ["i", "u"]:
        if not has_none_blocks:
            # different from missing.na_value_for_dtype
            return None
        return np.nan
    elif dtype.kind == "O":
        return np.nan
    raise NotImplementedError
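
# Expected NA-value mapping (behaviour sketch, not an exhaustive spec):
#
#     >>> _dtype_to_na_value(np.dtype("M8[ns]"), has_none_blocks=False)
#     numpy.datetime64('NaT')
#     >>> _dtype_to_na_value(np.dtype("f8"), has_none_blocks=False)
#     nan
#     >>> _dtype_to_na_value(np.dtype("i8"), has_none_blocks=False) is None
#     True
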
def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
    """
    Return the dtype to use when concatenating the specified units.

    Returns
    -------
    dtype
    """
    if len(join_units) == 1:
        blk = join_units[0].block
        return blk.dtype

    if _is_uniform_reindex(join_units):
        empty_dtype = join_units[0].block.dtype
        return empty_dtype

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)

    dtypes = [unit.dtype for unit in join_units if not unit.is_na]
    if not len(dtypes):
        dtypes = [unit.dtype for unit in join_units if unit.block.dtype.kind != "V"]

    dtype = find_common_type(dtypes)
    if has_none_blocks:
        dtype = ensure_dtype_can_hold_na(dtype)
    return dtype
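
# Dtype-resolution sketch (hedged): with an int64 unit plus an all-NA proxy
# unit (block dtype kind "V"), the proxy contributes no dtype, but
# has_none_blocks triggers ensure_dtype_can_hold_na, so the expected result
# is float64 rather than int64; int64 + float64 units resolve to float64
# directly via find_common_type.
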
def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
    """
    Check if the join units consist of blocks of uniform type that can
    be concatenated using Block.concat_same_type instead of the generic
    _concatenate_join_units (which uses `concat_compat`).
    """
    first = join_units[0].block
    if first.dtype.kind == "V":
        return False
    return (
        # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
        all(type(ju.block) is type(first) for ju in join_units)
        and
        # e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform
        all(
            is_dtype_equal(ju.block.dtype, first.dtype)
            # GH#42092 we only want the dtype_equal check for non-numeric blocks
            #  (for now, may change but that would need a deprecation)
            or ju.block.dtype.kind in ["b", "i", "u"]
            for ju in join_units
        )
        and
        # no blocks that would get missing values (can lead to type upcasts)
        # unless we're an extension dtype.
        all(not ju.is_na or ju.block.is_extension for ju in join_units)
        and
        # no blocks with indexers (as then the dimensions do not fit)
        all(not ju.indexers for ju in join_units)
        and
        # only use this path when there is something to concatenate
        len(join_units) > 1
    )


def _is_uniform_reindex(join_units) -> bool:
    return (
        # TODO: should this be ju.block._can_hold_na?
        all(ju.block.is_extension for ju in join_units)
        and len({ju.block.dtype.name for ju in join_units}) == 1
    )


def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit:
    """
    Reduce join_unit's shape along item axis to length.

    Extra items that didn't fit are returned as a separate block.
    """
    if 0 not in join_unit.indexers:
        extra_indexers = join_unit.indexers

        if join_unit.block is None:
            extra_block = None
        else:
            extra_block = join_unit.block.getitem_block(slice(length, None))
            join_unit.block = join_unit.block.getitem_block(slice(length))
    else:
        extra_block = join_unit.block

        extra_indexers = cp.copy(join_unit.indexers)
        extra_indexers[0] = extra_indexers[0][length:]
        join_unit.indexers[0] = join_unit.indexers[0][:length]

    extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:]
    join_unit.shape = (length,) + join_unit.shape[1:]

    return JoinUnit(block=extra_block, indexers=extra_indexers, shape=extra_shape)
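
# Trimming sketch (illustrative): for a JoinUnit of shape (5, n) and
# length=2, _trim_join_unit shrinks the unit in place to shape (2, n) and
# returns a new JoinUnit of shape (3, n) holding the leftover items; any ax0
# indexer is split at the same cut point.
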
def _combine_concat_plans(plans):
    """
    Combine multiple concatenation plans into one.

    The input plans' units are updated in place (via _trim_join_unit) as they
    are consumed.

    We only get here with concat_axis == 1.
    """
    if len(plans) == 1:
        for p in plans[0]:
            yield p[0], [p[1]]

    else:
        # singleton list so we can modify it as a side-effect within _next_or_none
        num_ended = [0]

        def _next_or_none(seq):
            retval = next(seq, None)
            if retval is None:
                num_ended[0] += 1
            return retval

        plans = list(map(iter, plans))
        next_items = list(map(_next_or_none, plans))

        while num_ended[0] != len(next_items):
            if num_ended[0] > 0:
                raise ValueError("Plan shapes are not aligned")

            placements, units = zip(*next_items)

            lengths = list(map(len, placements))
            min_len, max_len = min(lengths), max(lengths)

            if min_len == max_len:
                yield placements[0], units
                next_items[:] = map(_next_or_none, plans)
            else:
                yielded_placement = None
                yielded_units = [None] * len(next_items)
                for i, (plc, unit) in enumerate(next_items):
                    yielded_units[i] = unit
                    if len(plc) > min_len:
                        # _trim_join_unit updates unit in place, so only
                        #  placement needs to be sliced to skip min_len.
                        next_items[i] = (plc[min_len:], _trim_join_unit(unit, min_len))
                    else:
                        yielded_placement = plc
                        next_items[i] = _next_or_none(plans[i])

                yield yielded_placement, yielded_units
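
# Alignment sketch (hedged): given plans whose placements total the same
# length but are chunked differently, e.g. lengths [3, 2] vs [5], the
# generator yields aligned chunks of lengths 3 and 2, trimming the longer
# unit via _trim_join_unit so each yielded placement is paired with one
# JoinUnit per input plan. If the totals do not line up, the loop raises
# ValueError("Plan shapes are not aligned").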