Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/internals/concat.py: 13%


from __future__ import annotations

import copy as cp
import itertools
from typing import (
    TYPE_CHECKING,
    Sequence,
    cast,
)

import numpy as np

from pandas._libs import (
    NaT,
    internals as libinternals,
)
from pandas._libs.missing import NA
from pandas._typing import (
    ArrayLike,
    AxisInt,
    DtypeObj,
    Manager,
    Shape,
)
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.astype import astype_array
from pandas.core.dtypes.cast import (
    ensure_dtype_can_hold_na,
    find_common_type,
    np_find_common_type,
)
from pandas.core.dtypes.common import (
    is_1d_only_ea_dtype,
    is_dtype_equal,
    is_scalar,
    needs_i8_conversion,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype,
    ExtensionDtype,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
    isna_all,
)

import pandas.core.algorithms as algos
from pandas.core.arrays import (
    DatetimeArray,
    ExtensionArray,
)
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.internals.array_manager import (
    ArrayManager,
    NullArrayProxy,
)
from pandas.core.internals.blocks import (
    ensure_block_shape,
    new_block_2d,
)
from pandas.core.internals.managers import BlockManager

if TYPE_CHECKING:
    from pandas import Index
    from pandas.core.internals.blocks import Block


def _concatenate_array_managers(
    mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool
) -> Manager:
    """
    Concatenate array managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (ArrayManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    ArrayManager
    """
    # reindex all arrays
    mgrs = []
    for mgr, indexers in mgrs_indexers:
        axis1_made_copy = False
        for ax, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[ax], indexer, axis=ax, allow_dups=True, use_na_proxy=True
            )
            if ax == 1 and indexer is not None:
                axis1_made_copy = True
        if copy and concat_axis == 0 and not axis1_made_copy:
            # for concat_axis 1 we will always get a copy through concat_arrays
            mgr = mgr.copy()
        mgrs.append(mgr)

    if concat_axis == 1:
        # concatting along the rows -> concat the reindexed arrays
        # TODO(ArrayManager) doesn't yet preserve the correct dtype
        arrays = [
            concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))])
            for j in range(len(mgrs[0].arrays))
        ]
    else:
        # concatting along the columns -> combine reindexed arrays in a single manager
        assert concat_axis == 0
        arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))

    new_mgr = ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False)
    return new_mgr
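
# Illustrative sketch of the input structure (names are hypothetical, not
# part of pandas): each entry of ``mgrs_indexers`` pairs a manager with the
# {axis: indexer} reindexings it still needs, e.g.
#
#   mgrs_indexers = [(am1, {}), (am2, {1: np.array([0, -1], dtype=np.intp)})]
#
# where -1 marks positions to be filled with NA proxies.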

def concat_arrays(to_concat: list) -> ArrayLike:
    """
    Alternative for concat_compat but specialized for use in the ArrayManager.

    Differences: only deals with 1D arrays (no axis keyword); assumes the
    inputs are already wrapped with ensure_wrapped_if_datetimelike; and does
    not skip empty arrays when determining the dtype.
    In addition, ensures that all NullArrayProxies get replaced with actual
    arrays.

    Parameters
    ----------
    to_concat : list of arrays

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # ignore the all-NA proxies to determine the resulting dtype
    to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)]

    dtypes = {x.dtype for x in to_concat_no_proxy}
    single_dtype = len(dtypes) == 1

    if single_dtype:
        target_dtype = to_concat_no_proxy[0].dtype
    elif all(x.kind in ["i", "u", "b"] and isinstance(x, np.dtype) for x in dtypes):
        # GH#42092
        target_dtype = np_find_common_type(*dtypes)
    else:
        target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy])

    to_concat = [
        arr.to_array(target_dtype)
        if isinstance(arr, NullArrayProxy)
        else astype_array(arr, target_dtype, copy=False)
        for arr in to_concat
    ]

    if isinstance(to_concat[0], ExtensionArray):
        cls = type(to_concat[0])
        return cls._concat_same_type(to_concat)

    result = np.concatenate(to_concat)

    # TODO decide on exact behaviour (we shouldn't do this only for empty result)
    # see https://github.com/pandas-dev/pandas/issues/39817
    if len(result) == 0:
        # all empties -> check for bool to not coerce to float
        kinds = {obj.dtype.kind for obj in to_concat_no_proxy}
        if len(kinds) != 1:
            if "b" in kinds:
                result = result.astype(object)
    return result
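
# A minimal sketch of the dtype resolution above (illustrative, assuming
# plain numpy inputs): the pure integer/unsigned/bool case goes through
# np_find_common_type (GH#42092), everything else through find_common_type.
#
#   >>> import numpy as np
#   >>> concat_arrays([np.array([1], dtype="int8"), np.array([2], dtype="int16")]).dtype
#   dtype('int16')
#   >>> concat_arrays([np.array([True]), np.array([1], dtype="int64")]).dtype
#   dtype('int64')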

def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool
) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    # Assertions disabled for performance
    # for tup in mgrs_indexers:
    #     # caller is responsible for ensuring this
    #     indexers = tup[1]
    #     assert concat_axis not in indexers

    if concat_axis == 0:
        return _concat_managers_axis0(mgrs_indexers, axes, copy)

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans)
    blocks = []

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block

        if len(join_units) == 1 and not join_units[0].indexers:
            values = blk.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                # we can use np.concatenate, which is more performant
                # than concat_compat
                values = np.concatenate(vals, axis=1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=1)
                values = ensure_block_shape(values, ndim=2)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block_2d(values, placement=placement)

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
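
# Usage note (illustrative): concatenate_managers is the internal entry point
# reached from pd.concat via pandas.core.reshape.concat, with one
# (manager, {axis: indexer}) tuple per input frame, e.g.
#
#   >>> import pandas as pd
#   >>> pd.concat([pd.DataFrame({"a": [1]}), pd.DataFrame({"a": [2.0]})])
#        a
#   0  1.0
#   0  2.0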

def _concat_managers_axis0(
    mgrs_indexers, axes: list[Index], copy: bool
) -> BlockManager:
    """
    concatenate_managers specialized to concat_axis=0, with reindexing already
    having been done in _maybe_reindex_columns_na_proxy.
    """
    had_reindexers = {
        i: len(mgrs_indexers[i][1]) > 0 for i in range(len(mgrs_indexers))
    }
    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    mgrs = [x[0] for x in mgrs_indexers]

    offset = 0
    blocks = []
    for i, mgr in enumerate(mgrs):
        # If we already reindexed, then we definitely don't need another copy
        made_copy = had_reindexers[i]

        for blk in mgr.blocks:
            if made_copy:
                nb = blk.copy(deep=False)
            elif copy:
                nb = blk.copy()
            else:
                # by slicing instead of copy(deep=False), we get a new array
                # object, see test_concat_copy
                nb = blk.getitem_block(slice(None))
            nb._mgr_locs = nb._mgr_locs.add(offset)
            blocks.append(nb)

        offset += len(mgr.items)

    result = BlockManager(tuple(blocks), axes)
    return result


def _maybe_reindex_columns_na_proxy(
    axes: list[Index], mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]]
) -> list[tuple[BlockManager, dict[int, np.ndarray]]]:
    """
    Reindex along columns so that all of the BlockManagers being concatenated
    have matching columns.

    Columns added in this reindexing have dtype=np.void, indicating they
    should be ignored when choosing a column's final dtype.
    """
    new_mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]] = []

    for mgr, indexers in mgrs_indexers:
        # For axis=0 (i.e. columns) we use_na_proxy and only_slice, so this
        # is a cheap reindexing.
        for i, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[i],
                indexers[i],
                axis=i,
                copy=False,
                only_slice=True,  # only relevant for i==0
                allow_dups=True,
                use_na_proxy=True,  # only relevant for i==0
            )
        new_mgrs_indexers.append((mgr, {}))
    return new_mgrs_indexers
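
# Illustrative: the "na proxy" columns mentioned in the docstring above are
# backed by void-dtype arrays, so later steps can recognize and ignore them
# via dtype.kind:
#
#   >>> import numpy as np
#   >>> np.dtype(np.void).kind
#   'V'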

def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarray]):
    """
    Construct concatenation plan for given block manager and indexers.

    Parameters
    ----------
    mgr : BlockManager
    indexers : dict of {axis: indexer}

    Returns
    -------
    plan : list of (BlockPlacement, JoinUnit) tuples
    """
    assert len(indexers) == 0

    # Calculate post-reindex shape, save for item axis which will be separate
    # for each block anyway.
    mgr_shape_list = list(mgr.shape)
    for ax, indexer in indexers.items():
        mgr_shape_list[ax] = len(indexer)
    mgr_shape = tuple(mgr_shape_list)

    assert 0 not in indexers

    if mgr.is_single_block:
        blk = mgr.blocks[0]
        return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))]

    blknos = mgr.blknos
    blklocs = mgr.blklocs

    plan = []
    for blkno, placements in libinternals.get_blkno_placements(blknos, group=False):
        assert placements.is_slice_like
        assert blkno != -1

        join_unit_indexers = indexers.copy()

        shape_list = list(mgr_shape)
        shape_list[0] = len(placements)
        shape = tuple(shape_list)

        blk = mgr.blocks[blkno]
        ax0_blk_indexer = blklocs[placements.indexer]

        unit_no_ax0_reindexing = (
            len(placements) == len(blk.mgr_locs)
            and
            # Fastpath detection of join unit not
            # needing to reindex its block: no ax0
            # reindexing took place and block
            # placement was sequential before.
            (
                (blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1)
                or
                # Slow-ish detection: all indexer locs
                # are sequential (and length match is
                # checked above).
                (np.diff(ax0_blk_indexer) == 1).all()
            )
        )

        # Omit indexer if no item reindexing is required.
        if unit_no_ax0_reindexing:
            join_unit_indexers.pop(0, None)
        else:
            join_unit_indexers[0] = ax0_blk_indexer

        unit = JoinUnit(blk, shape, join_unit_indexers)

        plan.append((placements, unit))

    return plan
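
# Illustrative shape of the returned plan (schematic, not actual reprs): for
# a manager holding an int64 block on rows 0-1 and a float64 block on row 2,
# the plan pairs each placement with its JoinUnit, roughly
#
#   [(BlockPlacement(slice(0, 2, 1)), JoinUnit(<int64 block>, shape, {})),
#    (BlockPlacement(slice(2, 3, 1)), JoinUnit(<float64 block>, shape, {}))]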

class JoinUnit:
    def __init__(self, block: Block, shape: Shape, indexers=None) -> None:
        # Passing shape explicitly is required for cases when block is None.
        # Note: block is None implies indexers is None, but not vice-versa
        if indexers is None:
            indexers = {}
        self.block = block
        self.indexers = indexers
        self.shape = shape

    def __repr__(self) -> str:
        return f"{type(self).__name__}({repr(self.block)}, {self.indexers})"

    @cache_readonly
    def needs_filling(self) -> bool:
        for indexer in self.indexers.values():
            # FIXME: cache results of indexer == -1 checks.
            if (indexer == -1).any():
                return True

        return False

    @cache_readonly
    def dtype(self) -> DtypeObj:
        blk = self.block
        if blk.values.dtype.kind == "V":
            raise AssertionError("Block is None, no dtype")

        if not self.needs_filling:
            return blk.dtype
        return ensure_dtype_can_hold_na(blk.dtype)

    def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
        """
        Check that we are all-NA of a type/dtype that is compatible with this dtype.
        Augments `self.is_na` with an additional check of the type of NA values.
        """
        if not self.is_na:
            return False
        if self.block.dtype.kind == "V":
            return True

        if self.dtype == object:
            values = self.block.values
            return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K"))

        na_value = self.block.fill_value
        if na_value is NaT and not is_dtype_equal(self.dtype, dtype):
            # e.g. we are dt64 and other is td64
            # fill_values match but we should not cast self.block.values to dtype
            # TODO: this will need updating if we ever have non-nano dt64/td64
            return False

        if na_value is NA and needs_i8_conversion(dtype):
            # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
            # e.g. self.dtype == "Int64" and dtype is td64, we dont want
            # to consider these as matching
            return False

        # TODO: better to use can_hold_element?
        return is_valid_na_for_dtype(na_value, dtype)

    @cache_readonly
    def is_na(self) -> bool:
        blk = self.block
        if blk.dtype.kind == "V":
            return True

        if not blk._can_hold_na:
            return False

        values = blk.values
        if values.size == 0:
            return True
        if isinstance(values.dtype, SparseDtype):
            return False

        if values.ndim == 1:
            # TODO(EA2D): no need for special case with 2D EAs
            val = values[0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return isna_all(values)
        else:
            val = values[0][0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return all(isna_all(row) for row in values)

    def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
        values: ArrayLike

        if upcasted_na is None and self.block.dtype.kind != "V":
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.values
        else:
            fill_value = upcasted_na

            if self._is_valid_na_for(empty_dtype):
                # note: always holds when self.block.dtype.kind == "V"
                blk_dtype = self.block.dtype

                if blk_dtype == np.dtype("object"):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order="K")
                    if len(values) and values[0] is None:
                        fill_value = None

                if isinstance(empty_dtype, DatetimeTZDtype):
                    # NB: exclude e.g. pyarrow[dt64tz] dtypes
                    i8values = np.full(self.shape, fill_value._value)
                    return DatetimeArray(i8values, dtype=empty_dtype)

                elif is_1d_only_ea_dtype(empty_dtype):
                    if is_dtype_equal(blk_dtype, empty_dtype) and self.indexers:
                        # avoid creating new empty array if we already have an array
                        # with correct dtype that can be reindexed
                        pass
                    else:
                        empty_dtype = cast(ExtensionDtype, empty_dtype)
                        cls = empty_dtype.construct_array_type()

                        missing_arr = cls._from_sequence([], dtype=empty_dtype)
                        ncols, nrows = self.shape
                        assert ncols == 1, ncols
                        empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
                        return missing_arr.take(
                            empty_arr, allow_fill=True, fill_value=fill_value
                        )
                elif isinstance(empty_dtype, ExtensionDtype):
                    # TODO: no tests get here, a handful would if we disabled
                    # the dt64tz special-case above (which is faster)
                    cls = empty_dtype.construct_array_type()
                    missing_arr = cls._empty(shape=self.shape, dtype=empty_dtype)
                    missing_arr[:] = fill_value
                    return missing_arr
                else:
                    # NB: we should never get here with empty_dtype integer or bool;
                    # if we did, the missing_arr.fill would cast to gibberish
                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if (not self.indexers) and (not self.block._can_consolidate):
                # preserve these for validation in concat_compat
                return self.block.values

            if self.block.is_bool:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.dtype("object")).values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.values

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly. This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values, indexer, axis=ax)

        return values
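
# Illustrative summary: a JoinUnit bundles one Block, the post-reindex shape
# it must occupy in the result, and any remaining {axis: indexer}
# reindexings. is_na / _is_valid_na_for let _get_empty_dtype ignore all-NA
# units when choosing the result dtype, and get_reindexed_values materializes
# the (possibly filled/upcast) values for concatenation.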

def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike:
    """
    Concatenate values from several join units along axis=1.
    """
    empty_dtype = _get_empty_dtype(join_units)

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)

    to_concat = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if len(to_concat) == 1:
        # Only one block, nothing to concatenate.
        concat_values = to_concat[0]
        if copy:
            if isinstance(concat_values, np.ndarray):
                # non-reindexed (=not yet copied) arrays are made into a view
                # in JoinUnit.get_reindexed_values
                if concat_values.base is not None:
                    concat_values = concat_values.copy()
            else:
                concat_values = concat_values.copy()

    elif any(is_1d_only_ea_dtype(t.dtype) for t in to_concat):
        # TODO(EA2D): special case not needed if all EAs used HybridBlocks

        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[int, slice]"
        to_concat = [
            t
            if is_1d_only_ea_dtype(t.dtype)
            else t[0, :]  # type: ignore[call-overload]
            for t in to_concat
        ]
        concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
        concat_values = ensure_block_shape(concat_values, 2)

    else:
        concat_values = concat_compat(to_concat, axis=1)

    return concat_values
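
# Copy semantics sketch (illustrative): with a single join unit,
# get_reindexed_values returns a view of the block's ndarray, so the
# `concat_values.base is not None` check above is what triggers the explicit
# copy when copy=True; multi-unit paths always allocate a new array via
# np.concatenate / concat_compat.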

def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool):
    """
    Find the NA value to go with this dtype.
    """
    if isinstance(dtype, ExtensionDtype):
        return dtype.na_value
    elif dtype.kind in ["m", "M"]:
        return dtype.type("NaT")
    elif dtype.kind in ["f", "c"]:
        return dtype.type("NaN")
    elif dtype.kind == "b":
        # different from missing.na_value_for_dtype
        return None
    elif dtype.kind in ["i", "u"]:
        if not has_none_blocks:
            # different from missing.na_value_for_dtype
            return None
        return np.nan
    elif dtype.kind == "O":
        return np.nan
    raise NotImplementedError
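
# A few concrete mappings implied above (illustrative, assuming numpy dtypes):
#
#   >>> import numpy as np
#   >>> _dtype_to_na_value(np.dtype("M8[ns]"), has_none_blocks=False)
#   numpy.datetime64('NaT')
#   >>> _dtype_to_na_value(np.dtype("f8"), has_none_blocks=False)
#   nan
#   >>> _dtype_to_na_value(np.dtype("i8"), has_none_blocks=False) is None
#   True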

def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
    """
    Return the dtype to use when concatenating specified units.

    Returns
    -------
    dtype
    """
    if len(join_units) == 1:
        blk = join_units[0].block
        return blk.dtype

    if _is_uniform_reindex(join_units):
        empty_dtype = join_units[0].block.dtype
        return empty_dtype

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)

    dtypes = [unit.dtype for unit in join_units if not unit.is_na]
    if not len(dtypes):
        dtypes = [unit.dtype for unit in join_units if unit.block.dtype.kind != "V"]

    dtype = find_common_type(dtypes)
    if has_none_blocks:
        dtype = ensure_dtype_can_hold_na(dtype)
    return dtype
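
# Sketch of the common-dtype step (illustrative):
#
#   >>> import numpy as np
#   >>> from pandas.core.dtypes.cast import find_common_type
#   >>> find_common_type([np.dtype("int64"), np.dtype("float64")])
#   dtype('float64')
#   >>> find_common_type([np.dtype("int64"), np.dtype("O")])
#   dtype('O')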

def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
    """
    Check if the join units consist of blocks of uniform type that can
    be concatenated using Block.concat_same_type instead of the generic
    _concatenate_join_units (which uses `concat_compat`).
    """
    first = join_units[0].block
    if first.dtype.kind == "V":
        return False
    return (
        # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
        all(type(ju.block) is type(first) for ju in join_units)
        and
        # e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform
        all(
            is_dtype_equal(ju.block.dtype, first.dtype)
            # GH#42092 we only want the dtype_equal check for non-numeric blocks
            # (for now, may change but that would need a deprecation)
            or ju.block.dtype.kind in ["b", "i", "u"]
            for ju in join_units
        )
        and
        # no blocks that would get missing values (can lead to type upcasts)
        # unless we're an extension dtype.
        all(not ju.is_na or ju.block.is_extension for ju in join_units)
        and
        # no blocks with indexers (as then the dimensions do not fit)
        all(not ju.indexers for ju in join_units)
        and
        # only use this path when there is something to concatenate
        len(join_units) > 1
    )
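
# Illustrative: two plain int64 blocks are "uniform" and take the fast
# np.concatenate path in concatenate_managers; mixing numpy int64 with the
# nullable Int64 extension dtype is not uniform (different block types), so
# it falls back to _concatenate_join_units / concat_compat.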

def _is_uniform_reindex(join_units) -> bool:
    return (
        # TODO: should this be ju.block._can_hold_na?
        all(ju.block.is_extension for ju in join_units)
        and len({ju.block.dtype.name for ju in join_units}) == 1
    )


def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit:
    """
    Reduce join_unit's shape along item axis to length.

    Extra items that didn't fit are returned as a separate block.
    """
    if 0 not in join_unit.indexers:
        extra_indexers = join_unit.indexers

        if join_unit.block is None:
            extra_block = None
        else:
            extra_block = join_unit.block.getitem_block(slice(length, None))
            join_unit.block = join_unit.block.getitem_block(slice(length))
    else:
        extra_block = join_unit.block

        extra_indexers = cp.copy(join_unit.indexers)
        extra_indexers[0] = extra_indexers[0][length:]
        join_unit.indexers[0] = join_unit.indexers[0][:length]

    extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:]
    join_unit.shape = (length,) + join_unit.shape[1:]

    return JoinUnit(block=extra_block, indexers=extra_indexers, shape=extra_shape)
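
# Illustrative: trimming a unit of shape (3, n) to length 2 shrinks it in
# place to shape (2, n) and returns a new JoinUnit of shape (1, n) holding
# the leftover items; _combine_concat_plans uses this to align plans whose
# placements have different lengths.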

def _combine_concat_plans(plans):
    """
    Combine multiple concatenation plans into one.

    We only get here with concat_axis == 1.
    """
    if len(plans) == 1:
        for p in plans[0]:
            yield p[0], [p[1]]

    else:
        # singleton list so we can modify it as a side-effect within _next_or_none
        num_ended = [0]

        def _next_or_none(seq):
            retval = next(seq, None)
            if retval is None:
                num_ended[0] += 1
            return retval

        plans = list(map(iter, plans))
        next_items = list(map(_next_or_none, plans))

        while num_ended[0] != len(next_items):
            if num_ended[0] > 0:
                raise ValueError("Plan shapes are not aligned")

            placements, units = zip(*next_items)

            lengths = list(map(len, placements))
            min_len, max_len = min(lengths), max(lengths)

            if min_len == max_len:
                yield placements[0], units
                next_items[:] = map(_next_or_none, plans)
            else:
                yielded_placement = None
                yielded_units = [None] * len(next_items)
                for i, (plc, unit) in enumerate(next_items):
                    yielded_units[i] = unit
                    if len(plc) > min_len:
                        # _trim_join_unit updates unit in place, so only
                        # placement needs to be sliced to skip min_len.
                        next_items[i] = (plc[min_len:], _trim_join_unit(unit, min_len))
                    else:
                        yielded_placement = plc
                        next_items[i] = _next_or_none(plans[i])

                yield yielded_placement, yielded_units
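
# Illustrative walk-through: given plan A with one placement of length 3 and
# plan B with placements of lengths 2 and 1, the generator above first yields
# a length-2 chunk (trimming A's unit via _trim_join_unit) and then a
# length-1 chunk, so every yielded group of join units covers the same set of
# item locations.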