Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/internals/concat.py: 13%


from __future__ import annotations

import copy as cp
import itertools
from typing import (
    TYPE_CHECKING,
    Sequence,
    cast,
)

import numpy as np

from pandas._libs import (
    NaT,
    internals as libinternals,
)
from pandas._libs.missing import NA
from pandas._typing import (
    ArrayLike,
    AxisInt,
    DtypeObj,
    Manager,
    Shape,
)
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.astype import astype_array
from pandas.core.dtypes.cast import (
    ensure_dtype_can_hold_na,
    find_common_type,
    np_find_common_type,
)
from pandas.core.dtypes.common import (
    is_1d_only_ea_dtype,
    is_dtype_equal,
    is_scalar,
    needs_i8_conversion,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype,
    ExtensionDtype,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
    isna_all,
)

import pandas.core.algorithms as algos
from pandas.core.arrays import (
    DatetimeArray,
    ExtensionArray,
)
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.internals.array_manager import (
    ArrayManager,
    NullArrayProxy,
)
from pandas.core.internals.blocks import (
    ensure_block_shape,
    new_block_2d,
)
from pandas.core.internals.managers import BlockManager

if TYPE_CHECKING:
    from pandas import Index
    from pandas.core.internals.blocks import Block


def _concatenate_array_managers(
    mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool
) -> Manager:
    """
    Concatenate array managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (ArrayManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    ArrayManager
    """
    # reindex all arrays
    mgrs = []
    for mgr, indexers in mgrs_indexers:
        axis1_made_copy = False
        for ax, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[ax], indexer, axis=ax, allow_dups=True, use_na_proxy=True
            )
            if ax == 1 and indexer is not None:
                axis1_made_copy = True
        if copy and concat_axis == 0 and not axis1_made_copy:
            # for concat_axis 1 we will always get a copy through concat_arrays
            mgr = mgr.copy()
        mgrs.append(mgr)

    if concat_axis == 1:
        # concatting along the rows -> concat the reindexed arrays
        # TODO(ArrayManager) doesn't yet preserve the correct dtype
        arrays = [
            concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))])
            for j in range(len(mgrs[0].arrays))
        ]
    else:
        # concatting along the columns -> combine reindexed arrays in a single manager
        assert concat_axis == 0
        arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))

    new_mgr = ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False)
    return new_mgr
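
# Illustrative sketch of the input structure (names are hypothetical, not
# part of pandas): each entry of ``mgrs_indexers`` pairs a manager with the
# {axis: indexer} reindexings it still needs, e.g.
#
#   mgrs_indexers = [(am1, {}), (am2, {1: np.array([0, -1], dtype=np.intp)})]
#
# where -1 marks positions to be filled with NA proxies.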

def concat_arrays(to_concat: list) -> ArrayLike:
    """
    Alternative for concat_compat but specialized for use in the ArrayManager.

    Differences: only deals with 1D arrays (no axis keyword); assumes the
    inputs are already wrapped with ensure_wrapped_if_datetimelike; and does
    not skip empty arrays when determining the dtype.
    In addition, ensures that all NullArrayProxies get replaced with actual
    arrays.

    Parameters
    ----------
    to_concat : list of arrays

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # ignore the all-NA proxies to determine the resulting dtype
    to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)]

    dtypes = {x.dtype for x in to_concat_no_proxy}
    single_dtype = len(dtypes) == 1

    if single_dtype:
        target_dtype = to_concat_no_proxy[0].dtype
    elif all(x.kind in ["i", "u", "b"] and isinstance(x, np.dtype) for x in dtypes):
        # GH#42092
        target_dtype = np_find_common_type(*dtypes)
    else:
        target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy])

    to_concat = [
        arr.to_array(target_dtype)
        if isinstance(arr, NullArrayProxy)
        else astype_array(arr, target_dtype, copy=False)
        for arr in to_concat
    ]

    if isinstance(to_concat[0], ExtensionArray):
        cls = type(to_concat[0])
        return cls._concat_same_type(to_concat)

    result = np.concatenate(to_concat)

    # TODO decide on exact behaviour (we shouldn't do this only for empty result)
    # see https://github.com/pandas-dev/pandas/issues/39817
    if len(result) == 0:
        # all empties -> check for bool to not coerce to float
        kinds = {obj.dtype.kind for obj in to_concat_no_proxy}
        if len(kinds) != 1:
            if "b" in kinds:
                result = result.astype(object)
    return result
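
# A minimal sketch of the dtype resolution above (illustrative, assuming
# plain numpy inputs): the pure integer/unsigned/bool case goes through
# np_find_common_type (GH#42092), everything else through find_common_type.
#
#   >>> import numpy as np
#   >>> concat_arrays([np.array([1], dtype="int8"), np.array([2], dtype="int16")]).dtype
#   dtype('int16')
#   >>> concat_arrays([np.array([True]), np.array([1], dtype="int64")]).dtype
#   dtype('int64')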

def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool
) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    # Assertions disabled for performance
    # for tup in mgrs_indexers:
    #     # caller is responsible for ensuring this
    #     indexers = tup[1]
    #     assert concat_axis not in indexers

    if concat_axis == 0:
        return _concat_managers_axis0(mgrs_indexers, axes, copy)

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans)
    blocks = []

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block

        if len(join_units) == 1 and not join_units[0].indexers:
            values = blk.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                # we can use np.concatenate, which is more performant
                # than concat_compat
                values = np.concatenate(vals, axis=1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=1)
                values = ensure_block_shape(values, ndim=2)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block_2d(values, placement=placement)

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
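
# Usage note (illustrative): concatenate_managers is the internal entry point
# reached from pd.concat via pandas.core.reshape.concat, with one
# (manager, {axis: indexer}) tuple per input frame, e.g.
#
#   >>> import pandas as pd
#   >>> pd.concat([pd.DataFrame({"a": [1]}), pd.DataFrame({"a": [2.0]})])
#        a
#   0  1.0
#   0  2.0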

def _concat_managers_axis0(
    mgrs_indexers, axes: list[Index], copy: bool
) -> BlockManager:
    """
    concatenate_managers specialized to concat_axis=0, with reindexing already
    having been done in _maybe_reindex_columns_na_proxy.
    """
    had_reindexers = {
        i: len(mgrs_indexers[i][1]) > 0 for i in range(len(mgrs_indexers))
    }
    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    mgrs = [x[0] for x in mgrs_indexers]

    offset = 0
    blocks = []
    for i, mgr in enumerate(mgrs):
        # If we already reindexed, then we definitely don't need another copy
        made_copy = had_reindexers[i]

        for blk in mgr.blocks:
            if made_copy:
                nb = blk.copy(deep=False)
            elif copy:
                nb = blk.copy()
            else:
                # by slicing instead of copy(deep=False), we get a new array
                # object, see test_concat_copy
                nb = blk.getitem_block(slice(None))
            nb._mgr_locs = nb._mgr_locs.add(offset)
            blocks.append(nb)

        offset += len(mgr.items)

    result = BlockManager(tuple(blocks), axes)
    return result


def _maybe_reindex_columns_na_proxy(
    axes: list[Index], mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]]
) -> list[tuple[BlockManager, dict[int, np.ndarray]]]:
    """
    Reindex along columns so that all of the BlockManagers being concatenated
    have matching columns.

    Columns added in this reindexing have dtype=np.void, indicating they
    should be ignored when choosing a column's final dtype.
    """
    new_mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]] = []

    for mgr, indexers in mgrs_indexers:
        # For axis=0 (i.e. columns) we use_na_proxy and only_slice, so this
        # is a cheap reindexing.
        for i, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[i],
                indexers[i],
                axis=i,
                copy=False,
                only_slice=True,  # only relevant for i==0
                allow_dups=True,
                use_na_proxy=True,  # only relevant for i==0
            )
        new_mgrs_indexers.append((mgr, {}))
    return new_mgrs_indexers
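
# Illustrative: the "na proxy" columns mentioned in the docstring above are
# backed by void-dtype arrays, so later steps can recognize and ignore them
# via dtype.kind:
#
#   >>> import numpy as np
#   >>> np.dtype(np.void).kind
#   'V'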

def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarray]):
    """
    Construct concatenation plan for given block manager and indexers.

    Parameters
    ----------
    mgr : BlockManager
    indexers : dict of {axis: indexer}

    Returns
    -------
    plan : list of (BlockPlacement, JoinUnit) tuples
    """
    assert len(indexers) == 0

    # Calculate post-reindex shape, save for item axis which will be separate
    # for each block anyway.
    mgr_shape_list = list(mgr.shape)
    for ax, indexer in indexers.items():
        mgr_shape_list[ax] = len(indexer)
    mgr_shape = tuple(mgr_shape_list)

    assert 0 not in indexers

    if mgr.is_single_block:
        blk = mgr.blocks[0]
        return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))]

    blknos = mgr.blknos
    blklocs = mgr.blklocs

    plan = []
    for blkno, placements in libinternals.get_blkno_placements(blknos, group=False):
        assert placements.is_slice_like
        assert blkno != -1

        join_unit_indexers = indexers.copy()

        shape_list = list(mgr_shape)
        shape_list[0] = len(placements)
        shape = tuple(shape_list)

        blk = mgr.blocks[blkno]
        ax0_blk_indexer = blklocs[placements.indexer]

        unit_no_ax0_reindexing = (
            len(placements) == len(blk.mgr_locs)
            and
            # Fastpath detection of join unit not
            # needing to reindex its block: no ax0
            # reindexing took place and block
            # placement was sequential before.
            (
                (blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1)
                or
                # Slow-ish detection: all indexer locs
                # are sequential (and length match is
                # checked above).
                (np.diff(ax0_blk_indexer) == 1).all()
            )
        )

        # Omit indexer if no item reindexing is required.
        if unit_no_ax0_reindexing:
            join_unit_indexers.pop(0, None)
        else:
            join_unit_indexers[0] = ax0_blk_indexer

        unit = JoinUnit(blk, shape, join_unit_indexers)

        plan.append((placements, unit))

    return plan
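
# Illustrative shape of the returned plan (schematic, not actual reprs): for
# a manager holding an int64 block on rows 0-1 and a float64 block on row 2,
# the plan pairs each placement with its JoinUnit, roughly
#
#   [(BlockPlacement(slice(0, 2, 1)), JoinUnit(<int64 block>, shape, {})),
#    (BlockPlacement(slice(2, 3, 1)), JoinUnit(<float64 block>, shape, {}))]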

class JoinUnit:
    def __init__(self, block: Block, shape: Shape, indexers=None) -> None:
        # Passing shape explicitly is required for cases when block is None.
        # Note: block is None implies indexers is None, but not vice-versa
        if indexers is None:
            indexers = {}
        self.block = block
        self.indexers = indexers
        self.shape = shape

    def __repr__(self) -> str:
        return f"{type(self).__name__}({repr(self.block)}, {self.indexers})"

    @cache_readonly
    def needs_filling(self) -> bool:
        for indexer in self.indexers.values():
            # FIXME: cache results of indexer == -1 checks.
            if (indexer == -1).any():
                return True

        return False

    @cache_readonly
    def dtype(self) -> DtypeObj:
        blk = self.block
        if blk.values.dtype.kind == "V":
            raise AssertionError("Block is None, no dtype")

        if not self.needs_filling:
            return blk.dtype
        return ensure_dtype_can_hold_na(blk.dtype)

    def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
        """
        Check that we are all-NA of a type/dtype that is compatible with this dtype.
        Augments `self.is_na` with an additional check of the type of NA values.
        """
        if not self.is_na:
            return False
        if self.block.dtype.kind == "V":
            return True

        if self.dtype == object:
            values = self.block.values
            return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K"))

        na_value = self.block.fill_value
        if na_value is NaT and not is_dtype_equal(self.dtype, dtype):
            # e.g. we are dt64 and other is td64
            # fill_values match but we should not cast self.block.values to dtype
            # TODO: this will need updating if we ever have non-nano dt64/td64
            return False

        if na_value is NA and needs_i8_conversion(dtype):
            # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
            # e.g. self.dtype == "Int64" and dtype is td64, we dont want
            # to consider these as matching
            return False

        # TODO: better to use can_hold_element?
        return is_valid_na_for_dtype(na_value, dtype)

    @cache_readonly
    def is_na(self) -> bool:
        blk = self.block
        if blk.dtype.kind == "V":
            return True

        if not blk._can_hold_na:
            return False

        values = blk.values
        if values.size == 0:
            return True
        if isinstance(values.dtype, SparseDtype):
            return False

        if values.ndim == 1:
            # TODO(EA2D): no need for special case with 2D EAs
            val = values[0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return isna_all(values)
        else:
            val = values[0][0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return all(isna_all(row) for row in values)

    def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
        values: ArrayLike

        if upcasted_na is None and self.block.dtype.kind != "V":
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.values
        else:
            fill_value = upcasted_na

            if self._is_valid_na_for(empty_dtype):
                # note: always holds when self.block.dtype.kind == "V"
                blk_dtype = self.block.dtype

                if blk_dtype == np.dtype("object"):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order="K")
                    if len(values) and values[0] is None:
                        fill_value = None

                if isinstance(empty_dtype, DatetimeTZDtype):
                    # NB: exclude e.g. pyarrow[dt64tz] dtypes
                    i8values = np.full(self.shape, fill_value._value)
                    return DatetimeArray(i8values, dtype=empty_dtype)

                elif is_1d_only_ea_dtype(empty_dtype):
                    if is_dtype_equal(blk_dtype, empty_dtype) and self.indexers:
                        # avoid creating new empty array if we already have an array
                        # with correct dtype that can be reindexed
                        pass
                    else:
                        empty_dtype = cast(ExtensionDtype, empty_dtype)
                        cls = empty_dtype.construct_array_type()

                        missing_arr = cls._from_sequence([], dtype=empty_dtype)
                        ncols, nrows = self.shape
                        assert ncols == 1, ncols
                        empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
                        return missing_arr.take(
                            empty_arr, allow_fill=True, fill_value=fill_value
                        )
                elif isinstance(empty_dtype, ExtensionDtype):
                    # TODO: no tests get here, a handful would if we disabled
                    # the dt64tz special-case above (which is faster)
                    cls = empty_dtype.construct_array_type()
                    missing_arr = cls._empty(shape=self.shape, dtype=empty_dtype)
                    missing_arr[:] = fill_value
                    return missing_arr
                else:
                    # NB: we should never get here with empty_dtype integer or bool;
                    # if we did, the missing_arr.fill would cast to gibberish
                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if (not self.indexers) and (not self.block._can_consolidate):
                # preserve these for validation in concat_compat
                return self.block.values

            if self.block.is_bool:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.dtype("object")).values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.values

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly. This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values, indexer, axis=ax)

        return values
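
# Illustrative summary: a JoinUnit bundles one Block, the post-reindex shape
# it must occupy in the result, and any remaining {axis: indexer}
# reindexings. is_na / _is_valid_na_for let _get_empty_dtype ignore all-NA
# units when choosing the result dtype, and get_reindexed_values materializes
# the (possibly filled/upcast) values for concatenation.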

def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike:
    """
    Concatenate values from several join units along axis=1.
    """
    empty_dtype = _get_empty_dtype(join_units)

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)

    to_concat = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if len(to_concat) == 1:
        # Only one block, nothing to concatenate.
        concat_values = to_concat[0]
        if copy:
            if isinstance(concat_values, np.ndarray):
                # non-reindexed (=not yet copied) arrays are made into a view
                # in JoinUnit.get_reindexed_values
                if concat_values.base is not None:
                    concat_values = concat_values.copy()
            else:
                concat_values = concat_values.copy()

    elif any(is_1d_only_ea_dtype(t.dtype) for t in to_concat):
        # TODO(EA2D): special case not needed if all EAs used HybridBlocks

        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[int, slice]"
        to_concat = [
            t
            if is_1d_only_ea_dtype(t.dtype)
            else t[0, :]  # type: ignore[call-overload]
            for t in to_concat
        ]
        concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
        concat_values = ensure_block_shape(concat_values, 2)

    else:
        concat_values = concat_compat(to_concat, axis=1)

    return concat_values
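
# Copy semantics sketch (illustrative): with a single join unit,
# get_reindexed_values returns a view of the block's ndarray, so the
# `concat_values.base is not None` check above is what triggers the explicit
# copy when copy=True; multi-unit paths always allocate a new array via
# np.concatenate / concat_compat.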

def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool):
    """
    Find the NA value to go with this dtype.
    """
    if isinstance(dtype, ExtensionDtype):
        return dtype.na_value
    elif dtype.kind in ["m", "M"]:
        return dtype.type("NaT")
    elif dtype.kind in ["f", "c"]:
        return dtype.type("NaN")
    elif dtype.kind == "b":
        # different from missing.na_value_for_dtype
        return None
    elif dtype.kind in ["i", "u"]:
        if not has_none_blocks:
            # different from missing.na_value_for_dtype
            return None
        return np.nan
    elif dtype.kind == "O":
        return np.nan
    raise NotImplementedError
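
# A few concrete mappings implied above (illustrative, assuming numpy dtypes):
#
#   >>> import numpy as np
#   >>> _dtype_to_na_value(np.dtype("M8[ns]"), has_none_blocks=False)
#   numpy.datetime64('NaT')
#   >>> _dtype_to_na_value(np.dtype("f8"), has_none_blocks=False)
#   nan
#   >>> _dtype_to_na_value(np.dtype("i8"), has_none_blocks=False) is None
#   True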

def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
    """
    Return the dtype to use when concatenating specified units.

    Returns
    -------
    dtype
    """
    if len(join_units) == 1:
        blk = join_units[0].block
        return blk.dtype

    if _is_uniform_reindex(join_units):
        empty_dtype = join_units[0].block.dtype
        return empty_dtype

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)

    dtypes = [unit.dtype for unit in join_units if not unit.is_na]
    if not len(dtypes):
        dtypes = [unit.dtype for unit in join_units if unit.block.dtype.kind != "V"]

    dtype = find_common_type(dtypes)
    if has_none_blocks:
        dtype = ensure_dtype_can_hold_na(dtype)
    return dtype
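
# Sketch of the common-dtype step (illustrative):
#
#   >>> import numpy as np
#   >>> from pandas.core.dtypes.cast import find_common_type
#   >>> find_common_type([np.dtype("int64"), np.dtype("float64")])
#   dtype('float64')
#   >>> find_common_type([np.dtype("int64"), np.dtype("O")])
#   dtype('O')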

def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
    """
    Check if the join units consist of blocks of uniform type that can
    be concatenated using Block.concat_same_type instead of the generic
    _concatenate_join_units (which uses `concat_compat`).
    """
    first = join_units[0].block
    if first.dtype.kind == "V":
        return False
    return (
        # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
        all(type(ju.block) is type(first) for ju in join_units)
        and
        # e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform
        all(
            is_dtype_equal(ju.block.dtype, first.dtype)
            # GH#42092 we only want the dtype_equal check for non-numeric blocks
            # (for now, may change but that would need a deprecation)
            or ju.block.dtype.kind in ["b", "i", "u"]
            for ju in join_units
        )
        and
        # no blocks that would get missing values (can lead to type upcasts)
        # unless we're an extension dtype.
        all(not ju.is_na or ju.block.is_extension for ju in join_units)
        and
        # no blocks with indexers (as then the dimensions do not fit)
        all(not ju.indexers for ju in join_units)
        and
        # only use this path when there is something to concatenate
        len(join_units) > 1
    )
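
# Illustrative: two plain int64 blocks are "uniform" and take the fast
# np.concatenate path in concatenate_managers; mixing numpy int64 with the
# nullable Int64 extension dtype is not uniform (different block types), so
# it falls back to _concatenate_join_units / concat_compat.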

def _is_uniform_reindex(join_units) -> bool:
    return (
        # TODO: should this be ju.block._can_hold_na?
        all(ju.block.is_extension for ju in join_units)
        and len({ju.block.dtype.name for ju in join_units}) == 1
    )


def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit:
    """
    Reduce join_unit's shape along item axis to length.

    Extra items that didn't fit are returned as a separate block.
    """
    if 0 not in join_unit.indexers:
        extra_indexers = join_unit.indexers

        if join_unit.block is None:
            extra_block = None
        else:
            extra_block = join_unit.block.getitem_block(slice(length, None))
            join_unit.block = join_unit.block.getitem_block(slice(length))
    else:
        extra_block = join_unit.block

        extra_indexers = cp.copy(join_unit.indexers)
        extra_indexers[0] = extra_indexers[0][length:]
        join_unit.indexers[0] = join_unit.indexers[0][:length]

    extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:]
    join_unit.shape = (length,) + join_unit.shape[1:]

    return JoinUnit(block=extra_block, indexers=extra_indexers, shape=extra_shape)
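
# Illustrative: trimming a unit of shape (3, n) to length 2 shrinks it in
# place to shape (2, n) and returns a new JoinUnit of shape (1, n) holding
# the leftover items; _combine_concat_plans uses this to align plans whose
# placements have different lengths.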

def _combine_concat_plans(plans):
    """
    Combine multiple concatenation plans into one.

    We only get here with concat_axis == 1.
    """
    if len(plans) == 1:
        for p in plans[0]:
            yield p[0], [p[1]]

    else:
        # singleton list so we can modify it as a side-effect within _next_or_none
        num_ended = [0]

        def _next_or_none(seq):
            retval = next(seq, None)
            if retval is None:
                num_ended[0] += 1
            return retval

        plans = list(map(iter, plans))
        next_items = list(map(_next_or_none, plans))

        while num_ended[0] != len(next_items):
            if num_ended[0] > 0:
                raise ValueError("Plan shapes are not aligned")

            placements, units = zip(*next_items)

            lengths = list(map(len, placements))
            min_len, max_len = min(lengths), max(lengths)

            if min_len == max_len:
                yield placements[0], units
                next_items[:] = map(_next_or_none, plans)
            else:
                yielded_placement = None
                yielded_units = [None] * len(next_items)
                for i, (plc, unit) in enumerate(next_items):
                    yielded_units[i] = unit
                    if len(plc) > min_len:
                        # _trim_join_unit updates unit in place, so only
                        # placement needs to be sliced to skip min_len.
                        next_items[i] = (plc[min_len:], _trim_join_unit(unit, min_len))
                    else:
                        yielded_placement = plc
                        next_items[i] = _next_or_none(plans[i])

                yield yielded_placement, yielded_units
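
# Illustrative walk-through: given plan A with one placement of length 3 and
# plan B with placements of lengths 2 and 1, the generator above first yields
# a length-2 chunk (trimming A's unit via _trim_join_unit) and then a
# length-1 chunk, so every yielded group of join units covers the same set of
# item locations.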