Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/sorting.py: 12%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

252 statements  

1""" miscellaneous sorting / groupby utilities """ 

2from __future__ import annotations 

3 

4from collections import defaultdict 

5from typing import ( 

6 TYPE_CHECKING, 

7 Callable, 

8 DefaultDict, 

9 Hashable, 

10 Iterable, 

11 Sequence, 

12 cast, 

13) 

14 

15import numpy as np 

16 

17from pandas._libs import ( 

18 algos, 

19 hashtable, 

20 lib, 

21) 

22from pandas._libs.hashtable import unique_label_indices 

23from pandas._typing import ( 

24 AxisInt, 

25 IndexKeyFunc, 

26 Level, 

27 NaPosition, 

28 Shape, 

29 SortKind, 

30 npt, 

31) 

32 

33from pandas.core.dtypes.common import ( 

34 ensure_int64, 

35 ensure_platform_int, 

36 is_extension_array_dtype, 

37) 

38from pandas.core.dtypes.generic import ( 

39 ABCMultiIndex, 

40 ABCRangeIndex, 

41) 

42from pandas.core.dtypes.missing import isna 

43 

44from pandas.core.construction import extract_array 

45 

46if TYPE_CHECKING: 

47 from pandas import MultiIndex 

48 from pandas.core.arrays import ExtensionArray 

49 from pandas.core.indexes.base import Index 

50 

51 

def get_indexer_indexer(
    target: Index,
    level: Level | list[Level] | None,
    ascending: list[bool] | bool,
    kind: SortKind,
    na_position: NaPosition,
    sort_remaining: bool,
    key: IndexKeyFunc,
) -> npt.NDArray[np.intp] | None:
    """
    Helper method that return the indexer according to input parameters for
    the sort_index method of DataFrame and Series.

    Parameters
    ----------
    target : Index
    level : int or level name or list of ints or list of level names
    ascending : bool or list of bools, default True
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
    na_position : {'first', 'last'}, default 'last'
    sort_remaining : bool, default True
    key : callable, optional

    Returns
    -------
    Optional[ndarray[intp]]
        The indexer for the new index, or None when the index is already
        sorted in the requested direction.
    """
    target = ensure_key_mapped(target, key, levels=level)
    target = target._sort_levels_monotonic()

    if level is not None:
        _, indexer = target.sortlevel(
            level, ascending=ascending, sort_remaining=sort_remaining
        )
        return indexer

    if isinstance(target, ABCMultiIndex):
        return lexsort_indexer(
            target._get_codes_for_sorting(), orders=ascending, na_position=na_position
        )

    # Check monotonic-ness before sort an index (GH 11080)
    already_sorted = (
        target.is_monotonic_increasing if ascending else target.is_monotonic_decreasing
    )
    if already_sorted:
        return None

    # ascending can only be a Sequence for MultiIndex
    return nargsort(
        target,
        kind=kind,
        ascending=cast(bool, ascending),
        na_position=na_position,
    )

107 

108 

def get_group_index(
    labels, shape: Shape, sort: bool, xnull: bool
) -> npt.NDArray[np.int64]:
    """
    For the particular label_list, gets the offsets into the hypothetical list
    representing the totally ordered cartesian product of all possible label
    combinations, *as long as* this space fits within int64 bounds;
    otherwise, though group indices identify unique combinations of
    labels, they cannot be deconstructed.
    - If `sort`, rank of returned ids preserve lexical ranks of labels.
      i.e. returned id's can be used to do lexical sort on labels;
    - If `xnull` nulls (-1 labels) are passed through.

    Parameters
    ----------
    labels : sequence of arrays
        Integers identifying levels at each location
    shape : tuple[int, ...]
        Number of unique levels at each location
    sort : bool
        If the ranks of returned ids should match lexical ranks of labels
    xnull : bool
        If true nulls are excluded. i.e. -1 values in the labels are
        passed through.

    Returns
    -------
    An array of type int64 where two elements are equal if their corresponding
    labels are equal at all location.

    Notes
    -----
    The length of `labels` and `shape` must be identical.
    """

    def _int64_cut_off(shape) -> int:
        # Number of leading levels whose cartesian product still fits in
        # int64; the remaining levels are deferred to a later chunk.
        acc = 1
        for i, mul in enumerate(shape):
            acc *= int(mul)
            if not acc < lib.i8max:
                return i
        return len(shape)

    def maybe_lift(lab, size) -> tuple[np.ndarray, int]:
        # promote nan values (assigned -1 label in lab array)
        # so that all output values are non-negative
        return (lab + 1, size + 1) if (lab == -1).any() else (lab, size)

    labels = [ensure_int64(x) for x in labels]
    lshape = list(shape)
    if not xnull:
        # nulls participate as ordinary labels: shift -1 codes to 0 and
        # widen each affected level by one slot
        for i, (lab, size) in enumerate(zip(labels, shape)):
            lab, size = maybe_lift(lab, size)
            labels[i] = lab
            lshape[i] = size

    labels = list(labels)

    # Iteratively process all the labels in chunks sized so less
    # than lib.i8max unique int ids will be required for each chunk
    while True:
        # how many levels can be done without overflow:
        nlev = _int64_cut_off(lshape)

        # compute flat ids for the first `nlev` levels
        stride = np.prod(lshape[1:nlev], dtype="i8")
        out = stride * labels[0].astype("i8", subok=False, copy=False)

        for i in range(1, nlev):
            if lshape[i] == 0:
                stride = np.int64(0)
            else:
                stride //= lshape[i]
            out += labels[i] * stride

        if xnull:  # exclude nulls
            mask = labels[0] == -1
            for lab in labels[1:nlev]:
                mask |= lab == -1
            out[mask] = -1

        if nlev == len(lshape):  # all levels done!
            break

        # compress what has been done so far in order to avoid overflow
        # to retain lexical ranks, obs_ids should be sorted
        comp_ids, obs_ids = compress_group_index(out, sort=sort)

        # treat the compressed ids as a single synthetic level and continue
        # with the levels not yet folded in
        labels = [comp_ids] + labels[nlev:]
        lshape = [len(obs_ids)] + lshape[nlev:]

    return out

201 

202 

def get_compressed_ids(
    labels, sizes: Shape
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64]]:
    """
    Group_index is offsets into cartesian product of all possible labels. This
    space can be huge, so this function compresses it, by computing offsets
    (comp_ids) into the list of unique labels (obs_group_ids).

    Parameters
    ----------
    labels : list of label arrays
    sizes : tuple[int] of size of the levels

    Returns
    -------
    np.ndarray[np.intp]
        comp_ids
    np.ndarray[np.int64]
        obs_group_ids
    """
    # Flatten to cartesian-product offsets first, then factorize those
    # offsets down to dense ids.
    group_index = get_group_index(labels, sizes, sort=True, xnull=False)
    return compress_group_index(group_index, sort=True)

225 

226 

def is_int64_overflow_possible(shape: Shape) -> bool:
    """Return True if the cartesian product of `shape` may overflow int64."""
    total = 1
    for dim in shape:
        total *= int(dim)
    return total >= lib.i8max

233 

234 

def _decons_group_index(
    comp_labels: npt.NDArray[np.intp], shape: Shape
) -> list[npt.NDArray[np.intp]]:
    # reconstruct labels
    if is_int64_overflow_possible(shape):
        # at some point group indices are factorized,
        # and may not be deconstructed here! wrong path!
        raise ValueError("cannot deconstruct factorized group indices!")

    label_list = []
    factor = 1
    y = np.array(0)
    x = comp_labels
    for i in reversed(range(len(shape))):
        # Recover the i-th level's code as one digit of the mixed-radix
        # representation of the flat group index: subtract the contribution
        # of the levels already recovered (y), then divide out the factor.
        labels = (x - y) % (factor * shape[i]) // factor
        np.putmask(labels, comp_labels < 0, -1)  # negative ids stay null
        label_list.append(labels)
        y = labels * factor
        factor *= shape[i]
    # levels were peeled off last-to-first, so reverse back to input order
    return label_list[::-1]

255 

256 

def decons_obs_group_ids(
    comp_ids: npt.NDArray[np.intp],
    obs_ids: npt.NDArray[np.intp],
    shape: Shape,
    labels: Sequence[npt.NDArray[np.signedinteger]],
    xnull: bool,
) -> list[npt.NDArray[np.intp]]:
    """
    Reconstruct labels from observed group ids.

    Parameters
    ----------
    comp_ids : np.ndarray[np.intp]
    obs_ids: np.ndarray[np.intp]
    shape : tuple[int]
    labels : Sequence[np.ndarray[np.signedinteger]]
    xnull : bool
        If nulls are excluded; i.e. -1 labels are passed through.
    """
    if not xnull:
        # mirror the per-level +1 "lift" that get_group_index applied to
        # levels containing nulls, so shapes line up for deconstruction
        lift = np.fromiter(((a == -1).any() for a in labels), dtype=np.intp)
        arr_shape = np.asarray(shape, dtype=np.intp) + lift
        shape = tuple(arr_shape)

    if not is_int64_overflow_possible(shape):
        # obs ids are deconstructable! take the fast route!
        out = _decons_group_index(obs_ids, shape)
        # undo the lift where it was applied (lift is only bound when
        # not xnull; short-circuit keeps this safe when xnull is True)
        return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)]

    # Slow path: ids were factorized and cannot be decomposed arithmetically;
    # instead pick the first occurrence of each unique compressed id.
    indexer = unique_label_indices(comp_ids)
    return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels]

288 

289 

def indexer_from_factorized(
    labels, shape: Shape, compress: bool = True
) -> npt.NDArray[np.intp]:
    """Return the sorting indexer for factorized labels."""
    group_index = get_group_index(labels, shape, sort=True, xnull=False)

    if compress:
        group_index, obs = compress_group_index(group_index, sort=True)
        ngroups = len(obs)
    else:
        # `size and max()` guards the empty case, where max() would raise
        ngroups = (group_index.size and group_index.max()) + 1

    return get_group_index_sorter(group_index, ngroups)

302 

303 

def lexsort_indexer(
    keys, orders=None, na_position: str = "last", key: Callable | None = None
) -> npt.NDArray[np.intp]:
    """
    Performs lexical sorting on a set of keys

    Parameters
    ----------
    keys : sequence of arrays
        Sequence of ndarrays to be sorted by the indexer
    orders : bool or list of booleans, optional
        Determines the sorting order for each element in keys. If a list,
        it must be the same length as keys. This determines whether the
        corresponding element in keys should be sorted in ascending
        (True) or descending (False) order. if bool, applied to all
        elements as above. if None, defaults to True.
    na_position : {'first', 'last'}, default 'last'
        Determines placement of NA elements in the sorted list ("last" or "first")
    key : Callable, optional
        Callable key function applied to every element in keys before sorting

    Returns
    -------
    np.ndarray[np.intp]
    """
    from pandas.core.arrays import Categorical

    labels = []
    shape = []
    # broadcast a scalar `orders` to one entry per key
    if isinstance(orders, bool):
        orders = [orders] * len(keys)
    elif orders is None:
        orders = [True] * len(keys)

    keys = [ensure_key_mapped(k, key) for k in keys]

    for k, order in zip(keys, orders):
        # factorize via an ordered Categorical so codes carry sort rank
        cat = Categorical(k, ordered=True)

        if na_position not in ["last", "first"]:
            raise ValueError(f"invalid na_position: {na_position}")

        n = len(cat.categories)
        codes = cat.codes.copy()

        mask = cat.codes == -1  # -1 codes mark missing values
        # Remap codes so that a plain ascending sort of the remapped codes
        # realizes the requested order and NA placement for this key.
        if order:  # ascending
            if na_position == "last":
                codes = np.where(mask, n, codes)
            elif na_position == "first":
                codes += 1
        else:  # not order means descending
            if na_position == "last":
                codes = np.where(mask, n, n - codes - 1)
            elif na_position == "first":
                codes = np.where(mask, 0, n - codes)
        if mask.any():
            n += 1  # one extra slot is needed for the NA code

        shape.append(n)
        labels.append(codes)

    return indexer_from_factorized(labels, tuple(shape))

367 

368 

def nargsort(
    items,
    kind: str = "quicksort",
    ascending: bool = True,
    na_position: str = "last",
    key: Callable | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> npt.NDArray[np.intp]:
    """
    Intended to be a drop-in replacement for np.argsort which handles NaNs.

    Adds ascending, na_position, and key parameters.

    (GH #6399, #5231, #27237)

    Parameters
    ----------
    kind : str, default 'quicksort'
    ascending : bool, default True
    na_position : {'first', 'last'}, default 'last'
    key : Optional[Callable], default None
    mask : Optional[np.ndarray[bool]], default None
        Passed when called by ExtensionArray.argsort.

    Returns
    -------
    np.ndarray[np.intp]
    """

    if key is not None:
        # apply the key once, then recurse with key=None so the logic
        # below only ever sees already-mapped values
        items = ensure_key_mapped(items, key)
        return nargsort(
            items,
            kind=kind,
            ascending=ascending,
            na_position=na_position,
            key=None,
            mask=mask,
        )

    if isinstance(items, ABCRangeIndex):
        return items.argsort(ascending=ascending)  # TODO: test coverage with key?
    elif not isinstance(items, ABCMultiIndex):
        items = extract_array(items)
    if mask is None:
        mask = np.asarray(isna(items))  # TODO: does this exclude MultiIndex too?

    if is_extension_array_dtype(items):
        # ExtensionArrays implement their own NA-aware argsort
        return items.argsort(ascending=ascending, kind=kind, na_position=na_position)
    else:
        items = np.asanyarray(items)

    idx = np.arange(len(items))
    non_nans = items[~mask]
    non_nan_idx = idx[~mask]

    nan_idx = np.nonzero(mask)[0]
    if not ascending:
        # reverse before the sort and re-reverse after, so that a stable
        # `kind` keeps ties in their original relative order
        non_nans = non_nans[::-1]
        non_nan_idx = non_nan_idx[::-1]
    indexer = non_nan_idx[non_nans.argsort(kind=kind)]
    if not ascending:
        indexer = indexer[::-1]
    # Finally, place the NaNs at the end or the beginning according to
    # na_position
    if na_position == "last":
        indexer = np.concatenate([indexer, nan_idx])
    elif na_position == "first":
        indexer = np.concatenate([nan_idx, indexer])
    else:
        raise ValueError(f"invalid na_position: {na_position}")
    return ensure_platform_int(indexer)

441 

442 

def nargminmax(values: ExtensionArray, method: str, axis: AxisInt = 0):
    """
    Implementation of np.argmin/argmax but for ExtensionArray and which
    handles missing values.

    Parameters
    ----------
    values : ExtensionArray
    method : {"argmax", "argmin"}
    axis : int, default 0

    Returns
    -------
    int
    """
    assert method in {"argmax", "argmin"}
    func = np.argmax if method == "argmax" else np.argmin

    mask = np.asarray(isna(values))
    arr_values = values._values_for_argsort()

    if arr_values.ndim == 1:
        return _nanargminmax(arr_values, mask, func)

    # 2D case: with no NAs numpy can reduce directly; otherwise reduce one
    # row (or column) at a time while skipping masked entries.
    if not mask.any():
        return func(arr_values, axis=axis)
    if axis == 1:
        pairs = zip(arr_values, mask)
    else:
        pairs = zip(arr_values.T, mask.T)
    return np.array([_nanargminmax(v, m, func) for v, m in pairs])

474 

475 

476def _nanargminmax(values: np.ndarray, mask: npt.NDArray[np.bool_], func) -> int: 

477 """ 

478 See nanargminmax.__doc__. 

479 """ 

480 idx = np.arange(values.shape[0]) 

481 non_nans = values[~mask] 

482 non_nan_idx = idx[~mask] 

483 

484 return non_nan_idx[func(non_nans)] 

485 

486 

def _ensure_key_mapped_multiindex(
    index: MultiIndex, key: Callable, level=None
) -> MultiIndex:
    """
    Returns a new MultiIndex in which key has been applied
    to all levels specified in level (or all levels if level
    is None). Used for key sorting for MultiIndex.

    Parameters
    ----------
    index : MultiIndex
        Index to which to apply the key function on the
        specified levels.
    key : Callable
        Function that takes an Index and returns an Index of
        the same shape. This key is applied to each level
        separately. The name of the level can be used to
        distinguish different levels for application.
    level : list-like, int or str, default None
        Level or list of levels to apply the key function to.
        If None, key function is applied to all levels. Other
        levels are left unchanged.

    Returns
    -------
    labels : MultiIndex
        Resulting MultiIndex with modified levels.
    """
    if level is None:
        sort_levels = list(range(index.nlevels))  # satisfies mypy
    else:
        requested = [level] if isinstance(level, (str, int)) else level
        sort_levels = [index._get_level_number(lev) for lev in requested]

    # rebuild each level, mapping only those the caller selected
    mapped = []
    for lev_num in range(index.nlevels):
        level_values = index._get_level_values(lev_num)
        if lev_num in sort_levels:
            level_values = ensure_key_mapped(level_values, key)
        mapped.append(level_values)

    return type(index).from_arrays(mapped)

534 

535 

def ensure_key_mapped(values, key: Callable | None, levels=None):
    """
    Applies a callable key function to the values function and checks
    that the resulting value has the same shape. Can be called on Index
    subclasses, Series, DataFrames, or ndarrays.

    Parameters
    ----------
    values : Series, DataFrame, Index subclass, or ndarray
    key : Optional[Callable], key to be called on the values array
    levels : Optional[List], if values is a MultiIndex, list of levels to
        apply the key to.

    Returns
    -------
    The key-mapped values, converted back to the input's type when possible.

    Raises
    ------
    ValueError
        If the key function changes the length of the values.
    TypeError
        If the key function returns a type that cannot be converted back
        to the type of ``values``.
    """
    from pandas.core.indexes.api import Index

    if not key:
        return values

    if isinstance(values, ABCMultiIndex):
        return _ensure_key_mapped_multiindex(values, key, level=levels)

    # operate on a copy so a misbehaving key cannot mutate caller data
    result = key(values.copy())
    if len(result) != len(values):
        raise ValueError(
            "User-provided `key` function must not change the shape of the array."
        )

    try:
        if isinstance(values, Index):
            # convert to a new Index subclass, not necessarily the same
            result = Index(result)
        else:
            # try to revert to original type otherwise
            result = type(values)(result)
    except TypeError as err:
        # Note: the original message used a backslash continuation inside the
        # f-string, which embedded the source indentation into the message.
        raise TypeError(
            f"User-provided `key` function returned an invalid type {type(result)} "
            f"which could not be converted to {type(values)}."
        ) from err

    return result

578 

579 

def get_flattened_list(
    comp_ids: npt.NDArray[np.intp],
    ngroups: int,
    levels: Iterable[Index],
    labels: Iterable[np.ndarray],
) -> list[tuple]:
    """Map compressed group id -> key tuple."""
    comp_ids = comp_ids.astype(np.int64, copy=False)
    arrays: DefaultDict[int, list[int]] = defaultdict(list)
    # For every level, build an id -> label-code mapping, then append the
    # corresponding level value to each group's key tuple.
    for level_codes, level_values in zip(labels, levels):
        table = hashtable.Int64HashTable(ngroups)
        table.map_keys_to_values(comp_ids, level_codes.astype(np.int64, copy=False))
        for group_id in range(ngroups):
            arrays[group_id].append(level_values[table.get_item(group_id)])
    return [tuple(key_parts) for key_parts in arrays.values()]

595 

596 

def get_indexer_dict(
    label_list: list[np.ndarray], keys: list[Index]
) -> dict[Hashable, npt.NDArray[np.intp]]:
    """
    Returns
    -------
    dict:
        Labels mapped to indexers.
    """
    shape = tuple(len(x) for x in keys)

    group_index = get_group_index(label_list, shape, sort=True, xnull=True)
    if np.all(group_index == -1):
        # Short-circuit, lib.indices_fast will return the same
        return {}
    # when the cartesian product would overflow int64, ids were factorized,
    # so the group count comes from the ids themselves rather than shape
    ngroups = (
        ((group_index.size and group_index.max()) + 1)
        if is_int64_overflow_possible(shape)
        else np.prod(shape, dtype="i8")
    )

    sorter = get_group_index_sorter(group_index, ngroups)

    # sort labels and ids once so indices_fast sees each group contiguously
    sorted_labels = [lab.take(sorter) for lab in label_list]
    group_index = group_index.take(sorter)

    return lib.indices_fast(sorter, group_index, keys, sorted_labels)

624 

625 

626# ---------------------------------------------------------------------- 

627# sorting levels...cleverly? 

628 

629 

def get_group_index_sorter(
    group_index: npt.NDArray[np.intp], ngroups: int | None = None
) -> npt.NDArray[np.intp]:
    """
    algos.groupsort_indexer implements `counting sort` and it is at least
    O(ngroups), where
        ngroups = prod(shape)
        shape = map(len, keys)
    that is, linear in the number of combinations (cartesian product) of unique
    values of groupby keys. This can be huge when doing multi-key groupby.
    np.argsort(kind='mergesort') is O(count x log(count)) where count is the
    length of the data-frame;
    Both algorithms are `stable` sort and that is necessary for correctness of
    groupby operations. e.g. consider:
        df.groupby(key)[col].transform('first')

    Parameters
    ----------
    group_index : np.ndarray[np.intp]
        signed integer dtype
    ngroups : int or None, default None

    Returns
    -------
    np.ndarray[np.intp]
    """
    if ngroups is None:
        ngroups = 1 + group_index.max()
    count = len(group_index)
    # taking complexities literally; there may be
    # some room for fine-tuning these parameters
    alpha = 0.0
    beta = 1.0
    if count > 0 and (alpha + beta * ngroups) < (count * np.log(count)):
        # counting sort wins when the number of groups is small relative
        # to count * log(count)
        sorter, _ = algos.groupsort_indexer(
            ensure_platform_int(group_index),
            ngroups,
        )
        # sorter _should_ already be intp, but mypy is not yet able to verify
    else:
        sorter = group_index.argsort(kind="mergesort")
    return ensure_platform_int(sorter)

671 

672 

def compress_group_index(
    group_index: npt.NDArray[np.int64], sort: bool = True
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]:
    """
    Group_index is offsets into cartesian product of all possible labels. This
    space can be huge, so this function compresses it, by computing offsets
    (comp_ids) into the list of unique labels (obs_group_ids).
    """
    # size the hash table for the worst case of all-unique ids
    table = hashtable.Int64HashTable(len(group_index))

    group_index = ensure_int64(group_index)

    # note, group labels come out ascending (ie, 1,2,3 etc)
    comp_ids, obs_group_ids = table.get_labels_groupby(group_index)

    if sort and len(obs_group_ids) > 0:
        # sort the observed ids and remap comp_ids to match
        obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids)

    return ensure_int64(comp_ids), ensure_int64(obs_group_ids)

693 

694 

695def _reorder_by_uniques( 

696 uniques: npt.NDArray[np.int64], labels: npt.NDArray[np.intp] 

697) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.intp]]: 

698 """ 

699 Parameters 

700 ---------- 

701 uniques : np.ndarray[np.int64] 

702 labels : np.ndarray[np.intp] 

703 

704 Returns 

705 ------- 

706 np.ndarray[np.int64] 

707 np.ndarray[np.intp] 

708 """ 

709 # sorter is index where elements ought to go 

710 sorter = uniques.argsort() 

711 

712 # reverse_indexer is where elements came from 

713 reverse_indexer = np.empty(len(sorter), dtype=np.intp) 

714 reverse_indexer.put(sorter, np.arange(len(sorter))) 

715 

716 mask = labels < 0 

717 

718 # move labels to right locations (ie, unsort ascending labels) 

719 labels = reverse_indexer.take(labels) 

720 np.putmask(labels, mask, -1) 

721 

722 # sort observed ids 

723 uniques = uniques.take(sorter) 

724 

725 return uniques, labels