1"""Base classes for all estimators."""
3# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
4# License: BSD 3 clause
6import copy
7import functools
8import inspect
9import platform
10import re
11import warnings
12from collections import defaultdict
14import numpy as np
16from . import __version__
17from ._config import config_context, get_config
18from .exceptions import InconsistentVersionWarning
19from .utils import _IS_32BIT
20from .utils._estimator_html_repr import _HTMLDocumentationLinkMixin, estimator_html_repr
21from .utils._metadata_requests import _MetadataRequester, _routing_enabled
22from .utils._param_validation import validate_parameter_constraints
23from .utils._set_output import _SetOutputMixin
24from .utils._tags import (
25 _DEFAULT_TAGS,
26)
27from .utils.validation import (
28 _check_feature_names_in,
29 _check_y,
30 _generate_get_feature_names_out,
31 _get_feature_names,
32 _is_fitted,
33 _num_features,
34 check_array,
35 check_is_fitted,
36 check_X_y,
37)


def clone(estimator, *, safe=True):
    """Construct a new unfitted estimator with the same parameters.

    Clone does a deep copy of the model in an estimator
    without actually copying attached data. It returns a new estimator
    with the same parameters that has not been fitted on any data.

    .. versionchanged:: 1.3
        Delegates to `estimator.__sklearn_clone__` if the method exists.

    Parameters
    ----------
    estimator : {list, tuple, set} of estimator instance or a single \
            estimator instance
        The estimator or group of estimators to be cloned.
    safe : bool, default=True
        If safe is False, clone will fall back to a deep copy on objects
        that are not estimators. Ignored if `estimator.__sklearn_clone__`
        exists.

    Returns
    -------
    estimator : object
        The deep copy of the input, an estimator if input is an estimator.

    Notes
    -----
    If the estimator's `random_state` parameter is an integer (or if the
    estimator doesn't have a `random_state` parameter), an *exact clone* is
    returned: the clone and the original estimator will give the exact same
    results. Otherwise, a *statistical clone* is returned: the clone might
    return different results from the original estimator. More details can be
    found in :ref:`randomness`.
    """
    if hasattr(estimator, "__sklearn_clone__") and not inspect.isclass(estimator):
        return estimator.__sklearn_clone__()
    return _clone_parametrized(estimator, safe=safe)
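

# A minimal usage sketch (illustrative only; ``LogisticRegression`` is just a
# convenient example estimator, not a dependency of this module). Cloning
# keeps the constructor parameters but drops any fitted state:
#
#   >>> from sklearn.base import clone
#   >>> from sklearn.linear_model import LogisticRegression
#   >>> est = LogisticRegression(C=2.0).fit([[0.0], [1.0]], [0, 1])
#   >>> cloned = clone(est)
#   >>> cloned.C
#   2.0
#   >>> hasattr(cloned, "coef_")
#   False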


def _clone_parametrized(estimator, *, safe=True):
    """Default implementation of clone. See :func:`sklearn.base.clone` for details."""

    estimator_type = type(estimator)
    if estimator_type is dict:
        return {k: clone(v, safe=safe) for k, v in estimator.items()}
    elif estimator_type in (list, tuple, set, frozenset):
        return estimator_type([clone(e, safe=safe) for e in estimator])
    elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
        if not safe:
            return copy.deepcopy(estimator)
        else:
            if isinstance(estimator, type):
                raise TypeError(
                    "Cannot clone object. "
                    "You should provide an instance of "
                    "scikit-learn estimator instead of a class."
                )
            else:
                raise TypeError(
                    "Cannot clone object '%s' (type %s): "
                    "it does not seem to be a scikit-learn "
                    "estimator as it does not implement a "
                    "'get_params' method." % (repr(estimator), type(estimator))
                )

    klass = estimator.__class__
    new_object_params = estimator.get_params(deep=False)
    for name, param in new_object_params.items():
        new_object_params[name] = clone(param, safe=False)

    new_object = klass(**new_object_params)
    try:
        new_object._metadata_request = copy.deepcopy(estimator._metadata_request)
    except AttributeError:
        pass

    params_set = new_object.get_params(deep=False)

    # quick sanity check of the parameters of the clone
    for name in new_object_params:
        param1 = new_object_params[name]
        param2 = params_set[name]
        if param1 is not param2:
            raise RuntimeError(
                "Cannot clone object %s, as the constructor "
                "either does not set or modifies parameter %s" % (estimator, name)
            )

    # _sklearn_output_config is used by `set_output` to configure the output
    # container of an estimator.
    if hasattr(estimator, "_sklearn_output_config"):
        new_object._sklearn_output_config = copy.deepcopy(
            estimator._sklearn_output_config
        )
    return new_object


class BaseEstimator(_HTMLDocumentationLinkMixin, _MetadataRequester):
    """Base class for all estimators in scikit-learn.

    Notes
    -----
    All estimators should specify all the parameters that can be set
    at the class level in their ``__init__`` as explicit keyword
    arguments (no ``*args`` or ``**kwargs``).
    """

    @classmethod
    def _get_param_names(cls):
        """Get parameter names for the estimator."""
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = inspect.signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [
            p
            for p in init_signature.parameters.values()
            if p.name != "self" and p.kind != p.VAR_KEYWORD
        ]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError(
                    "scikit-learn estimators should always "
                    "specify their parameters in the signature"
                    " of their __init__ (no varargs)."
                    " %s with constructor %s doesn't"
                    " follow this convention." % (cls, init_signature)
                )
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])

    def get_params(self, deep=True):
        """
        Get parameters for this estimator.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        out = dict()
        for key in self._get_param_names():
            value = getattr(self, key)
            if deep and hasattr(value, "get_params") and not isinstance(value, type):
                deep_items = value.get_params().items()
                out.update((key + "__" + k, val) for k, val in deep_items)
            out[key] = value
        return out
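
    # A small illustrative sketch (``Pipeline`` and ``StandardScaler`` are
    # only examples): with ``deep=True``, parameters of nested estimators are
    # flattened into ``<component>__<parameter>`` keys.
    #
    #   >>> from sklearn.pipeline import Pipeline
    #   >>> from sklearn.preprocessing import StandardScaler
    #   >>> pipe = Pipeline([("scale", StandardScaler())])
    #   >>> "scale__with_mean" in pipe.get_params(deep=True)
    #   True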

    def set_params(self, **params):
        """Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects
        (such as :class:`~sklearn.pipeline.Pipeline`). The latter have
        parameters of the form ``<component>__<parameter>`` so that it's
        possible to update each component of a nested object.

        Parameters
        ----------
        **params : dict
            Estimator parameters.

        Returns
        -------
        self : estimator instance
            Estimator instance.
        """
        if not params:
            # Simple optimization to gain speed (inspect is slow)
            return self
        valid_params = self.get_params(deep=True)

        nested_params = defaultdict(dict)  # grouped by prefix
        for key, value in params.items():
            key, delim, sub_key = key.partition("__")
            if key not in valid_params:
                local_valid_params = self._get_param_names()
                raise ValueError(
                    f"Invalid parameter {key!r} for estimator {self}. "
                    f"Valid parameters are: {local_valid_params!r}."
                )

            if delim:
                nested_params[key][sub_key] = value
            else:
                setattr(self, key, value)
                valid_params[key] = value

        for key, sub_params in nested_params.items():
            valid_params[key].set_params(**sub_params)

        return self
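
    # An illustrative sketch of nested parameter setting (``Pipeline`` and
    # ``StandardScaler`` are only examples): the ``__`` delimiter routes the
    # value to the named sub-estimator.
    #
    #   >>> pipe = Pipeline([("scale", StandardScaler())])
    #   >>> _ = pipe.set_params(scale__with_mean=False)
    #   >>> pipe.get_params()["scale__with_mean"]
    #   False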

    def __sklearn_clone__(self):
        return _clone_parametrized(self)

    def __repr__(self, N_CHAR_MAX=700):
        # N_CHAR_MAX is the (approximate) maximum number of non-blank
        # characters to render. We pass it as an optional parameter to ease
        # the tests.

        from .utils._pprint import _EstimatorPrettyPrinter

        N_MAX_ELEMENTS_TO_SHOW = 30  # number of elements to show in sequences

        # use ellipsis for sequences with a lot of elements
        pp = _EstimatorPrettyPrinter(
            compact=True,
            indent=1,
            indent_at_name=True,
            n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW,
        )

        repr_ = pp.pformat(self)

        # Use bruteforce ellipsis when there are a lot of non-blank characters
        n_nonblank = len("".join(repr_.split()))
        if n_nonblank > N_CHAR_MAX:
            lim = N_CHAR_MAX // 2  # approx number of chars to keep on both ends
            regex = r"^(\s*\S){%d}" % lim
            # The regex '^(\s*\S){%d}' % n
            # matches from the start of the string until the nth non-blank
            # character:
            # - ^ matches the start of string
            # - (pattern){n} matches n repetitions of pattern
            # - \s*\S matches a non-blank char following zero or more blanks
            left_lim = re.match(regex, repr_).end()
            right_lim = re.match(regex, repr_[::-1]).end()

            if "\n" in repr_[left_lim:-right_lim]:
                # The left side and right side aren't on the same line.
                # To avoid weird cuts, e.g.:
                # categoric...ore',
                # we need to start the right side with an appropriate newline
                # character so that it renders properly as:
                # categoric...
                # handle_unknown='ignore',
                # so we add [^\n]*\n which matches until the next \n
                regex += r"[^\n]*\n"
                right_lim = re.match(regex, repr_[::-1]).end()

            ellipsis = "..."
            if left_lim + len(ellipsis) < len(repr_) - right_lim:
                # Only add ellipsis if it results in a shorter repr
                repr_ = repr_[:left_lim] + "..." + repr_[-right_lim:]

        return repr_

    def __getstate__(self):
        if getattr(self, "__slots__", None):
            raise TypeError(
                "You cannot use `__slots__` in objects inheriting from "
                "`sklearn.base.BaseEstimator`."
            )

        try:
            state = super().__getstate__()
            if state is None:
                # For Python 3.11+, an empty instance (no `__slots__` and an
                # empty `__dict__`) will return a state equal to `None`.
                state = self.__dict__.copy()
        except AttributeError:
            # Python < 3.11
            state = self.__dict__.copy()

        if type(self).__module__.startswith("sklearn."):
            return dict(state.items(), _sklearn_version=__version__)
        else:
            return state

    def __setstate__(self, state):
        if type(self).__module__.startswith("sklearn."):
            pickle_version = state.pop("_sklearn_version", "pre-0.18")
            if pickle_version != __version__:
                warnings.warn(
                    InconsistentVersionWarning(
                        estimator_name=self.__class__.__name__,
                        current_sklearn_version=__version__,
                        original_sklearn_version=pickle_version,
                    ),
                )
        try:
            super().__setstate__(state)
        except AttributeError:
            self.__dict__.update(state)

    def _more_tags(self):
        return _DEFAULT_TAGS

    def _get_tags(self):
        collected_tags = {}
        for base_class in reversed(inspect.getmro(self.__class__)):
            if hasattr(base_class, "_more_tags"):
                # need the if because mixins might not have _more_tags
                # but might do redundant work in estimators
                # (i.e. calling more tags on BaseEstimator multiple times)
                more_tags = base_class._more_tags(self)
                collected_tags.update(more_tags)
        return collected_tags
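
    # A sketch of how tags compose (the ``MyEstimator`` class is hypothetical,
    # for illustration only): subclasses override ``_more_tags`` and
    # ``_get_tags`` merges the dicts along the MRO, with the most-derived
    # class winning.
    #
    #   >>> class MyEstimator(BaseEstimator):
    #   ...     def _more_tags(self):
    #   ...         return {"allow_nan": True}
    #   >>> MyEstimator()._get_tags()["allow_nan"]
    #   True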

    def _check_n_features(self, X, reset):
        """Set the `n_features_in_` attribute, or check against it.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            The input samples.
        reset : bool
            If True, the `n_features_in_` attribute is set to `X.shape[1]`.
            If False and the attribute exists, then check that it is equal to
            `X.shape[1]`. If False and the attribute does *not* exist, then
            the check is skipped.

            .. note::
               It is recommended to call `reset=True` in `fit` and in the
               first call to `partial_fit`. All other methods that validate
               `X` should set `reset=False`.
        """
        try:
            n_features = _num_features(X)
        except TypeError as e:
            if not reset and hasattr(self, "n_features_in_"):
                raise ValueError(
                    "X does not contain any features, but "
                    f"{self.__class__.__name__} is expecting "
                    f"{self.n_features_in_} features"
                ) from e
            # If the number of features is not defined and reset=True,
            # then we skip this check
            return

        if reset:
            self.n_features_in_ = n_features
            return

        if not hasattr(self, "n_features_in_"):
            # Skip this check if the expected number of input features
            # was not recorded by calling fit first. This is typically the
            # case for stateless transformers.
            return

        if n_features != self.n_features_in_:
            raise ValueError(
                f"X has {n_features} features, but {self.__class__.__name__} "
                f"is expecting {self.n_features_in_} features as input."
            )

    def _check_feature_names(self, X, *, reset):
        """Set or check the `feature_names_in_` attribute.

        .. versionadded:: 1.0

        Parameters
        ----------
        X : {ndarray, dataframe} of shape (n_samples, n_features)
            The input samples.

        reset : bool
            Whether to reset the `feature_names_in_` attribute.
            If False, the input will be checked for consistency with
            feature names of data provided when reset was last True.

            .. note::
               It is recommended to call `reset=True` in `fit` and in the
               first call to `partial_fit`. All other methods that validate
               `X` should set `reset=False`.
        """

        if reset:
            feature_names_in = _get_feature_names(X)
            if feature_names_in is not None:
                self.feature_names_in_ = feature_names_in
            elif hasattr(self, "feature_names_in_"):
                # Delete the attribute when the estimator is fitted on a new
                # dataset that has no feature names.
                delattr(self, "feature_names_in_")
            return

        fitted_feature_names = getattr(self, "feature_names_in_", None)
        X_feature_names = _get_feature_names(X)

        if fitted_feature_names is None and X_feature_names is None:
            # no feature names seen in fit and in X
            return

        if X_feature_names is not None and fitted_feature_names is None:
            warnings.warn(
                f"X has feature names, but {self.__class__.__name__} was fitted"
                " without feature names"
            )
            return

        if X_feature_names is None and fitted_feature_names is not None:
            warnings.warn(
                "X does not have valid feature names, but"
                f" {self.__class__.__name__} was fitted with feature names"
            )
            return

        # validate the feature names against the `feature_names_in_` attribute
        if len(fitted_feature_names) != len(X_feature_names) or np.any(
            fitted_feature_names != X_feature_names
        ):
            message = (
                "The feature names should match those that were passed during fit.\n"
            )
            fitted_feature_names_set = set(fitted_feature_names)
            X_feature_names_set = set(X_feature_names)

            unexpected_names = sorted(X_feature_names_set - fitted_feature_names_set)
            missing_names = sorted(fitted_feature_names_set - X_feature_names_set)

            def add_names(names):
                output = ""
                max_n_names = 5
                for i, name in enumerate(names):
                    if i >= max_n_names:
                        output += "- ...\n"
                        break
                    output += f"- {name}\n"
                return output

            if unexpected_names:
                message += "Feature names unseen at fit time:\n"
                message += add_names(unexpected_names)

            if missing_names:
                message += "Feature names seen at fit time, yet now missing:\n"
                message += add_names(missing_names)

            if not missing_names and not unexpected_names:
                message += (
                    "Feature names must be in the same order as they were in fit.\n"
                )

            raise ValueError(message)

    def _validate_data(
        self,
        X="no_validation",
        y="no_validation",
        reset=True,
        validate_separately=False,
        cast_to_ndarray=True,
        **check_params,
    ):
        """Validate input data and set or check the `n_features_in_` attribute.

        Parameters
        ----------
        X : {array-like, sparse matrix, dataframe} of shape \
                (n_samples, n_features), default='no_validation'
            The input samples.
            If `'no_validation'`, no validation is performed on `X`. This is
            useful for meta-estimators which can delegate input validation to
            their underlying estimator(s). In that case `y` must be passed and
            the only accepted `check_params` are `multi_output` and
            `y_numeric`.

        y : array-like of shape (n_samples,), default='no_validation'
            The targets.

            - If `None`, `check_array` is called on `X`. If the estimator's
              `requires_y` tag is True, then an error will be raised.
            - If `'no_validation'`, `check_array` is called on `X` and the
              estimator's `requires_y` tag is ignored. This is a default
              placeholder and is never meant to be explicitly set. In that
              case `X` must be passed.
            - Otherwise, only `y` with `_check_y` or both `X` and `y` are
              checked with either `check_array` or `check_X_y` depending on
              `validate_separately`.

        reset : bool, default=True
            Whether to reset the `n_features_in_` attribute.
            If False, the input will be checked for consistency with data
            provided when reset was last True.

            .. note::
               It is recommended to call `reset=True` in `fit` and in the
               first call to `partial_fit`. All other methods that validate
               `X` should set `reset=False`.

        validate_separately : False or tuple of dicts, default=False
            Only used if `y` is not None.
            If False, call `check_X_y()`. Else, it must be a tuple of kwargs
            to be used for calling `check_array()` on `X` and `y` respectively.

            `estimator=self` is automatically added to these dicts to generate
            more informative error messages in case of invalid input data.

        cast_to_ndarray : bool, default=True
            Cast `X` and `y` to ndarray with checks in `check_params`. If
            `False`, `X` and `y` are unchanged and only `feature_names_in_`
            and `n_features_in_` are checked.

        **check_params : kwargs
            Parameters passed to :func:`sklearn.utils.check_array` or
            :func:`sklearn.utils.check_X_y`. Ignored if `validate_separately`
            is not False.

            `estimator=self` is automatically added to these params to
            generate more informative error messages in case of invalid input
            data.

        Returns
        -------
        out : {ndarray, sparse matrix} or tuple of these
            The validated input. A tuple is returned if both `X` and `y` are
            validated.
        """
        self._check_feature_names(X, reset=reset)

        if y is None and self._get_tags()["requires_y"]:
            raise ValueError(
                f"This {self.__class__.__name__} estimator "
                "requires y to be passed, but the target y is None."
            )

        no_val_X = isinstance(X, str) and X == "no_validation"
        no_val_y = y is None or isinstance(y, str) and y == "no_validation"

        if no_val_X and no_val_y:
            raise ValueError("Validation should be done on X, y or both.")

        default_check_params = {"estimator": self}
        check_params = {**default_check_params, **check_params}

        if not cast_to_ndarray:
            if not no_val_X and no_val_y:
                out = X
            elif no_val_X and not no_val_y:
                out = y
            else:
                out = X, y
        elif not no_val_X and no_val_y:
            out = check_array(X, input_name="X", **check_params)
        elif no_val_X and not no_val_y:
            out = _check_y(y, **check_params)
        else:
            if validate_separately:
                # We need this because some estimators validate X and y
                # separately, and in general, separately calling check_array()
                # on X and y isn't equivalent to just calling check_X_y()
                # :(
                check_X_params, check_y_params = validate_separately
                if "estimator" not in check_X_params:
                    check_X_params = {**default_check_params, **check_X_params}
                X = check_array(X, input_name="X", **check_X_params)
                if "estimator" not in check_y_params:
                    check_y_params = {**default_check_params, **check_y_params}
                y = check_array(y, input_name="y", **check_y_params)
            else:
                X, y = check_X_y(X, y, **check_params)
            out = X, y

        if not no_val_X and check_params.get("ensure_2d", True):
            self._check_n_features(X, reset=reset)

        return out
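
    # A sketch of typical use inside a custom estimator's ``fit`` (the
    # estimator and its methods here are hypothetical):
    #
    #   class MyEstimator(BaseEstimator):
    #       def fit(self, X, y):
    #           # sets n_features_in_ / feature_names_in_ and validates X, y
    #           X, y = self._validate_data(X, y, reset=True)
    #           ...
    #           return self
    #
    #       def predict(self, X):
    #           # checks consistency with what was seen in fit
    #           X = self._validate_data(X, reset=False)
    #           ...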

    def _validate_params(self):
        """Validate types and values of constructor parameters.

        The expected type and values must be defined in the
        `_parameter_constraints` class attribute, which is a dictionary
        `param_name: list of constraints`. See the docstring of
        `validate_parameter_constraints` for a description of the accepted
        constraints.
        """
        validate_parameter_constraints(
            self._parameter_constraints,
            self.get_params(deep=False),
            caller_name=self.__class__.__name__,
        )
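
    # A sketch of a `_parameter_constraints` declaration (the estimator and
    # its parameters are hypothetical; `Interval` and `StrOptions` are the
    # constraint helpers from `sklearn.utils._param_validation`):
    #
    #   from numbers import Real
    #   from sklearn.utils._param_validation import Interval, StrOptions
    #
    #   class MyEstimator(BaseEstimator):
    #       _parameter_constraints = {
    #           "alpha": [Interval(Real, 0, None, closed="left")],
    #           "solver": [StrOptions({"lbfgs", "sgd"})],
    #       }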

    @property
    def _repr_html_(self):
        """HTML representation of estimator.

        This is redundant with the logic of `_repr_mimebundle_`. The latter
        should be favored in the long term, `_repr_html_` is only
        implemented for consumers who do not interpret `_repr_mimebundle_`.
        """
        if get_config()["display"] != "diagram":
            raise AttributeError(
                "_repr_html_ is only defined when the "
                "'display' configuration option is set to "
                "'diagram'"
            )
        return self._repr_html_inner

    def _repr_html_inner(self):
        """This function is returned by the @property `_repr_html_` to make
        `hasattr(estimator, "_repr_html_")` return `True` or `False` depending
        on `get_config()["display"]`.
        """
        return estimator_html_repr(self)

    def _repr_mimebundle_(self, **kwargs):
        """Mime bundle used by jupyter kernels to display estimator."""
        output = {"text/plain": repr(self)}
        if get_config()["display"] == "diagram":
            output["text/html"] = estimator_html_repr(self)
        return output


class ClassifierMixin:
    """Mixin class for all classifiers in scikit-learn."""

    _estimator_type = "classifier"

    def score(self, X, y, sample_weight=None):
        """
        Return the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy
        which is a harsh metric since you require for each sample that
        each label set be correctly predicted.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for `X`.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of ``self.predict(X)`` w.r.t. `y`.
        """
        from .metrics import accuracy_score

        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)

    def _more_tags(self):
        return {"requires_y": True}
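

# A minimal custom classifier sketch (entirely hypothetical) showing what the
# mixin contributes: `score` and the `requires_y` tag come for free once
# `fit` and `predict` are implemented.
#
#   >>> import numpy as np
#   >>> class MajorityClassifier(ClassifierMixin, BaseEstimator):
#   ...     def fit(self, X, y):
#   ...         values, counts = np.unique(y, return_counts=True)
#   ...         self.majority_ = values[np.argmax(counts)]
#   ...         return self
#   ...     def predict(self, X):
#   ...         return np.full(len(X), self.majority_)
#   >>> MajorityClassifier().fit([[0], [1], [2]], [1, 1, 0]).score([[3]], [1])
#   1.0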


class RegressorMixin:
    """Mixin class for all regression estimators in scikit-learn."""

    _estimator_type = "regressor"

    def score(self, X, y, sample_weight=None):
        """Return the coefficient of determination of the prediction.

        The coefficient of determination :math:`R^2` is defined as
        :math:`(1 - \\frac{u}{v})`, where :math:`u` is the residual
        sum of squares ``((y_true - y_pred) ** 2).sum()`` and :math:`v`
        is the total sum of squares ``((y_true - y_true.mean()) ** 2).sum()``.
        The best possible score is 1.0 and it can be negative (because the
        model can be arbitrarily worse). A constant model that always predicts
        the expected value of `y`, disregarding the input features, would get
        an :math:`R^2` score of 0.0.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples. For some estimators this may be a precomputed
            kernel matrix or a list of generic objects instead with shape
            ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``
            is the number of samples used in the fitting for the estimator.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True values for `X`.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            :math:`R^2` of ``self.predict(X)`` w.r.t. `y`.

        Notes
        -----
        The :math:`R^2` score used when calling ``score`` on a regressor uses
        ``multioutput='uniform_average'`` from version 0.23 to keep consistent
        with default value of :func:`~sklearn.metrics.r2_score`.
        This influences the ``score`` method of all the multioutput
        regressors (except for
        :class:`~sklearn.multioutput.MultiOutputRegressor`).
        """

        from .metrics import r2_score

        y_pred = self.predict(X)
        return r2_score(y, y_pred, sample_weight=sample_weight)

    def _more_tags(self):
        return {"requires_y": True}
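

# A numeric sketch of the R^2 definition above (values chosen only for
# illustration): with y_true = [1, 2, 3] and y_pred = [1, 2, 2], the residual
# sum of squares is u = 1.0 and the total sum of squares is v = 2.0, so
# R^2 = 1 - u / v = 0.5.
#
#   >>> from sklearn.metrics import r2_score
#   >>> r2_score([1, 2, 3], [1, 2, 2])
#   0.5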


class ClusterMixin:
    """Mixin class for all cluster estimators in scikit-learn."""

    _estimator_type = "clusterer"

    def fit_predict(self, X, y=None, **kwargs):
        """
        Perform clustering on `X` and return cluster labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        y : Ignored
            Not used, present for API consistency by convention.

        **kwargs : dict
            Arguments to be passed to ``fit``.

            .. versionadded:: 1.4

        Returns
        -------
        labels : ndarray of shape (n_samples,), dtype=np.int64
            Cluster labels.
        """
        # non-optimized default implementation; override when a better
        # method is possible for a given clustering algorithm
        self.fit(X, **kwargs)
        return self.labels_

    def _more_tags(self):
        return {"preserves_dtype": []}


class BiclusterMixin:
    """Mixin class for all bicluster estimators in scikit-learn."""

    @property
    def biclusters_(self):
        """Convenient way to get row and column indicators together.

        Returns the ``rows_`` and ``columns_`` members.
        """
        return self.rows_, self.columns_

    def get_indices(self, i):
        """Row and column indices of the `i`'th bicluster.

        Only works if ``rows_`` and ``columns_`` attributes exist.

        Parameters
        ----------
        i : int
            The index of the cluster.

        Returns
        -------
        row_ind : ndarray, dtype=np.intp
            Indices of rows in the dataset that belong to the bicluster.
        col_ind : ndarray, dtype=np.intp
            Indices of columns in the dataset that belong to the bicluster.
        """
        rows = self.rows_[i]
        columns = self.columns_[i]
        return np.nonzero(rows)[0], np.nonzero(columns)[0]

    def get_shape(self, i):
        """Shape of the `i`'th bicluster.

        Parameters
        ----------
        i : int
            The index of the cluster.

        Returns
        -------
        n_rows : int
            Number of rows in the bicluster.

        n_cols : int
            Number of columns in the bicluster.
        """
        indices = self.get_indices(i)
        return tuple(len(i) for i in indices)

    def get_submatrix(self, i, data):
        """Return the submatrix corresponding to bicluster `i`.

        Parameters
        ----------
        i : int
            The index of the cluster.
        data : array-like of shape (n_samples, n_features)
            The data.

        Returns
        -------
        submatrix : ndarray of shape (n_rows, n_cols)
            The submatrix corresponding to bicluster `i`.

        Notes
        -----
        Works with sparse matrices. Only works if ``rows_`` and
        ``columns_`` attributes exist.
        """
        from .utils.validation import check_array

        data = check_array(data, accept_sparse="csr")
        row_ind, col_ind = self.get_indices(i)
        return data[row_ind[:, np.newaxis], col_ind]


class TransformerMixin(_SetOutputMixin):
    """Mixin class for all transformers in scikit-learn.

    If :term:`get_feature_names_out` is defined, then :class:`BaseEstimator` will
    automatically wrap `transform` and `fit_transform` to follow the `set_output`
    API. See the :ref:`developer_api_set_output` for details.

    :class:`OneToOneFeatureMixin` and
    :class:`ClassNamePrefixFeaturesOutMixin` are helpful mixins for
    defining :term:`get_feature_names_out`.
    """

    def fit_transform(self, X, y=None, **fit_params):
        """
        Fit to data, then transform it.

        Fits transformer to `X` and `y` with optional parameters `fit_params`
        and returns a transformed version of `X`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input samples.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
                default=None
            Target values (None for unsupervised transformations).

        **fit_params : dict
            Additional fit parameters.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_features_new)
            Transformed array.
        """
        # non-optimized default implementation; override when a better
        # method is possible for a given transformer

        # we do not route parameters here, since consumers don't route. But
        # since it's possible for a `transform` method to also consume
        # metadata, we check if that's the case, and we raise a warning telling
        # users that they should implement a custom `fit_transform` method
        # to forward metadata to `transform` as well.
        #
        # For that, we calculate routing and check if anything would be routed
        # to `transform` if we were to route them.
        if _routing_enabled():
            transform_params = self.get_metadata_routing().consumes(
                method="transform", params=fit_params.keys()
            )
            if transform_params:
                warnings.warn(
                    (
                        f"This object ({self.__class__.__name__}) has a `transform`"
                        " method which consumes metadata, but `fit_transform` does not"
                        " forward metadata to `transform`. Please implement a custom"
                        " `fit_transform` method to forward metadata to `transform` as"
                        " well. Alternatively, you can explicitly do"
                        " `set_transform_request` and set all values to `False` to"
                        " disable metadata routed to `transform`, if that's an option."
                    ),
                    UserWarning,
                )

        if y is None:
            # fit method of arity 1 (unsupervised transformation)
            return self.fit(X, **fit_params).transform(X)
        else:
            # fit method of arity 2 (supervised transformation)
            return self.fit(X, y, **fit_params).transform(X)
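

# A minimal custom transformer sketch (entirely hypothetical): implementing
# `fit` and `transform` is enough for the mixin to provide `fit_transform`.
#
#   >>> import numpy as np
#   >>> class MeanCenterer(TransformerMixin, BaseEstimator):
#   ...     def fit(self, X, y=None):
#   ...         self.mean_ = np.asarray(X).mean(axis=0)
#   ...         return self
#   ...     def transform(self, X):
#   ...         return np.asarray(X) - self.mean_
#   >>> MeanCenterer().fit_transform([[1.0], [3.0]])
#   array([[-1.],
#          [ 1.]])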


class OneToOneFeatureMixin:
    """Provides `get_feature_names_out` for simple transformers.

    This mixin assumes there's a 1-to-1 correspondence between input features
    and output features, such as :class:`~sklearn.preprocessing.StandardScaler`.
    """

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input features.

            - If `input_features` is `None`, then `feature_names_in_` is
              used as feature names in. If `feature_names_in_` is not defined,
              then the following input feature names are generated:
              `["x0", "x1", ..., "x(n_features_in_ - 1)"]`.
            - If `input_features` is an array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Same as input features.
        """
        check_is_fitted(self, "n_features_in_")
        return _check_feature_names_in(self, input_features)


class ClassNamePrefixFeaturesOutMixin:
    """Mixin class for transformers that generate their own names by prefixing.

    This mixin is useful when the transformer needs to generate its own feature
    names out, such as :class:`~sklearn.decomposition.PCA`. For example, if
    :class:`~sklearn.decomposition.PCA` outputs 3 features, then the generated
    feature names out are: `["pca0", "pca1", "pca2"]`.

    This mixin assumes that a `_n_features_out` attribute is defined when the
    transformer is fitted. `_n_features_out` is the number of output features
    that the transformer will return in `transform` or `fit_transform`.
    """

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        The feature names out will be prefixed by the lowercased class name.
        For example, if the transformer outputs 3 features, then the feature
        names out are: `["class_name0", "class_name1", "class_name2"]`.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Only used to validate feature names with the names seen in `fit`.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        check_is_fitted(self, "_n_features_out")
        return _generate_get_feature_names_out(
            self, self._n_features_out, input_features=input_features
        )
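

# An illustrative sketch (using `PCA`, which relies on this mixin): the
# generated names are the lowercased class name plus an index.
#
#   >>> import numpy as np
#   >>> from sklearn.decomposition import PCA
#   >>> X = np.random.RandomState(0).rand(10, 4)
#   >>> PCA(n_components=2).fit(X).get_feature_names_out()
#   array(['pca0', 'pca1'], dtype=object)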


class DensityMixin:
    """Mixin class for all density estimators in scikit-learn."""

    _estimator_type = "DensityEstimator"

    def score(self, X, y=None):
        """Return the score of the model on the data `X`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        score : float
            Score of the model.
        """
        pass


class OutlierMixin:
    """Mixin class for all outlier detection estimators in scikit-learn."""

    _estimator_type = "outlier_detector"

    def fit_predict(self, X, y=None, **kwargs):
        """Perform fit on X and return labels for X.

        Returns -1 for outliers and 1 for inliers.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        **kwargs : dict
            Arguments to be passed to ``fit``.

            .. versionadded:: 1.4

        Returns
        -------
        y : ndarray of shape (n_samples,)
            1 for inliers, -1 for outliers.
        """
        # we do not route parameters here, since consumers don't route. But
        # since it's possible for a `predict` method to also consume
        # metadata, we check if that's the case, and we raise a warning telling
        # users that they should implement a custom `fit_predict` method
        # to forward metadata to `predict` as well.
        #
        # For that, we calculate routing and check if anything would be routed
        # to `predict` if we were to route them.
        if _routing_enabled():
            transform_params = self.get_metadata_routing().consumes(
                method="predict", params=kwargs.keys()
            )
            if transform_params:
                warnings.warn(
                    (
                        f"This object ({self.__class__.__name__}) has a `predict` "
                        "method which consumes metadata, but `fit_predict` does not "
                        "forward metadata to `predict`. Please implement a custom "
                        "`fit_predict` method to forward metadata to `predict` as "
                        "well. Alternatively, you can explicitly do "
                        "`set_predict_request` and set all values to `False` to "
                        "disable metadata routed to `predict`, if that's an option."
                    ),
                    UserWarning,
                )

        # override for transductive outlier detectors like LocalOutlierFactor
        return self.fit(X, **kwargs).predict(X)


class MetaEstimatorMixin:
    """Mixin class for all meta estimators in scikit-learn."""

    _required_parameters = ["estimator"]


class MultiOutputMixin:
    """Mixin to mark estimators that support multioutput."""

    def _more_tags(self):
        return {"multioutput": True}


class _UnstableArchMixin:
    """Mark estimators that are non-deterministic on 32bit or PowerPC."""

    def _more_tags(self):
        return {
            "non_deterministic": _IS_32BIT or platform.machine().startswith(
                ("ppc", "powerpc")
            )
        }


def is_classifier(estimator):
    """Return True if the given estimator is (probably) a classifier.

    Parameters
    ----------
    estimator : object
        Estimator object to test.

    Returns
    -------
    out : bool
        True if estimator is a classifier and False otherwise.
    """
    return getattr(estimator, "_estimator_type", None) == "classifier"


def is_regressor(estimator):
    """Return True if the given estimator is (probably) a regressor.

    Parameters
    ----------
    estimator : estimator instance
        Estimator object to test.

    Returns
    -------
    out : bool
        True if estimator is a regressor and False otherwise.
    """
    return getattr(estimator, "_estimator_type", None) == "regressor"


def is_outlier_detector(estimator):
    """Return True if the given estimator is (probably) an outlier detector.

    Parameters
    ----------
    estimator : estimator instance
        Estimator object to test.

    Returns
    -------
    out : bool
        True if estimator is an outlier detector and False otherwise.
    """
    return getattr(estimator, "_estimator_type", None) == "outlier_detector"
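

# These helpers just inspect the `_estimator_type` attribute set by the
# mixins above. A quick illustrative check (`SVC` and `SVR` are only
# examples):
#
#   >>> from sklearn.svm import SVC, SVR
#   >>> is_classifier(SVC()), is_regressor(SVR())
#   (True, True)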


def _fit_context(*, prefer_skip_nested_validation):
    """Decorator to run the fit methods of estimators within context managers.

    Parameters
    ----------
    prefer_skip_nested_validation : bool
        If True, the validation of parameters of inner estimators or functions
        called during fit will be skipped.

        This is useful to avoid validating the parameters passed by the user
        from the public facing API many times over. It's also useful to avoid
        validating parameters that we pass internally to inner functions that
        are guaranteed to be valid by the test suite.

        It should be set to True for most estimators, except for those that
        receive non-validated objects as parameters, such as meta-estimators
        that are given estimator objects.

    Returns
    -------
    decorated_fit : method
        The decorated fit method.
    """

    def decorator(fit_method):
        @functools.wraps(fit_method)
        def wrapper(estimator, *args, **kwargs):
            global_skip_validation = get_config()["skip_parameter_validation"]

            # we don't want to validate again for each call to partial_fit
            partial_fit_and_fitted = (
                fit_method.__name__ == "partial_fit" and _is_fitted(estimator)
            )

            if not global_skip_validation and not partial_fit_and_fitted:
                estimator._validate_params()

            with config_context(
                skip_parameter_validation=(
                    prefer_skip_nested_validation or global_skip_validation
                )
            ):
                return fit_method(estimator, *args, **kwargs)

        return wrapper

    return decorator
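

# A sketch of how the decorator is applied (the estimator is hypothetical;
# `Interval` and `Real` come from `sklearn.utils._param_validation` and
# `numbers` as in the `_parameter_constraints` sketch above):
# `_validate_params` runs against `_parameter_constraints` before the
# decorated `fit` body executes, and nested validation is skipped inside it.
#
#   class MyEstimator(BaseEstimator):
#       _parameter_constraints = {
#           "alpha": [Interval(Real, 0, None, closed="left")],
#       }
#
#       def __init__(self, alpha=1.0):
#           self.alpha = alpha
#
#       @_fit_context(prefer_skip_nested_validation=True)
#       def fit(self, X, y=None):
#           X = self._validate_data(X)
#           ...
#           return self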