1from __future__ import annotations
2
3import operator
4from typing import (
5 TYPE_CHECKING,
6 Literal,
7 NoReturn,
8 cast,
9)
10
11import numpy as np
12
13from pandas._libs import lib
14from pandas._libs.missing import is_matching_na
15from pandas._libs.sparse import SparseIndex
16import pandas._libs.testing as _testing
17from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
18
19from pandas.core.dtypes.common import (
20 is_bool,
21 is_float_dtype,
22 is_integer_dtype,
23 is_number,
24 is_numeric_dtype,
25 needs_i8_conversion,
26)
27from pandas.core.dtypes.dtypes import (
28 CategoricalDtype,
29 DatetimeTZDtype,
30 ExtensionDtype,
31 NumpyEADtype,
32)
33from pandas.core.dtypes.missing import array_equivalent
34
35import pandas as pd
36from pandas import (
37 Categorical,
38 DataFrame,
39 DatetimeIndex,
40 Index,
41 IntervalDtype,
42 IntervalIndex,
43 MultiIndex,
44 PeriodIndex,
45 RangeIndex,
46 Series,
47 TimedeltaIndex,
48)
49from pandas.core.arrays import (
50 DatetimeArray,
51 ExtensionArray,
52 IntervalArray,
53 PeriodArray,
54 TimedeltaArray,
55)
56from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
57from pandas.core.arrays.string_ import StringDtype
58from pandas.core.indexes.api import safe_sort_index
59
60from pandas.io.formats.printing import pprint_thing
61
62if TYPE_CHECKING:
63 from pandas._typing import DtypeObj
64
65
66def assert_almost_equal(
67 left,
68 right,
69 check_dtype: bool | Literal["equiv"] = "equiv",
70 rtol: float = 1.0e-5,
71 atol: float = 1.0e-8,
72 **kwargs,
73) -> None:
74 """
75 Check that the left and right objects are approximately equal.
76
77 By approximately equal, we refer to objects that are numbers or that
78 contain numbers which may be equivalent to specific levels of precision.
79
80 Parameters
81 ----------
82 left : object
83 right : object
84 check_dtype : bool or {'equiv'}, default 'equiv'
85 Check dtype if both a and b are the same type. If 'equiv' is passed in,
86 then `RangeIndex` and `Index` with int64 dtype are also considered
87 equivalent when doing type checking.
88 rtol : float, default 1e-5
89 Relative tolerance.
90 atol : float, default 1e-8
91 Absolute tolerance.
92 """
93 if isinstance(left, Index):
94 assert_index_equal(
95 left,
96 right,
97 check_exact=False,
98 exact=check_dtype,
99 rtol=rtol,
100 atol=atol,
101 **kwargs,
102 )
103
104 elif isinstance(left, Series):
105 assert_series_equal(
106 left,
107 right,
108 check_exact=False,
109 check_dtype=check_dtype,
110 rtol=rtol,
111 atol=atol,
112 **kwargs,
113 )
114
115 elif isinstance(left, DataFrame):
116 assert_frame_equal(
117 left,
118 right,
119 check_exact=False,
120 check_dtype=check_dtype,
121 rtol=rtol,
122 atol=atol,
123 **kwargs,
124 )
125
126 else:
127 # Other sequences.
128 if check_dtype:
129 if is_number(left) and is_number(right):
130 # Do not compare numeric classes, like np.float64 and float.
131 pass
132 elif is_bool(left) and is_bool(right):
133 # Do not compare bool classes, like np.bool_ and bool.
134 pass
135 else:
136 if isinstance(left, np.ndarray) or isinstance(right, np.ndarray):
137 obj = "numpy array"
138 else:
139 obj = "Input"
140 assert_class_equal(left, right, obj=obj)
141
142 # if we have "equiv", this becomes True
143 _testing.assert_almost_equal(
144 left, right, check_dtype=bool(check_dtype), rtol=rtol, atol=atol, **kwargs
145 )
146
147
def _check_isinstance(left, right, cls) -> None:
    """
    Ensure both compared objects are instances of ``cls``.

    Used by the assert_* functions to reject wrongly-typed inputs before
    any comparison is attempted. ``left`` is checked before ``right``.

    Parameters
    ----------
    left : The first object being compared.
    right : The second object being compared.
    cls : The class type to check against.

    Raises
    ------
    AssertionError : Either `left` or `right` is not an instance of `cls`.
    """
    expected_name = cls.__name__

    for candidate in (left, right):
        if isinstance(candidate, cls):
            continue
        raise AssertionError(
            f"{expected_name} Expected type {cls}, found {type(candidate)} instead"
        )
174
175
def assert_dict_equal(left, right, compare_keys: bool = True) -> None:
    """
    Assert that two dicts are equal.

    Parameters
    ----------
    left : dict
    right : dict
    compare_keys : bool, default True
        Forwarded as-is to the compiled ``assert_dict_equal`` helper.

    Raises
    ------
    AssertionError
        If either input is not a dict, or the compiled comparison fails.
    """
    # Reject non-dict inputs before delegating to the compiled helper.
    _check_isinstance(left, right, dict)
    _testing.assert_dict_equal(
        left,
        right,
        compare_keys=compare_keys,
    )
179
180
def assert_index_equal(
    left: Index,
    right: Index,
    exact: bool | str = "equiv",
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Index",
) -> None:
    """
    Assert that two Index objects are equal.

    Parameters
    ----------
    left : Index
    right : Index
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Index with an int64 dtype as well.
    check_names : bool, default True
        Whether to check the names attribute.
    check_exact : bool, default True
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_order : bool, default True
        Whether to compare the order of index entries as well as their values.
        If True, both indexes must contain the same elements, in the same order.
        If False, both indexes must contain the same elements, but in any order.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Index'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Raises
    ------
    AssertionError
        If any of the enabled checks fails.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Index([1, 2, 3])
    >>> b = pd.Index([1, 2, 3])
    >>> tm.assert_index_equal(a, b)
    """
    __tracebackhide__ = True

    def _verify_types(lhs, rhs, label: str = "Index") -> None:
        # Class / inferred_type / dtype checks; disabled when `exact` is falsy.
        if not exact:
            return

        assert_class_equal(lhs, rhs, exact=exact, obj=label)
        assert_attr_equal("inferred_type", lhs, rhs, obj=label)

        both_categorical = isinstance(lhs.dtype, CategoricalDtype) and isinstance(
            rhs.dtype, CategoricalDtype
        )
        if not both_categorical:
            assert_attr_equal("dtype", lhs, rhs, obj=label)
        elif check_categorical:
            # Exact dtype checking is skipped when `check_categorical` is False
            assert_attr_equal("dtype", lhs, rhs, obj=label)
            assert_index_equal(lhs.categories, rhs.categories, exact=exact)

    # instance validation
    _check_isinstance(left, right, Index)

    # class / dtype comparison
    _verify_types(left, right, label=obj)

    # level comparison
    if left.nlevels != right.nlevels:
        raise_assert_detail(
            obj,
            f"{obj} levels are different",
            f"{left.nlevels}, {left}",
            f"{right.nlevels}, {right}",
        )

    # length comparison
    if len(left) != len(right):
        raise_assert_detail(
            obj,
            f"{obj} length are different",
            f"{len(left)}, {left}",
            f"{len(right)}, {right}",
        )

    # When order is irrelevant, compare sorted copies instead
    if not check_order:
        left = safe_sort_index(left)
        right = safe_sort_index(right)

    if isinstance(left, MultiIndex):
        # MultiIndex is compared level by level for friendlier error messages
        right = cast(MultiIndex, right)
        per_level_options = {
            "exact": exact,
            "check_names": check_names,
            "check_exact": check_exact,
            "check_categorical": check_categorical,
            "rtol": rtol,
            "atol": atol,
        }

        for level in range(left.nlevels):
            lobj = f"MultiIndex level [{level}]"
            try:
                # try comparison on levels/codes to avoid densifying MultiIndex
                assert_index_equal(
                    left.levels[level],
                    right.levels[level],
                    obj=lobj,
                    **per_level_options,
                )
                assert_numpy_array_equal(left.codes[level], right.codes[level])
            except AssertionError:
                # Fall back to comparing the materialised level values
                assert_index_equal(
                    left.get_level_values(level),
                    right.get_level_values(level),
                    obj=lobj,
                    **per_level_options,
                )
            # get_level_values may change dtype
            _verify_types(left.levels[level], right.levels[level], label=obj)

    elif check_exact and check_categorical:
        # exact index checking is skipped when `check_categorical` is False
        if not left.equals(right):
            mismatch = left._values != right._values

            if not isinstance(mismatch, np.ndarray):
                mismatch = cast("ExtensionArray", mismatch).fillna(True)

            diff = np.sum(mismatch.astype(int)) * 100.0 / len(left)
            raise_assert_detail(
                obj,
                f"{obj} values are different ({np.round(diff, 5)} %)",
                left,
                right,
            )
    else:
        # "equiv" is truthy, so check_dtype becomes True
        _testing.assert_almost_equal(
            left.values,
            right.values,
            rtol=rtol,
            atol=atol,
            check_dtype=bool(exact),
            obj=obj,
            lobj=left,
            robj=right,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("names", left, right, obj=obj)
    if any(isinstance(idx, PeriodIndex) for idx in (left, right)):
        assert_attr_equal("dtype", left, right, obj=obj)
    if any(isinstance(idx, IntervalIndex) for idx in (left, right)):
        assert_interval_array_equal(left._values, right._values)

    if check_categorical and any(
        isinstance(idx.dtype, CategoricalDtype) for idx in (left, right)
    ):
        assert_categorical_equal(left._values, right._values, obj=f"{obj} category")
350
351
352def assert_class_equal(
353 left, right, exact: bool | str = True, obj: str = "Input"
354) -> None:
355 """
356 Checks classes are equal.
357 """
358 __tracebackhide__ = True
359
360 def repr_class(x):
361 if isinstance(x, Index):
362 # return Index as it is to include values in the error message
363 return x
364
365 return type(x).__name__
366
367 def is_class_equiv(idx: Index) -> bool:
368 """Classes that are a RangeIndex (sub-)instance or exactly an `Index` .
369
370 This only checks class equivalence. There is a separate check that the
371 dtype is int64.
372 """
373 return type(idx) is Index or isinstance(idx, RangeIndex)
374
375 if type(left) == type(right):
376 return
377
378 if exact == "equiv":
379 if is_class_equiv(left) and is_class_equiv(right):
380 return
381
382 msg = f"{obj} classes are different"
383 raise_assert_detail(obj, msg, repr_class(left), repr_class(right))
384
385
def assert_attr_equal(attr: str, left, right, obj: str = "Attributes") -> None:
    """
    Assert that an attribute has equal values on both objects.

    Both objects must have the attribute.

    Parameters
    ----------
    attr : str
        Attribute name being compared.
    left : object
    right : object
    obj : str, default 'Attributes'
        Specify object name being compared, internally used to show appropriate
        assertion message

    Raises
    ------
    AssertionError
        If the two attribute values are not considered equal.
    """
    __tracebackhide__ = True

    lhs_value = getattr(left, attr)
    rhs_value = getattr(right, attr)

    # Identical objects and matching NA values (both np.nan, both NaT,
    # both pd.NA, ...) need no further comparison.
    if lhs_value is rhs_value or is_matching_na(lhs_value, rhs_value):
        return None

    try:
        outcome = lhs_value == rhs_value
    except TypeError:
        # datetimetz on rhs may raise TypeError
        outcome = False

    exactly_one_is_na = (lhs_value is pd.NA) != (rhs_value is pd.NA)
    if exactly_one_is_na:
        outcome = False
    elif not isinstance(outcome, bool):
        # Element-wise comparison result: every element has to match.
        outcome = outcome.all()

    if outcome:
        return None

    raise_assert_detail(
        obj, f'Attribute "{attr}" are different', lhs_value, rhs_value
    )
423
424
def assert_is_valid_plot_return_object(objs) -> None:
    """
    Assert that ``objs`` has a type acceptable as a plotting return value.

    For a Series or ndarray, every element of the flattened values must be a
    matplotlib ``Axes`` or a dict. Any other input must itself be a
    matplotlib ``Artist``, a tuple, or a dict.

    Raises
    ------
    AssertionError
        If ``objs`` (or one of its elements) has an unexpected type.
    """
    # matplotlib is imported lazily so the module does not require it
    from matplotlib.artist import Artist
    from matplotlib.axes import Axes

    if isinstance(objs, (Series, np.ndarray)):
        if isinstance(objs, Series):
            objs = objs._values
        for el in objs.ravel():
            msg = (
                "one of 'objs' is not a matplotlib Axes instance, "
                f"type encountered {repr(type(el).__name__)}"
            )
            assert isinstance(el, (Axes, dict)), msg
    else:
        # The message previously contained the duplicated word "ArtistArtist"
        msg = (
            "objs is neither an ndarray of Artist instances nor a single "
            "Artist instance, tuple, or dict, 'objs' is a "
            f"{repr(type(objs).__name__)}"
        )
        assert isinstance(objs, (Artist, tuple, dict)), msg
445
446
def assert_is_sorted(seq) -> None:
    """
    Assert that the sequence is sorted.

    An Index or Series is unwrapped to its ``.values`` first. ndarrays are
    compared with a sorted copy; any other array is compared with itself
    reordered by its own ``argsort``.
    """
    values = seq.values if isinstance(seq, (Index, Series)) else seq

    if not isinstance(values, np.ndarray):
        assert_extension_array_equal(values, values[values.argsort()])
        return

    # sorting does not change precisions
    assert_numpy_array_equal(values, np.sort(np.array(values)))
456
457
def assert_categorical_equal(
    left,
    right,
    check_dtype: bool = True,
    check_category_order: bool = True,
    obj: str = "Categorical",
) -> None:
    """
    Assert that two Categoricals are equivalent.

    Parameters
    ----------
    left : Categorical
    right : Categorical
    check_dtype : bool, default True
        Check that integer dtype of the codes are the same.
    check_category_order : bool, default True
        Whether the order of the categories should be compared, which
        implies identical integer codes. If False, only the resulting
        values are compared. The ordered attribute is
        checked regardless.
    obj : str, default 'Categorical'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Raises
    ------
    AssertionError
        If either input is not a Categorical or any comparison fails.
    """
    _check_isinstance(left, right, Categorical)

    # A RangeIndex on either side relaxes the class check to "equiv";
    # otherwise the categories must match exactly as Index objects.
    has_range_categories = any(
        isinstance(cat.categories, RangeIndex) for cat in (left, right)
    )
    exact: bool | str = "equiv" if has_range_categories else True

    categories_label = f"{obj}.categories"

    if check_category_order:
        assert_index_equal(
            left.categories, right.categories, obj=categories_label, exact=exact
        )
        assert_numpy_array_equal(
            left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes"
        )
    else:
        try:
            sorted_left = left.categories.sort_values()
            sorted_right = right.categories.sort_values()
        except TypeError:
            # e.g. '<' not supported between instances of 'int' and 'str'
            sorted_left, sorted_right = left.categories, right.categories
        assert_index_equal(
            sorted_left, sorted_right, obj=categories_label, exact=exact
        )

        # Compare the values the codes resolve to rather than the codes
        left_values = left.categories.take(left.codes)
        right_values = right.categories.take(right.codes)
        assert_index_equal(
            left_values, right_values, obj=f"{obj}.values", exact=exact
        )

    assert_attr_equal("ordered", left, right, obj=obj)
517
518
def assert_interval_array_equal(
    left, right, exact: bool | Literal["equiv"] = "equiv", obj: str = "IntervalArray"
) -> None:
    """
    Test that two IntervalArrays are equivalent.

    The left bounds, the right bounds and the ``closed`` attribute are
    compared in that order.

    Parameters
    ----------
    left, right : IntervalArray
        The IntervalArrays to compare.
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Index with an int64 dtype as well.
        NOTE(review): this argument is not forwarded to the bound
        comparisons below - confirm whether that is intended.
    obj : str, default 'IntervalArray'
        Specify object name being compared, internally used to show appropriate
        assertion message

    Raises
    ------
    AssertionError
        If either input is not an IntervalArray or any comparison fails.
    """
    _check_isinstance(left, right, IntervalArray)

    kwargs = {}
    if left._left.dtype.kind in "mM":
        # We have a DatetimeArray or TimedeltaArray
        kwargs["check_freq"] = False

    assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
    # Label the right bounds as ".right" so a failure points at the correct
    # side (this was previously reported as ".left").
    assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)

    assert_attr_equal("closed", left, right, obj=obj)
548
549
def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
    """
    Assert that two PeriodArrays are equal.

    Compares the backing ``_ndarray`` values and then the ``dtype`` attribute.

    Parameters
    ----------
    left, right : PeriodArray
    obj : str, default 'PeriodArray'
        Object name used in assertion messages.
    """
    _check_isinstance(left, right, PeriodArray)

    values_label = f"{obj}._ndarray"
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=values_label)
    assert_attr_equal("dtype", left, right, obj=obj)
555
556
def assert_datetime_array_equal(
    left, right, obj: str = "DatetimeArray", check_freq: bool = True
) -> None:
    """
    Assert that two DatetimeArrays are equal.

    Compares the backing ``_ndarray`` values, then ``freq`` (only when
    ``check_freq`` is True), then ``tz``.

    Parameters
    ----------
    left, right : DatetimeArray
    obj : str, default 'DatetimeArray'
        Object name used in assertion messages.
    check_freq : bool, default True
        Whether to compare the ``freq`` attribute.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, DatetimeArray)

    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")

    compared_attrs = ("freq", "tz") if check_freq else ("tz",)
    for attr_name in compared_attrs:
        assert_attr_equal(attr_name, left, right, obj=obj)
567
568
def assert_timedelta_array_equal(
    left, right, obj: str = "TimedeltaArray", check_freq: bool = True
) -> None:
    """
    Assert that two TimedeltaArrays are equal.

    Compares the backing ``_ndarray`` values and, when ``check_freq`` is
    True, the ``freq`` attribute.

    Parameters
    ----------
    left, right : TimedeltaArray
    obj : str, default 'TimedeltaArray'
        Object name used in assertion messages.
    check_freq : bool, default True
        Whether to compare the ``freq`` attribute.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, TimedeltaArray)

    values_label = f"{obj}._ndarray"
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=values_label)

    if not check_freq:
        return
    assert_attr_equal("freq", left, right, obj=obj)
577
578
579def raise_assert_detail(
580 obj, message, left, right, diff=None, first_diff=None, index_values=None
581) -> NoReturn:
582 __tracebackhide__ = True
583
584 msg = f"""{obj} are different
585
586{message}"""
587
588 if isinstance(index_values, Index):
589 index_values = np.asarray(index_values)
590
591 if isinstance(index_values, np.ndarray):
592 msg += f"\n[index]: {pprint_thing(index_values)}"
593
594 if isinstance(left, np.ndarray):
595 left = pprint_thing(left)
596 elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)):
597 left = repr(left)
598
599 if isinstance(right, np.ndarray):
600 right = pprint_thing(right)
601 elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)):
602 right = repr(right)
603
604 msg += f"""
605[left]: {left}
606[right]: {right}"""
607
608 if diff is not None:
609 msg += f"\n[diff]: {diff}"
610
611 if first_diff is not None:
612 msg += f"\n{first_diff}"
613
614 raise AssertionError(msg)
615
616
def assert_numpy_array_equal(
    left,
    right,
    strict_nan: bool = False,
    check_dtype: bool | Literal["equiv"] = True,
    err_msg=None,
    check_same=None,
    obj: str = "numpy array",
    index_values=None,
) -> None:
    """
    Assert that two 'np.ndarray' objects are equivalent.

    Parameters
    ----------
    left, right : numpy.ndarray or iterable
        The two arrays to be compared.
    strict_nan : bool, default False
        If True, consider NaN and None to be different.
    check_dtype : bool, default True
        Check dtype if both left and right are np.ndarray.
    err_msg : str, default None
        If provided, used as assertion message.
    check_same : None|'copy'|'same', default None
        Ensure left and right refer/do not refer to the same memory area.
    obj : str, default 'numpy array'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    index_values : Index | numpy.ndarray, default None
        optional index (shared by both left and right), used in output.

    Raises
    ------
    AssertionError
        If the inputs differ in class, are not ndarrays, violate the
        ``check_same`` requirement, or differ in shape, values or dtype.
    """
    __tracebackhide__ = True

    # Class comparison comes first because it gives the more detailed
    # message; afterwards both operands must be real ndarrays.
    assert_class_equal(left, right, obj=obj)
    _check_isinstance(left, right, np.ndarray)

    def _memory_owner(arr):
        # A view reports the array it was derived from via `.base`.
        return arr.base if getattr(arr, "base", None) is not None else arr

    left_base = _memory_owner(left)
    right_base = _memory_owner(right)
    same_base = left_base is right_base

    if check_same == "same" and not same_base:
        raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}")
    if check_same == "copy" and same_base:
        raise AssertionError(f"{repr(left_base)} is {repr(right_base)}")

    def _fail(lhs, rhs, custom_msg) -> NoReturn:
        # A caller-supplied message replaces the generated report entirely.
        if custom_msg is not None:
            raise AssertionError(custom_msg)

        if lhs.shape != rhs.shape:
            raise_assert_detail(
                obj, f"{obj} shapes are different", lhs.shape, rhs.shape
            )

        # count up differences
        n_different = sum(
            not array_equivalent(l_item, r_item, strict_nan=strict_nan)
            for l_item, r_item in zip(lhs, rhs)
        )
        pct_different = n_different * 100.0 / lhs.size
        raise_assert_detail(
            obj,
            f"{obj} values are different ({np.round(pct_different, 5)} %)",
            lhs,
            rhs,
            index_values=index_values,
        )

    # compare shape and values
    if not array_equivalent(left, right, strict_nan=strict_nan):
        _fail(left, right, err_msg)

    if check_dtype and isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        assert_attr_equal("dtype", left, right, obj=obj)
695
696
697def assert_extension_array_equal(
698 left,
699 right,
700 check_dtype: bool | Literal["equiv"] = True,
701 index_values=None,
702 check_exact: bool | lib.NoDefault = lib.no_default,
703 rtol: float | lib.NoDefault = lib.no_default,
704 atol: float | lib.NoDefault = lib.no_default,
705 obj: str = "ExtensionArray",
706) -> None:
707 """
708 Check that left and right ExtensionArrays are equal.
709
710 Parameters
711 ----------
712 left, right : ExtensionArray
713 The two arrays to compare.
714 check_dtype : bool, default True
715 Whether to check if the ExtensionArray dtypes are identical.
716 index_values : Index | numpy.ndarray, default None
717 Optional index (shared by both left and right), used in output.
718 check_exact : bool, default False
719 Whether to compare number exactly.
720
721 .. versionchanged:: 2.2.0
722
723 Defaults to True for integer dtypes if none of
724 ``check_exact``, ``rtol`` and ``atol`` are specified.
725 rtol : float, default 1e-5
726 Relative tolerance. Only used when check_exact is False.
727 atol : float, default 1e-8
728 Absolute tolerance. Only used when check_exact is False.
729 obj : str, default 'ExtensionArray'
730 Specify object name being compared, internally used to show appropriate
731 assertion message.
732
733 .. versionadded:: 2.0.0
734
735 Notes
736 -----
737 Missing values are checked separately from valid values.
738 A mask of missing values is computed for each and checked to match.
739 The remaining all-valid values are cast to object dtype and checked.
740
741 Examples
742 --------
743 >>> from pandas import testing as tm
744 >>> a = pd.Series([1, 2, 3, 4])
745 >>> b, c = a.array, a.array
746 >>> tm.assert_extension_array_equal(b, c)
747 """
748 if (
749 check_exact is lib.no_default
750 and rtol is lib.no_default
751 and atol is lib.no_default
752 ):
753 check_exact = (
754 is_numeric_dtype(left.dtype)
755 and not is_float_dtype(left.dtype)
756 or is_numeric_dtype(right.dtype)
757 and not is_float_dtype(right.dtype)
758 )
759 elif check_exact is lib.no_default:
760 check_exact = False
761
762 rtol = rtol if rtol is not lib.no_default else 1.0e-5
763 atol = atol if atol is not lib.no_default else 1.0e-8
764
765 assert isinstance(left, ExtensionArray), "left is not an ExtensionArray"
766 assert isinstance(right, ExtensionArray), "right is not an ExtensionArray"
767 if check_dtype:
768 assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
769
770 if (
771 isinstance(left, DatetimeLikeArrayMixin)
772 and isinstance(right, DatetimeLikeArrayMixin)
773 and type(right) == type(left)
774 ):
775 # GH 52449
776 if not check_dtype and left.dtype.kind in "mM":
777 if not isinstance(left.dtype, np.dtype):
778 l_unit = cast(DatetimeTZDtype, left.dtype).unit
779 else:
780 l_unit = np.datetime_data(left.dtype)[0]
781 if not isinstance(right.dtype, np.dtype):
782 r_unit = cast(DatetimeTZDtype, right.dtype).unit
783 else:
784 r_unit = np.datetime_data(right.dtype)[0]
785 if (
786 l_unit != r_unit
787 and compare_mismatched_resolutions(
788 left._ndarray, right._ndarray, operator.eq
789 ).all()
790 ):
791 return
792 # Avoid slow object-dtype comparisons
793 # np.asarray for case where we have a np.MaskedArray
794 assert_numpy_array_equal(
795 np.asarray(left.asi8),
796 np.asarray(right.asi8),
797 index_values=index_values,
798 obj=obj,
799 )
800 return
801
802 left_na = np.asarray(left.isna())
803 right_na = np.asarray(right.isna())
804 assert_numpy_array_equal(
805 left_na, right_na, obj=f"{obj} NA mask", index_values=index_values
806 )
807
808 left_valid = left[~left_na].to_numpy(dtype=object)
809 right_valid = right[~right_na].to_numpy(dtype=object)
810 if check_exact:
811 assert_numpy_array_equal(
812 left_valid, right_valid, obj=obj, index_values=index_values
813 )
814 else:
815 _testing.assert_almost_equal(
816 left_valid,
817 right_valid,
818 check_dtype=bool(check_dtype),
819 rtol=rtol,
820 atol=atol,
821 obj=obj,
822 index_values=index_values,
823 )
824
825
826# This could be refactored to use the NDFrame.equals method
827def assert_series_equal(
828 left,
829 right,
830 check_dtype: bool | Literal["equiv"] = True,
831 check_index_type: bool | Literal["equiv"] = "equiv",
832 check_series_type: bool = True,
833 check_names: bool = True,
834 check_exact: bool | lib.NoDefault = lib.no_default,
835 check_datetimelike_compat: bool = False,
836 check_categorical: bool = True,
837 check_category_order: bool = True,
838 check_freq: bool = True,
839 check_flags: bool = True,
840 rtol: float | lib.NoDefault = lib.no_default,
841 atol: float | lib.NoDefault = lib.no_default,
842 obj: str = "Series",
843 *,
844 check_index: bool = True,
845 check_like: bool = False,
846) -> None:
847 """
848 Check that left and right Series are equal.
849
850 Parameters
851 ----------
852 left : Series
853 right : Series
854 check_dtype : bool, default True
855 Whether to check the Series dtype is identical.
856 check_index_type : bool or {'equiv'}, default 'equiv'
857 Whether to check the Index class, dtype and inferred_type
858 are identical.
859 check_series_type : bool, default True
860 Whether to check the Series class is identical.
861 check_names : bool, default True
862 Whether to check the Series and Index names attribute.
863 check_exact : bool, default False
864 Whether to compare number exactly.
865
866 .. versionchanged:: 2.2.0
867
868 Defaults to True for integer dtypes if none of
869 ``check_exact``, ``rtol`` and ``atol`` are specified.
870 check_datetimelike_compat : bool, default False
871 Compare datetime-like which is comparable ignoring dtype.
872 check_categorical : bool, default True
873 Whether to compare internal Categorical exactly.
874 check_category_order : bool, default True
875 Whether to compare category order of internal Categoricals.
876 check_freq : bool, default True
877 Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
878 check_flags : bool, default True
879 Whether to check the `flags` attribute.
880 rtol : float, default 1e-5
881 Relative tolerance. Only used when check_exact is False.
882 atol : float, default 1e-8
883 Absolute tolerance. Only used when check_exact is False.
884 obj : str, default 'Series'
885 Specify object name being compared, internally used to show appropriate
886 assertion message.
887 check_index : bool, default True
888 Whether to check index equivalence. If False, then compare only values.
889
890 .. versionadded:: 1.3.0
891 check_like : bool, default False
892 If True, ignore the order of the index. Must be False if check_index is False.
893 Note: same labels must be with the same data.
894
895 .. versionadded:: 1.5.0
896
897 Examples
898 --------
899 >>> from pandas import testing as tm
900 >>> a = pd.Series([1, 2, 3, 4])
901 >>> b = pd.Series([1, 2, 3, 4])
902 >>> tm.assert_series_equal(a, b)
903 """
904 __tracebackhide__ = True
905 check_exact_index = False if check_exact is lib.no_default else check_exact
906 if (
907 check_exact is lib.no_default
908 and rtol is lib.no_default
909 and atol is lib.no_default
910 ):
911 check_exact = (
912 is_numeric_dtype(left.dtype)
913 and not is_float_dtype(left.dtype)
914 or is_numeric_dtype(right.dtype)
915 and not is_float_dtype(right.dtype)
916 )
917 elif check_exact is lib.no_default:
918 check_exact = False
919
920 rtol = rtol if rtol is not lib.no_default else 1.0e-5
921 atol = atol if atol is not lib.no_default else 1.0e-8
922
923 if not check_index and check_like:
924 raise ValueError("check_like must be False if check_index is False")
925
926 # instance validation
927 _check_isinstance(left, right, Series)
928
929 if check_series_type:
930 assert_class_equal(left, right, obj=obj)
931
932 # length comparison
933 if len(left) != len(right):
934 msg1 = f"{len(left)}, {left.index}"
935 msg2 = f"{len(right)}, {right.index}"
936 raise_assert_detail(obj, "Series length are different", msg1, msg2)
937
938 if check_flags:
939 assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"
940
941 if check_index:
942 # GH #38183
943 assert_index_equal(
944 left.index,
945 right.index,
946 exact=check_index_type,
947 check_names=check_names,
948 check_exact=check_exact_index,
949 check_categorical=check_categorical,
950 check_order=not check_like,
951 rtol=rtol,
952 atol=atol,
953 obj=f"{obj}.index",
954 )
955
956 if check_like:
957 left = left.reindex_like(right)
958
959 if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)):
960 lidx = left.index
961 ridx = right.index
962 assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq)
963
964 if check_dtype:
965 # We want to skip exact dtype checking when `check_categorical`
966 # is False. We'll still raise if only one is a `Categorical`,
967 # regardless of `check_categorical`
968 if (
969 isinstance(left.dtype, CategoricalDtype)
970 and isinstance(right.dtype, CategoricalDtype)
971 and not check_categorical
972 ):
973 pass
974 else:
975 assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
976 if check_exact:
977 left_values = left._values
978 right_values = right._values
979 # Only check exact if dtype is numeric
980 if isinstance(left_values, ExtensionArray) and isinstance(
981 right_values, ExtensionArray
982 ):
983 assert_extension_array_equal(
984 left_values,
985 right_values,
986 check_dtype=check_dtype,
987 index_values=left.index,
988 obj=str(obj),
989 )
990 else:
991 # convert both to NumPy if not, check_dtype would raise earlier
992 lv, rv = left_values, right_values
993 if isinstance(left_values, ExtensionArray):
994 lv = left_values.to_numpy()
995 if isinstance(right_values, ExtensionArray):
996 rv = right_values.to_numpy()
997 assert_numpy_array_equal(
998 lv,
999 rv,
1000 check_dtype=check_dtype,
1001 obj=str(obj),
1002 index_values=left.index,
1003 )
1004 elif check_datetimelike_compat and (
1005 needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)
1006 ):
1007 # we want to check only if we have compat dtypes
1008 # e.g. integer and M|m are NOT compat, but we can simply check
1009 # the values in that case
1010
1011 # datetimelike may have different objects (e.g. datetime.datetime
1012 # vs Timestamp) but will compare equal
1013 if not Index(left._values).equals(Index(right._values)):
1014 msg = (
1015 f"[datetimelike_compat=True] {left._values} "
1016 f"is not equal to {right._values}."
1017 )
1018 raise AssertionError(msg)
1019 elif isinstance(left.dtype, IntervalDtype) and isinstance(
1020 right.dtype, IntervalDtype
1021 ):
1022 assert_interval_array_equal(left.array, right.array)
1023 elif isinstance(left.dtype, CategoricalDtype) or isinstance(
1024 right.dtype, CategoricalDtype
1025 ):
1026 _testing.assert_almost_equal(
1027 left._values,
1028 right._values,
1029 rtol=rtol,
1030 atol=atol,
1031 check_dtype=bool(check_dtype),
1032 obj=str(obj),
1033 index_values=left.index,
1034 )
1035 elif isinstance(left.dtype, ExtensionDtype) and isinstance(
1036 right.dtype, ExtensionDtype
1037 ):
1038 assert_extension_array_equal(
1039 left._values,
1040 right._values,
1041 rtol=rtol,
1042 atol=atol,
1043 check_dtype=check_dtype,
1044 index_values=left.index,
1045 obj=str(obj),
1046 )
1047 elif is_extension_array_dtype_and_needs_i8_conversion(
1048 left.dtype, right.dtype
1049 ) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype):
1050 assert_extension_array_equal(
1051 left._values,
1052 right._values,
1053 check_dtype=check_dtype,
1054 index_values=left.index,
1055 obj=str(obj),
1056 )
1057 elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
1058 # DatetimeArray or TimedeltaArray
1059 assert_extension_array_equal(
1060 left._values,
1061 right._values,
1062 check_dtype=check_dtype,
1063 index_values=left.index,
1064 obj=str(obj),
1065 )
1066 else:
1067 _testing.assert_almost_equal(
1068 left._values,
1069 right._values,
1070 rtol=rtol,
1071 atol=atol,
1072 check_dtype=bool(check_dtype),
1073 obj=str(obj),
1074 index_values=left.index,
1075 )
1076
1077 # metadata comparison
1078 if check_names:
1079 assert_attr_equal("name", left, right, obj=obj)
1080
1081 if check_categorical:
1082 if isinstance(left.dtype, CategoricalDtype) or isinstance(
1083 right.dtype, CategoricalDtype
1084 ):
1085 assert_categorical_equal(
1086 left._values,
1087 right._values,
1088 obj=f"{obj} category",
1089 check_category_order=check_category_order,
1090 )
1091
1092
1093# This could be refactored to use the NDFrame.equals method
def assert_frame_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    check_index_type: bool | Literal["equiv"] = "equiv",
    check_column_type: bool | Literal["equiv"] = "equiv",
    check_frame_type: bool = True,
    check_names: bool = True,
    by_blocks: bool = False,
    check_exact: bool | lib.NoDefault = lib.no_default,
    check_datetimelike_compat: bool = False,
    check_categorical: bool = True,
    check_like: bool = False,
    check_freq: bool = True,
    check_flags: bool = True,
    rtol: float | lib.NoDefault = lib.no_default,
    atol: float | lib.NoDefault = lib.no_default,
    obj: str = "DataFrame",
) -> None:
    """
    Assert that two DataFrames are equal.

    The comparison is primarily meant for unit tests: it walks both frames
    and raises on the first difference it finds. The keyword arguments
    control how strict each part of the comparison is.

    Parameters
    ----------
    left : DataFrame
        First DataFrame to compare.
    right : DataFrame
        Second DataFrame to compare.
    check_dtype : bool, default True
        Whether the dtypes of the data must be identical.
    check_index_type : bool or {'equiv'}, default 'equiv'
        Whether the class, dtype and inferred_type of the index must be
        identical.
    check_column_type : bool or {'equiv'}, default 'equiv'
        Whether the class, dtype and inferred_type of the columns must be
        identical. Forwarded as the ``exact`` argument of
        :func:`assert_index_equal`.
    check_frame_type : bool, default True
        Whether the DataFrame classes must match.
    check_names : bool, default True
        Whether the `names` attribute of both the `index` and the `column`
        attributes of the DataFrames must be identical.
    by_blocks : bool, default False
        How the data itself is compared. If False, compare column by column.
        If True, compare block by block; in that mode only ``check_dtype``
        and ``obj`` are forwarded to the per-block comparison.
    check_exact : bool, default False
        Whether numbers are compared exactly.

        .. versionchanged:: 2.2.0

            Defaults to True for integer dtypes if none of
            ``check_exact``, ``rtol`` and ``atol`` are specified.
    check_datetimelike_compat : bool, default False
        Compare datetime-like values that are comparable, ignoring dtype.
    check_categorical : bool, default True
        Whether internal Categoricals are compared exactly.
    check_like : bool, default False
        If True, the order of index & columns is ignored.
        Note: index labels must still match their respective rows
        (and likewise for columns) - equal labels must carry equal data.
    check_freq : bool, default True
        Whether the `freq` attribute of a DatetimeIndex or TimedeltaIndex
        is checked.
    check_flags : bool, default True
        Whether the `flags` attribute is checked.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'DataFrame'
        Name of the object being compared; used internally to build the
        assertion message.

    See Also
    --------
    assert_series_equal : Equivalent method for asserting Series equality.
    DataFrame.equals : Check DataFrame equality.

    Examples
    --------
    Two DataFrames holding equal values, but with one column stored under
    a different dtype.

    >>> from pandas.testing import assert_frame_equal
    >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
    >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})

    A DataFrame always compares equal to itself.

    >>> assert_frame_equal(df1, df1)

    df1 and df2 differ because column 'b' has a different dtype.

    >>> assert_frame_equal(df1, df2)
    Traceback (most recent call last):
    ...
    AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different

    Attribute "dtype" are different
    [left]:  int64
    [right]: float64

    Passing ``check_dtype=False`` ignores the differing column dtypes.

    >>> assert_frame_equal(df1, df2, check_dtype=False)
    """
    __tracebackhide__ = True

    # The axis comparisons need concrete values, so the no_default sentinels
    # are resolved here. The per-column Series comparison further down is
    # handed the raw arguments instead.
    axis_rtol = 1.0e-5 if rtol is lib.no_default else rtol
    axis_atol = 1.0e-8 if atol is lib.no_default else atol
    axis_check_exact = False if check_exact is lib.no_default else check_exact

    # Both operands must be DataFrames before anything else is inspected.
    _check_isinstance(left, right, DataFrame)

    if check_frame_type:
        assert isinstance(left, type(right))

    if left.shape != right.shape:
        raise_assert_detail(
            obj, f"{obj} shape mismatch", f"{left.shape!r}", f"{right.shape!r}"
        )

    if check_flags:
        assert left.flags == right.flags, f"{left.flags!r} != {right.flags!r}"

    # The row index is checked first, then the columns; both use the same
    # options apart from the ``exact`` flag and the reported name.
    axis_specs = (
        ("index", check_index_type),
        ("columns", check_column_type),
    )
    for axis_name, exact in axis_specs:
        assert_index_equal(
            getattr(left, axis_name),
            getattr(right, axis_name),
            exact=exact,
            check_names=check_names,
            check_exact=axis_check_exact,
            check_categorical=check_categorical,
            check_order=not check_like,
            rtol=axis_rtol,
            atol=axis_atol,
            obj=f"{obj}.{axis_name}",
        )

    if check_like:
        # Label order was ignored above; align left to right so the data
        # comparison below can proceed positionally.
        left = left.reindex_like(right)

    if by_blocks:
        right_blocks = right._to_dict_of_blocks()
        left_blocks = left._to_dict_of_blocks()
        # Every dtype present on either side must be present on both.
        for block_dtype in set([*left_blocks.keys(), *right_blocks.keys()]):
            assert block_dtype in left_blocks
            assert block_dtype in right_blocks
            assert_frame_equal(
                left_blocks[block_dtype],
                right_blocks[block_dtype],
                check_dtype=check_dtype,
                obj=obj,
            )
        return

    # Column-wise comparison. The columns were already shown to match, so
    # fast location-based lookups are safe here.
    for position, label in enumerate(left.columns):
        left_column = left._ixs(position, axis=1)
        right_column = right._ixs(position, axis=1)

        # GH #38183
        # check_index=False: the index was validated once for the whole
        # frame, so it is not re-validated for every single column.
        assert_series_equal(
            left_column,
            right_column,
            check_dtype=check_dtype,
            check_index_type=check_index_type,
            check_exact=check_exact,
            check_names=check_names,
            check_datetimelike_compat=check_datetimelike_compat,
            check_categorical=check_categorical,
            check_freq=check_freq,
            obj=f'{obj}.iloc[:, {position}] (column name="{label}")',
            rtol=rtol,
            atol=atol,
            check_index=False,
            check_flags=False,
        )
1295
1296
def assert_equal(left, right, **kwargs) -> None:
    """
    Dispatch to the tm.assert_*_equal function that matches the type of ``left``.

    Parameters
    ----------
    left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray
        The two items to be compared. Only the type of ``left`` decides
        which assert function is used.
    **kwargs
        Forwarded unchanged to the selected assert function. Must be empty
        when ``left`` is a string or any other object that is not one of
        the types listed above.
    """
    __tracebackhide__ = True

    if isinstance(left, Index):
        assert_index_equal(left, right, **kwargs)
        # Datetime-like indexes additionally have to agree on their freq.
        if isinstance(left, (DatetimeIndex, TimedeltaIndex)):
            assert left.freq == right.freq, (left.freq, right.freq)
        return

    if isinstance(left, Series):
        assert_series_equal(left, right, **kwargs)
        return

    if isinstance(left, DataFrame):
        assert_frame_equal(left, right, **kwargs)
        return

    # The specialised array types are tested before the generic
    # ExtensionArray fallback.
    if isinstance(left, IntervalArray):
        assert_interval_array_equal(left, right, **kwargs)
        return

    if isinstance(left, PeriodArray):
        assert_period_array_equal(left, right, **kwargs)
        return

    if isinstance(left, DatetimeArray):
        assert_datetime_array_equal(left, right, **kwargs)
        return

    if isinstance(left, TimedeltaArray):
        assert_timedelta_array_equal(left, right, **kwargs)
        return

    if isinstance(left, ExtensionArray):
        assert_extension_array_equal(left, right, **kwargs)
        return

    if isinstance(left, np.ndarray):
        assert_numpy_array_equal(left, right, **kwargs)
        return

    # Everything below takes no options at all.
    assert kwargs == {}
    if isinstance(left, str):
        assert left == right
    else:
        assert_almost_equal(left, right)
1336
1337
def assert_sp_array_equal(left, right) -> None:
    """
    Assert that two SparseArrays are equal.

    The stored sparse values, the sparse indexes, the ``fill_value`` and
    ``dtype`` attributes and finally the dense values are compared, in
    that order.

    Parameters
    ----------
    left : SparseArray
    right : SparseArray
    """
    _check_isinstance(left, right, pd.arrays.SparseArray)

    assert_numpy_array_equal(left.sp_values, right.sp_values)

    # Both sides must carry a SparseIndex before the two can be compared.
    left_sp_index = left.sp_index
    assert isinstance(left_sp_index, SparseIndex)
    right_sp_index = right.sp_index
    assert isinstance(right_sp_index, SparseIndex)

    if not left_sp_index.equals(right_sp_index):
        raise_assert_detail(
            "SparseArray.index", "index are not equal", left_sp_index, right_sp_index
        )

    for attr_name in ("fill_value", "dtype"):
        assert_attr_equal(attr_name, left, right)

    assert_numpy_array_equal(left.to_dense(), right.to_dense())
1369
1370
def assert_contains_all(iterable, dic) -> None:
    """
    Assert that every element of ``iterable`` is contained in ``dic``.

    Parameters
    ----------
    iterable : iterable
        Items whose membership is checked, in iteration order.
    dic : container
        Object supporting the ``in`` operator.

    Raises
    ------
    AssertionError
        For the first item that is not found in ``dic``.
    """
    for item in iterable:
        assert item in dic, f"Did not contain item: {item!r}"
1374
1375
def assert_copy(iter1, iter2, **eql_kwargs) -> None:
    """
    Assert that two iterables hold equal but distinct objects.

    The iterables are walked pairwise. Each pair must compare equal under
    assert_almost_equal and must not be the very same object. Identity is
    checked for the top-level elements only, not for items nested inside
    them.

    Parameters
    ----------
    iter1, iter2 : iterable
        Iterables producing elements comparable with assert_almost_equal.
    **eql_kwargs
        Forwarded to assert_almost_equal for every pair.
    """
    for first, second in zip(iter1, iter2):
        assert_almost_equal(first, second, **eql_kwargs)
        identity_msg = (
            f"Expected object {type(first)!r} and object {type(second)!r} to be "
            "different objects, but they were the same object."
        )
        assert first is not second, identity_msg
1392
1393
def is_extension_array_dtype_and_needs_i8_conversion(
    left_dtype: DtypeObj, right_dtype: DtypeObj
) -> bool:
    """
    Report whether an ExtensionDtype is paired with a dtype needing i8 conversion.

    Parameters
    ----------
    left_dtype : DtypeObj
        Dtype tested for being an ExtensionDtype.
    right_dtype : DtypeObj
        Dtype tested with needs_i8_conversion.

    Returns
    -------
    bool
        True only when both conditions hold.

    Notes
    -----
    Related to issue #37609
    """
    if not isinstance(left_dtype, ExtensionDtype):
        return False
    return needs_i8_conversion(right_dtype)
1408
1409
def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice) -> None:
    """
    Assert that slicing by label and slicing by position select the same data.

    ``ser.loc[l_slc]`` must equal ``ser.iloc[i_slc]``. When the index does not
    have an integer dtype, plain ``ser[l_slc]`` must equal it as well.

    Parameters
    ----------
    ser : Series
        Series to slice.
    l_slc : slice
        Slice passed to ``.loc`` and, where applicable, to plain ``[]``.
    i_slc : slice
        Slice passed to ``.iloc``; its result is the expected value.
    """
    by_position = ser.iloc[i_slc]
    by_label = ser.loc[l_slc]
    assert_series_equal(by_label, by_position)

    # Plain __getitem__ is only compared for non-integer indexes.
    if is_integer_dtype(ser.index):
        return
    assert_series_equal(ser[l_slc], by_position)
1422
1423
def assert_metadata_equivalent(
    left: DataFrame | Series, right: DataFrame | Series | None = None
) -> None:
    """
    Assert that the attributes named in ``left._metadata`` are equivalent.

    Parameters
    ----------
    left : DataFrame or Series
        Object whose ``_metadata`` names the attributes to check.
    right : DataFrame or Series, optional
        Object to compare against. When omitted, every metadata attribute
        of ``left`` must be None.

    Notes
    -----
    An attribute missing on either object is treated as None.
    """
    if right is None:
        # Nothing to compare against: all metadata on left must be unset.
        for attr_name in left._metadata:
            assert getattr(left, attr_name, None) is None
        return

    for attr_name in left._metadata:
        assert getattr(left, attr_name, None) == getattr(right, attr_name, None)