1"""
2Methods used by Block.replace and related methods.
3"""
4from __future__ import annotations
5
6import operator
7import re
8from re import Pattern
9from typing import (
10 TYPE_CHECKING,
11 Any,
12)
13
14import numpy as np
15
16from pandas.core.dtypes.common import (
17 is_bool,
18 is_re,
19 is_re_compilable,
20)
21from pandas.core.dtypes.missing import isna
22
23if TYPE_CHECKING:
24 from pandas._typing import (
25 ArrayLike,
26 Scalar,
27 npt,
28 )
29
30
def should_use_regex(regex: bool, to_replace: Any) -> bool:
    """
    Determine if `to_replace` ought to be handled as a regular expression.

    Parameters
    ----------
    regex : bool
        Whether regex matching was requested by the caller.
    to_replace : Any
        Candidate pattern; an already-compiled pattern forces regex handling.

    Returns
    -------
    bool
        Truthy only when regex handling applies, `to_replace` can be
        compiled, and the compiled pattern is not the empty string.
    """
    # A compiled pattern overrides whatever the caller passed for `regex`.
    wants_regex = True if is_re(to_replace) else regex

    # Short-circuit chain: the compile step is only reached once we know
    # `to_replace` is compilable; an empty pattern never counts as a regex.
    return (
        wants_regex
        and is_re_compilable(to_replace)
        and re.compile(to_replace).pattern != ""
    )
43
44
def compare_or_regex_search(
    a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: npt.NDArray[np.bool_]
) -> ArrayLike:
    """
    Compare an array-like against a scalar or regex pattern, element-wise.

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True (and `b` qualifies as a regular expression), perform an element-wise
    regex search on the string elements; non-string elements never match.

    Parameters
    ----------
    a : array-like
    b : scalar or regex pattern
    regex : bool
    mask : np.ndarray[bool]
        When `a` is an ndarray, only ``a[mask]`` is compared and every other
        position is False in the result.

    Returns
    -------
    mask : array-like of bool
        ``~mask`` when `b` is NA.

    Raises
    ------
    TypeError
        If `a` is an ndarray and the comparison yields a single bool rather
        than an element-wise result.
    """
    if isna(b):
        return ~mask

    def _check_comparison_types(
        result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern
    ) -> None:
        """
        Raise TypeError if comparing ndarray `a` with `b` gave a scalar bool.
        """
        if is_bool(result) and isinstance(a, np.ndarray):
            type_names = [f"ndarray(dtype={a.dtype})", type(b).__name__]

            raise TypeError(
                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
            )

    if not regex or not should_use_regex(regex, b):
        # TODO: should use missing.mask_missing?
        op = lambda x: operator.eq(x, b)
    else:
        # otypes is given explicitly: without it np.vectorize raises
        # ValueError on size-0 input, which happens whenever `mask`
        # selects no elements of `a`.
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False,
            otypes=[np.bool_],
        )

    # GH#32621 use mask to avoid comparing to NAs
    if isinstance(a, np.ndarray):
        a = a[mask]

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        # NOTE(review): when `a` is not an ndarray it is not subset above, so
        # `result` may be full-length here while np.place consumes only its
        # leading values -- confirm callers never rely on that combination.
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        np.place(tmp, mask, result)
        result = tmp

    _check_comparison_types(result, a, b)
    return result
109
110
def replace_regex(
    values: ArrayLike, rx: re.Pattern, value, mask: npt.NDArray[np.bool_] | None
) -> None:
    """
    Apply a regex replacement to the string elements of `values`.

    Parameters
    ----------
    values : ArrayLike
        Object dtype.
    rx : re.Pattern
    value : Any
        Replacement. A string is substituted via ``rx.sub``; anything else
        (numeric, nan, object) replaces the whole matching element.
    mask : np.ndarray[bool], optional
        If given, only ``values[mask]`` is processed and written back.

    Notes
    -----
    Alters values in-place.
    """
    # True when `value` is a non-null string, i.e. pattern substitution
    # inside each element makes sense; otherwise matching elements are
    # swapped wholesale for `value`.
    substitute_text = not (isna(value) or not isinstance(value, str))

    def _replace_one(elem):
        # Non-string elements (including nulls) pass through untouched, as
        # does everything when `rx` is not a compiled pattern.
        if not (is_re(rx) and isinstance(elem, str)):
            return elem
        if substitute_text:
            return rx.sub(value, elem)
        return elem if rx.search(elem) is None else value

    vectorized = np.vectorize(_replace_one, otypes=[np.object_])

    if mask is not None:
        values[mask] = vectorized(values[mask])
    else:
        values[:] = vectorized(values)