1"""
2Methods used by Block.replace and related methods.
3"""
4from __future__ import annotations
5
6import operator
7import re
8from typing import (
9 Any,
10 Pattern,
11)
12
13import numpy as np
14
15from pandas._typing import (
16 ArrayLike,
17 Scalar,
18 npt,
19)
20
21from pandas.core.dtypes.common import (
22 is_re,
23 is_re_compilable,
24 is_scalar,
25)
26from pandas.core.dtypes.missing import isna
27
28
def should_use_regex(regex: bool, to_replace: Any) -> bool:
    """
    Decide whether to treat `to_replace` as a regular expression.

    Parameters
    ----------
    regex : bool
        Whether regex matching was requested. Overridden to True when
        ``is_re(to_replace)`` holds.
    to_replace : Any
        Candidate pattern.

    Returns
    -------
    bool
        True only if regex matching applies, ``is_re_compilable`` accepts
        `to_replace`, and the compiled pattern is not the empty string.
    """
    wants_regex = True if is_re(to_replace) else regex
    if not wants_regex:
        # Hand back the caller's falsy flag untouched.
        return wants_regex

    if not is_re_compilable(to_replace):
        return False

    # Don't use regex if the pattern is empty.
    return re.compile(to_replace).pattern != ""
41
42
def compare_or_regex_search(
    a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: npt.NDArray[np.bool_]
) -> ArrayLike:
    """
    Compare two array-like inputs of the same shape or two scalar values.

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array-like
    b : scalar or regex pattern
    regex : bool
    mask : np.ndarray[bool]
        When `a` is an ndarray, only ``a[mask]`` is compared against `b`;
        positions where `mask` is False are reported as False. When `b` is
        NA, ``~mask`` is returned directly.

    Returns
    -------
    mask : array-like of bool

    Raises
    ------
    TypeError
        If `a` is an ndarray but the comparison yields a scalar.
    """
    if isna(b):
        return ~mask

    def _check_comparison_types(
        result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern
    ) -> None:
        """
        Raise TypeError if comparing the ndarray `a` with `b` gave a scalar.

        A scalar result for an ndarray input means the two operands could
        not be compared element-wise. Returns None otherwise.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            a_name = f"ndarray(dtype={a.dtype})"
            b_name = type(b).__name__
            raise TypeError(
                f"Cannot compare types {repr(a_name)} and {repr(b_name)}"
            )

    if not regex or not should_use_regex(regex, b):
        # TODO: should use missing.mask_missing?
        op = lambda x: operator.eq(x, b)
    else:
        # otypes must be given explicitly: without it np.vectorize raises
        # ValueError on size-0 input, which happens when every element of
        # `a` is masked out (e.g. an all-NA array).
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False,
            otypes=[bool],
        )

    if isinstance(a, np.ndarray) and mask is not None:
        # GH#32621 use mask to avoid comparing to NAs
        a = a[mask]
        result = op(a)

        if isinstance(result, np.ndarray):
            # The result only covers the masked subset of `a`; scatter it
            # back into a full-shape array, leaving unmasked slots False.
            tmp = np.zeros(mask.shape, dtype=np.bool_)
            tmp[mask] = result
            result = tmp
    else:
        # `a` was not subset by the mask, so the result is already aligned
        # with `a` and must not be re-scattered through the mask.
        result = op(a)

    _check_comparison_types(result, a, b)
    return result
107
108
def replace_regex(
    values: ArrayLike, rx: re.Pattern, value, mask: npt.NDArray[np.bool_] | None
) -> None:
    """
    Apply a regex replacement to the string elements of `values`.

    Parameters
    ----------
    values : ArrayLike
        Object dtype.
    rx : re.Pattern
    value : Any
        Replacement. A string is substituted via ``rx.sub``; anything else
        replaces the whole matching element.
    mask : np.ndarray[bool], optional
        If given, only ``values[mask]`` is processed and written back.

    Notes
    -----
    Alters values in-place. Elements that are not ``str`` are left as they
    are, as is every element when ``is_re(rx)`` is False.
    """
    if isna(value) or not isinstance(value, str):
        # Replacement is not a string (numeric, nan, object): a matching
        # string is swapped out wholesale for `value`.
        def _substitute(text):
            return text if rx.search(text) is None else value

    else:
        # Replacement is a string: rewrite only the matched portions.
        def _substitute(text):
            return rx.sub(value, text)

    def re_replacer(s):
        if not (is_re(rx) and isinstance(s, str)):
            return s
        return _substitute(s)

    vectorized = np.vectorize(re_replacer, otypes=[np.object_])

    if mask is not None:
        values[mask] = vectorized(values[mask])
    else:
        values[:] = vectorized(values)