Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/array_algos/replace.py: 23%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

48 statements  

1""" 

2Methods used by Block.replace and related methods. 

3""" 

4from __future__ import annotations 

5 

6import operator 

7import re 

8from typing import ( 

9 Any, 

10 Pattern, 

11) 

12 

13import numpy as np 

14 

15from pandas._typing import ( 

16 ArrayLike, 

17 Scalar, 

18 npt, 

19) 

20 

21from pandas.core.dtypes.common import ( 

22 is_re, 

23 is_re_compilable, 

24 is_scalar, 

25) 

26from pandas.core.dtypes.missing import isna 

27 

28 

def should_use_regex(regex: bool, to_replace: Any) -> bool:
    """
    Report whether `to_replace` should be handled as a regular expression.

    A compiled pattern switches regex handling on regardless of the `regex`
    flag. Regex handling is rejected when `to_replace` cannot be compiled
    or when the compiled pattern is the empty string.
    """
    # An already-compiled pattern overrides whatever flag the caller passed.
    use_regex = True if is_re(to_replace) else regex
    if not use_regex:
        return use_regex

    compilable = is_re_compilable(to_replace)
    if not compilable:
        return compilable

    # Don't use regex if the pattern is empty.
    return re.compile(to_replace).pattern != ""

41 

42 

def compare_or_regex_search(
    a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: npt.NDArray[np.bool_]
) -> ArrayLike:
    """
    Compare two array-like inputs of the same shape or two scalar values.

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array-like
    b : scalar or regex pattern
    regex : bool
    mask : np.ndarray[bool]
        When `a` is an ndarray only ``a[mask]`` is compared; positions not
        selected by `mask` are False in the result. If `b` is NA, ``~mask``
        is returned without comparing anything.

    Returns
    -------
    mask : array-like of bool

    Raises
    ------
    TypeError
        If `a` is an ndarray and the comparison yields a scalar instead of
        an element-wise result.
    """
    if isna(b):
        return ~mask

    def _check_comparison_types(
        result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern
    ) -> None:
        """
        Raise TypeError if comparing ndarray `a` with `b` gave a scalar.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            left = f"ndarray(dtype={a.dtype})"
            right = type(b).__name__
            raise TypeError(f"Cannot compare types {repr(left)} and {repr(right)}")

    if not regex or not should_use_regex(regex, b):
        # TODO: should use missing.mask_missing?
        op = lambda x: operator.eq(x, b)
    else:
        # `b` is the same for every element, so classify it once.
        b_is_pattern = isinstance(b, (str, Pattern))
        # An explicit output dtype lets the vectorized search accept size-0
        # input (e.g. when `mask` selects nothing); without `otypes`
        # np.vectorize raises ValueError on empty arrays.
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if b_is_pattern and isinstance(x, str)
            else False,
            otypes=[np.bool_],
        )

    # GH#32621 use mask to avoid comparing to NAs
    if isinstance(a, np.ndarray):
        a = a[mask]

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        np.place(tmp, mask, result)
        result = tmp

    _check_comparison_types(result, a, b)
    return result

107 

108 

def replace_regex(
    values: ArrayLike, rx: re.Pattern, value, mask: npt.NDArray[np.bool_] | None
) -> None:
    """
    Replace string elements of `values` that match `rx`, in place.

    Parameters
    ----------
    values : ArrayLike
        Object dtype.
    rx : re.Pattern
    value : Any
        If a string, it is substituted for each match via ``rx.sub``.
        Otherwise any string element containing a match is replaced
        wholesale by `value`.
    mask : np.ndarray[bool], optional
        If given, only ``values[mask]`` is processed.

    Notes
    -----
    Alters values in-place. Non-string elements are left untouched.
    """
    # `rx` is the same for every element, so classify it once instead of
    # once per element inside the vectorized callback.
    rx_is_pattern = is_re(rx)

    # deal with replacing values with objects (strings) that match but
    # whose replacement is not a string (numeric, nan, object)
    if isna(value) or not isinstance(value, str):

        def re_replacer(s):
            if rx_is_pattern and isinstance(s, str):
                return value if rx.search(s) is not None else s
            return s

    else:
        # value is guaranteed to be a string here, s can be either a string
        # or null if it's null it gets returned
        def re_replacer(s):
            if rx_is_pattern and isinstance(s, str):
                return rx.sub(value, s)
            return s

    f = np.vectorize(re_replacer, otypes=[np.object_])

    if mask is None:
        values[:] = f(values)
    else:
        values[mask] = f(values[mask])

149 else: 

150 values[mask] = f(values[mask])