Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/arrays/_arrow_string_mixins.py: 31%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

55 statements  

1from __future__ import annotations 

2 

3from typing import Literal 

4 

5import numpy as np 

6 

7from pandas.compat import pa_version_under10p1 

8 

9if not pa_version_under10p1: 

10 import pyarrow as pa 

11 import pyarrow.compute as pc 

12 

13 

class ArrowStringArrayMixin:
    """
    Mixin providing string operations implemented with pyarrow compute kernels.

    Every method reads ``self._pa_array``, runs one or more ``pyarrow.compute``
    functions on it and wraps the outcome with ``type(self)(result)``.  A
    concrete subclass therefore has to supply ``_pa_array`` and a constructor
    that accepts such a result; the mixin's own ``__init__`` always raises.
    """

    # Placeholder for the pyarrow data every method below passes to the
    # compute kernels; subclasses are expected to set the real value.
    _pa_array = None

    def __init__(self, *args, **kwargs) -> None:
        # The mixin is not constructible on its own.
        raise NotImplementedError

    def _str_pad(
        self,
        width: int,
        side: Literal["left", "right", "both"] = "left",
        fillchar: str = " ",
    ):
        """
        Pad each string to ``width`` with ``fillchar``.

        Parameters
        ----------
        width : int
            Passed to the pyarrow kernel as ``width``.
        side : {"left", "right", "both"}, default "left"
            Selects the kernel: ``utf8_lpad``, ``utf8_rpad`` or
            ``utf8_center`` respectively.
        fillchar : str, default " "
            Passed to the pyarrow kernel as ``padding``.

        Returns
        -------
        type(self)
            New instance wrapping the padded result.

        Raises
        ------
        ValueError
            If ``side`` is not one of the three accepted values.
        """
        if side == "left":
            pa_pad = pc.utf8_lpad
        elif side == "right":
            pa_pad = pc.utf8_rpad
        elif side == "both":
            pa_pad = pc.utf8_center
        else:
            raise ValueError(
                f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"
            )
        return type(self)(pa_pad(self._pa_array, width=width, padding=fillchar))

    def _str_get(self, i: int):
        """
        Extract the element at position ``i`` from each string.

        Parameters
        ----------
        i : int
            Position to extract; negative values count from the end.

        Returns
        -------
        type(self)
            New instance holding the one-unit slice at ``i`` for every
            entry where ``i`` is in bounds, and null everywhere else
            (including entries whose length is null).
        """
        lengths = pc.utf8_length(self._pa_array)
        if i >= 0:
            # Position i exists only when i < length.
            out_of_bounds = pc.greater_equal(i, lengths)
            start = i
            stop = i + 1
            step = 1
        else:
            # Position i (negative) exists only when -i <= length.
            out_of_bounds = pc.greater(-i, lengths)
            start = i
            # Walk backwards by one so the slice bounds stay negative; a
            # forward slice for i == -1 would need stop == 0.
            stop = i - 1
            step = -1
        # A null length is counted as out of bounds so the entry ends up null.
        not_out_of_bounds = pc.invert(out_of_bounds.fill_null(True))
        selected = pc.utf8_slice_codeunits(
            self._pa_array, start=start, stop=stop, step=step
        )
        null_value = pa.scalar(
            None, type=self._pa_array.type  # type: ignore[attr-defined]
        )
        result = pc.if_else(not_out_of_bounds, selected, null_value)
        return type(self)(result)

    def _str_slice_replace(
        self, start: int | None = None, stop: int | None = None, repl: str | None = None
    ):
        """
        Replace the slice ``[start, stop)`` of each string with ``repl``.

        Parameters
        ----------
        start : int or None, default None
            Slice start; ``None`` is treated as ``0``.
        stop : int or None, default None
            Slice stop; ``None`` is treated as the largest int64 value.
        repl : str or None, default None
            Replacement text; ``None`` is treated as the empty string.

        Returns
        -------
        type(self)
            New instance wrapping the output of ``utf8_replace_slice``.
        """
        if repl is None:
            repl = ""
        if start is None:
            start = 0
        if stop is None:
            stop = np.iinfo(np.int64).max
        return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl))

    def _str_capitalize(self):
        """Apply ``pyarrow.compute.utf8_capitalize`` to the data and re-wrap it."""
        return type(self)(pc.utf8_capitalize(self._pa_array))

    def _str_title(self):
        """Apply ``pyarrow.compute.utf8_title`` to the data and re-wrap it."""
        return type(self)(pc.utf8_title(self._pa_array))

    def _str_swapcase(self):
        """Apply ``pyarrow.compute.utf8_swapcase`` to the data and re-wrap it."""
        return type(self)(pc.utf8_swapcase(self._pa_array))

    def _str_removesuffix(self, suffix: str):
        """
        Strip ``suffix`` from the end of each string that ends with it.

        Parameters
        ----------
        suffix : str
            Text to remove.  An empty suffix leaves every value unchanged.

        Returns
        -------
        type(self)
            New instance in which entries ending with ``suffix`` have it
            removed and all other entries are kept as they were.
        """
        if not suffix:
            # -len("") is 0, so the slice below would become [0:0] and turn
            # every entry reported as matching into an empty string.  Removing
            # an empty suffix must be a no-op (as with str.removesuffix), so
            # skip the kernels entirely.
            return type(self)(self._pa_array)
        ends_with = pc.ends_with(self._pa_array, pattern=suffix)
        removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
        result = pc.if_else(ends_with, removed, self._pa_array)
        return type(self)(result)

59 

60 def _str_slice_replace( 

61 self, start: int | None = None, stop: int | None = None, repl: str | None = None 

62 ): 

63 if repl is None: 

64 repl = "" 

65 if start is None: 

66 start = 0 

67 if stop is None: 

68 stop = np.iinfo(np.int64).max 

69 return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl)) 

70 

71 def _str_capitalize(self): 

72 return type(self)(pc.utf8_capitalize(self._pa_array)) 

73 

74 def _str_title(self): 

75 return type(self)(pc.utf8_title(self._pa_array)) 

76 

77 def _str_swapcase(self): 

78 return type(self)(pc.utf8_swapcase(self._pa_array)) 

79 

80 def _str_removesuffix(self, suffix: str): 

81 ends_with = pc.ends_with(self._pa_array, pattern=suffix) 

82 removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix)) 

83 result = pc.if_else(ends_with, removed, self._pa_array) 

84 return type(self)(result)