Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/interchange/utils.py: 75%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

44 statements  

1""" 

2Utility functions and objects for implementing the interchange API. 

3""" 

4 

5from __future__ import annotations 

6 

7import re 

8import typing 

9 

10import numpy as np 

11 

12from pandas._typing import DtypeObj 

13 

14import pandas as pd 

15from pandas.api.types import is_datetime64_dtype 

16 

17 

class ArrowCTypes:
    """
    Namespace of Apache Arrow C type format strings.

    Members are plain ``str`` class attributes (this is not an
    ``enum.Enum``), so they can be looked up by name with ``getattr`` and
    compared directly against format strings.

    The Arrow C data interface:
    https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings
    """

    NULL = "n"
    BOOL = "b"
    INT8 = "c"
    UINT8 = "C"
    INT16 = "s"
    UINT16 = "S"
    INT32 = "i"
    UINT32 = "I"
    INT64 = "l"
    UINT64 = "L"
    FLOAT16 = "e"
    FLOAT32 = "f"
    FLOAT64 = "g"
    STRING = "u"  # utf-8
    LARGE_STRING = "U"  # utf-8
    DATE32 = "tdD"
    DATE64 = "tdm"
    # TIMESTAMP and TIME are ``str.format`` templates, not finished format
    # strings; fill in ``resolution`` (and ``tz`` for TIMESTAMP) before use.
    # Resolution:
    #   - seconds -> 's'
    #   - milliseconds -> 'm'
    #   - microseconds -> 'u'
    #   - nanoseconds -> 'n'
    TIMESTAMP = "ts{resolution}:{tz}"
    TIME = "tt{resolution}"

50 

51 

class Endianness:
    """
    Namespace of single-character byte-order markers for a data-type.

    Members are plain ``str`` class attributes, not ``enum.Enum`` members.
    """

    LITTLE = "<"
    BIG = ">"
    NATIVE = "="
    NA = "|"

59 

60 

def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:
    """
    Represent pandas `dtype` as a format string in Apache Arrow C notation.

    Parameters
    ----------
    dtype : np.dtype or pandas extension dtype
        Datatype of pandas DataFrame to represent.

    Returns
    -------
    str
        Format string in Apache Arrow C notation of the given `dtype`.

    Raises
    ------
    NotImplementedError
        If no Arrow C format string is defined here for `dtype`. This
        includes datetime64 dtypes whose unit is not one of seconds,
        milliseconds, microseconds or nanoseconds (e.g. unit-less
        ``datetime64``, ``datetime64[D]``, ``datetime64[m]``) or whose
        step is not 1 (e.g. ``datetime64[5ns]``).
    """
    if isinstance(dtype, pd.CategoricalDtype):
        return ArrowCTypes.INT64
    elif dtype == np.dtype("O"):
        return ArrowCTypes.STRING

    # Most dtypes map directly by name: e.g. "int64" -> ArrowCTypes.INT64.
    format_str = getattr(ArrowCTypes, dtype.name.upper(), None)
    if format_str is not None:
        return format_str

    if is_datetime64_dtype(dtype):
        # Read the unit from NumPy rather than slicing ``dtype.str``: the
        # first character alone is ambiguous ("m" is minutes in NumPy but
        # milliseconds in Arrow) and a unit-less dtype has no "[...]" part.
        unit, step = np.datetime_data(typing.cast(np.dtype, dtype))
        if step == 1 and unit in ("s", "ms", "us", "ns"):
            # Arrow resolution codes are the first letter of these units:
            # 's', 'm', 'u', 'n'.
            return ArrowCTypes.TIMESTAMP.format(resolution=unit[0], tz="")

    raise NotImplementedError(
        f"Conversion of {dtype} to Arrow C format string is not implemented."
    )