Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/bigquery/_pyarrow_helpers.py: 62%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

34 statements  

1# Copyright 2023 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Shared helper functions for connecting BigQuery and pyarrow. 

16 

17NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, 

18instead. See: go/pandas-gbq-and-bigframes-redundancy, 

19https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/bigquery_to_pyarrow.py 

20and 

21https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py 

22""" 

23 

24from typing import Any 

25 

26try: 

27 import pyarrow # type: ignore 

28except ImportError: 

29 pyarrow = None 

30 

31try: 

32 import db_dtypes # type: ignore 

33 

34 db_dtypes_import_exception = None 

35except ImportError as exc: 

36 db_dtypes = None 

37 db_dtypes_import_exception = exc 

38 

39 

def pyarrow_datetime():
    """Return the Arrow type used for BigQuery DATETIME values.

    A microsecond-resolution timestamp with no time zone attached.
    """
    tz_naive_us = pyarrow.timestamp("us", tz=None)
    return tz_naive_us

42 

43 

def pyarrow_numeric():
    """Return the Arrow type used for BigQuery NUMERIC values.

    A 128-bit decimal with precision 38 and scale 9.
    """
    numeric_type = pyarrow.decimal128(38, 9)
    return numeric_type

46 

47 

def pyarrow_bignumeric():
    """Return the Arrow type used for BigQuery BIGNUMERIC values.

    A 256-bit decimal with precision 76 and scale 38. The 77th digit is
    partial, hence precision 76:
    https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
    """
    bignumeric_type = pyarrow.decimal256(76, 38)
    return bignumeric_type

52 

53 

def pyarrow_time():
    """Return the Arrow type used for BigQuery TIME values.

    A 64-bit time-of-day with microsecond resolution.
    """
    time_us = pyarrow.time64("us")
    return time_us

56 

57 

def pyarrow_timestamp():
    """Return the Arrow type used for BigQuery TIMESTAMP values.

    A microsecond-resolution timestamp pinned to UTC.
    """
    utc_us = pyarrow.timestamp("us", tz="UTC")
    return utc_us

60 

61 

# Mapping tables between BigQuery scalar type names and pyarrow types.
# Both start empty and are populated only when pyarrow is importable.
_BQ_TO_ARROW_SCALARS = {}
_ARROW_SCALAR_IDS_TO_BQ = {}

if pyarrow:
    # This mapping is duplicated in bigquery_storage/test/unit/test_reader.py.
    # When modifying it, be sure to update it there as well.
    # NOTE: the "BIGNUMERIC" entry is appended after this literal, mirroring
    # how it was historically added in _pandas_helpers.py.
    _BQ_TO_ARROW_SCALARS = dict(
        BOOL=pyarrow.bool_,
        BOOLEAN=pyarrow.bool_,
        BYTES=pyarrow.binary,
        DATE=pyarrow.date32,
        DATETIME=pyarrow_datetime,
        FLOAT=pyarrow.float64,
        FLOAT64=pyarrow.float64,
        GEOGRAPHY=pyarrow.string,
        INT64=pyarrow.int64,
        INTEGER=pyarrow.int64,
        # Normally, we'd prefer the JSON type built in to pyarrow (added in
        # 19.0.0), but we'd like this to map as closely to the BQ Storage API
        # as possible, which uses the string() dtype, as JSON support in Arrow
        # predates JSON support in BigQuery by several years.
        JSON=pyarrow.string,
        NUMERIC=pyarrow_numeric,
        STRING=pyarrow.string,
        TIME=pyarrow_time,
        TIMESTAMP=pyarrow_timestamp,
    )

    # DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead.
    # Keys are Arrow type ids, see:
    # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes
    # NOTE: all extension types (e.g. json_, uuid, db_dtypes.JSONArrowType)
    # share a single id (31 as of version 19.0.1), so extension types must
    # not be matched by id and are deliberately absent here.
    _ARROW_SCALAR_IDS_TO_BQ = {
        arrow_type.id: bq_name
        for arrow_type, bq_name in (
            (pyarrow.bool_(), "BOOL"),
            (pyarrow.int8(), "INT64"),
            (pyarrow.int16(), "INT64"),
            (pyarrow.int32(), "INT64"),
            (pyarrow.int64(), "INT64"),
            (pyarrow.uint8(), "INT64"),
            (pyarrow.uint16(), "INT64"),
            (pyarrow.uint32(), "INT64"),
            (pyarrow.uint64(), "INT64"),
            (pyarrow.float16(), "FLOAT64"),
            (pyarrow.float32(), "FLOAT64"),
            (pyarrow.float64(), "FLOAT64"),
            (pyarrow.time32("ms"), "TIME"),
            (pyarrow.time64("ns"), "TIME"),
            (pyarrow.timestamp("ns"), "TIMESTAMP"),
            (pyarrow.date32(), "DATE"),
            (pyarrow.date64(), "DATETIME"),  # because millisecond resolution
            (pyarrow.binary(), "BYTES"),
            (pyarrow.string(), "STRING"),  # also alias for pyarrow.utf8()
            (pyarrow.large_string(), "STRING"),
            # The exact scale and precision don't matter; only the id is used.
            (pyarrow.decimal128(38, scale=9), "NUMERIC"),
        )
    }

    _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
    # The exact decimal's scale and precision are not important, as only
    # the type ID matters, and it's the same for all decimal256 instances.
    _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"

125 

126 

def bq_to_arrow_scalars(bq_scalar: str):
    """
    DEPRECATED: update pandas_gbq.schema.bigquery_to_pyarrow, instead, which is
    to be added in https://github.com/googleapis/python-bigquery-pandas/pull/893.

    Returns:
        The Arrow scalar type that the input BigQuery scalar type maps to,
        or None when the BigQuery scalar type is unknown.
    """
    try:
        return _BQ_TO_ARROW_SCALARS[bq_scalar]
    except KeyError:
        # Unknown BigQuery type name: mirror dict.get's missing-key behavior.
        return None

137 

138 

def arrow_scalar_ids_to_bq(arrow_scalar: Any):
    """
    DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead.

    Returns:
        The BigQuery scalar type that the input arrow scalar type maps to,
        or None when the arrow scalar type id is unknown.
    """
    try:
        return _ARROW_SCALAR_IDS_TO_BQ[arrow_scalar]
    except KeyError:
        # Unknown Arrow type id: mirror dict.get's missing-key behavior.
        return None