1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Classes for extract (export) jobs."""
16
17import typing
18
19from google.cloud.bigquery import _helpers
20from google.cloud.bigquery.model import ModelReference
21from google.cloud.bigquery.table import Table
22from google.cloud.bigquery.table import TableListItem
23from google.cloud.bigquery.table import TableReference
24from google.cloud.bigquery.job.base import _AsyncJob
25from google.cloud.bigquery.job.base import _JobConfig
26from google.cloud.bigquery.job.base import _JobReference
27
28
class ExtractJobConfig(_JobConfig):
    """Configuration options for extract jobs.

    Every property on this class is optional; a property left as
    :data:`None` falls back to the server-side default. Properties may also
    be supplied at construction time as keyword arguments named after the
    property.
    """

    def __init__(self, **kwargs):
        # "extract" is handed to the base configuration class as its first
        # argument; remaining keyword arguments are forwarded untouched.
        super().__init__("extract", **kwargs)

    @property
    def compression(self):
        """google.cloud.bigquery.job.Compression: Compression applied to the
        exported files.

        Stored under the ``compression`` key of the configuration.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression
        """
        return self._get_sub_prop("compression")

    @compression.setter
    def compression(self, value):
        self._set_sub_prop("compression", value)

    @property
    def destination_format(self):
        """google.cloud.bigquery.job.DestinationFormat: File format of the
        exported data.

        Stored under the ``destinationFormat`` key of the configuration.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format
        """
        return self._get_sub_prop("destinationFormat")

    @destination_format.setter
    def destination_format(self, value):
        self._set_sub_prop("destinationFormat", value)

    @property
    def field_delimiter(self):
        """str: Delimiter placed between fields in the exported data.

        Stored under the ``fieldDelimiter`` key of the configuration.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter
        """
        return self._get_sub_prop("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._set_sub_prop("fieldDelimiter", value)

    @property
    def print_header(self):
        """bool: Whether a header row is written to the exported data.

        Stored under the ``printHeader`` key of the configuration.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header
        """
        return self._get_sub_prop("printHeader")

    @print_header.setter
    def print_header(self, value):
        self._set_sub_prop("printHeader", value)

    @property
    def use_avro_logical_types(self):
        """bool: Whether logical types are used when extracting to the Avro
        format (e.g. exporting TIMESTAMP as an Avro logical type rather than
        as a raw INTEGER).

        Stored under the ``useAvroLogicalTypes`` key of the configuration.
        Unlike the other setters on this class, the assigned value is coerced
        with :func:`bool` before it is stored.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.use_avro_logical_types
        """
        return self._get_sub_prop("useAvroLogicalTypes")

    @use_avro_logical_types.setter
    def use_avro_logical_types(self, value):
        self._set_sub_prop("useAvroLogicalTypes", bool(value))
104
105
class ExtractJob(_AsyncJob):
    """Asynchronous job: extract data from a table or model into Cloud Storage.

    Args:
        job_id (str): the job's ID.

        source (Union[ \
            google.cloud.bigquery.table.TableReference, \
            google.cloud.bigquery.model.ModelReference \
        ]):
            Table or Model from which data is to be extracted.

        destination_uris (List[str]):
            URIs describing where the extracted data will be written in Cloud
            Storage, using the format ``gs://<bucket_name>/<object_name_or_glob>``.

        client (google.cloud.bigquery.client.Client):
            A client which holds credentials and project configuration.

        job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]):
            Extra configuration options for the extract job.
    """

    _JOB_TYPE = "extract"
    _CONFIG_CLASS = ExtractJobConfig

    def __init__(self, job_id, source, destination_uris, client, job_config=None):
        super().__init__(job_id, client)

        if job_config is not None:
            # The config's property mapping is stored by reference, not
            # copied, so the source/destination written below land in it too.
            self._properties["configuration"] = job_config._properties

        if source:
            source_ref = {"projectId": source.project, "datasetId": source.dataset_id}

            # Table-like sources are sent as "sourceTable"; anything else is
            # treated as a model and sent as "sourceModel".
            if isinstance(source, (Table, TableListItem, TableReference)):
                source_ref["tableId"] = source.table_id
                source_key = "sourceTable"
            else:
                source_ref["modelId"] = source.model_id
                source_key = "sourceModel"

            _helpers._set_sub_prop(
                self._properties, ["configuration", "extract", source_key], source_ref
            )

        if destination_uris:
            _helpers._set_sub_prop(
                self._properties,
                ["configuration", "extract", "destinationUris"],
                destination_uris,
            )

    @property
    def configuration(self) -> ExtractJobConfig:
        """The configuration for this extract job."""
        return typing.cast(ExtractJobConfig, super().configuration)

    @property
    def source(self):
        """Optional[Union[ \
            google.cloud.bigquery.table.TableReference, \
            google.cloud.bigquery.model.ModelReference \
        ]]: Table or Model from which data is to be extracted.

        ``sourceTable`` takes precedence over ``sourceModel``. Returns
        :data:`None` when the job's properties contain neither, which can
        happen for a job built by :meth:`from_api_repr` from a resource that
        names no source.
        """
        table_repr = _helpers._get_sub_prop(
            self._properties, ["configuration", "extract", "sourceTable"]
        )
        if table_repr:
            return TableReference.from_api_repr(table_repr)

        model_repr = _helpers._get_sub_prop(
            self._properties, ["configuration", "extract", "sourceModel"]
        )
        if model_repr:
            return ModelReference.from_api_repr(model_repr)

        # Neither source is recorded: report "no source" instead of handing an
        # empty value to ModelReference.from_api_repr and failing in a getter.
        return None

    @property
    def destination_uris(self):
        """List[str]: URIs describing where the extracted data will be
        written in Cloud Storage, using the format
        ``gs://<bucket_name>/<object_name_or_glob>``.
        """
        return _helpers._get_sub_prop(
            self._properties, ["configuration", "extract", "destinationUris"]
        )

    @property
    def compression(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`.
        """
        return self.configuration.compression

    @property
    def destination_format(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`.
        """
        return self.configuration.destination_format

    @property
    def field_delimiter(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`.
        """
        return self.configuration.field_delimiter

    @property
    def print_header(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`.
        """
        return self.configuration.print_header

    @property
    def destination_uri_file_counts(self):
        """Return file counts from job statistics, if present.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts

        Returns:
            Optional[List[int]]:
                A list of integer counts, each representing the number of files
                per destination URI or URI pattern specified in the extract
                configuration. These values will be in the same order as the URIs
                specified in the 'destinationUris' field. Returns None if job is
                not yet complete.
        """
        counts = self._job_statistics().get("destinationUriFileCounts")
        if counts is None:
            return None
        # Each entry is passed through int() so callers always receive integers.
        return [int(count) for count in counts]

    def to_api_repr(self):
        """Generate a resource for :meth:`_begin`.

        Returns:
            Dict: A mapping holding only the ``jobReference`` and
            ``configuration`` entries of this job's properties.
        """
        # Exclude statistics, if set.
        return {
            "jobReference": self._properties["jobReference"],
            "configuration": self._properties["configuration"],
        }

    @classmethod
    def from_api_repr(cls, resource: dict, client) -> "ExtractJob":
        """Factory: construct a job given its API representation

        .. note::

            This method assumes that the project found in the resource matches
            the client's project.

        Args:
            resource (Dict): extract job resource returned from the API

            client (google.cloud.bigquery.client.Client):
                Client which holds credentials and project
                configuration for the job.

        Returns:
            google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``.
        """
        cls._check_resource_config(resource)
        job_ref = _JobReference._from_api_repr(resource["jobReference"])
        # Source and destination are passed as None here; they are taken from
        # ``resource`` when the properties are set below.
        job = cls(job_ref, None, None, client=client)
        job._set_properties(resource)
        return job