1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Classes for load jobs."""
16
17import typing
18from typing import FrozenSet, List, Iterable, Optional, Union
19
20from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
21from google.cloud.bigquery.enums import SourceColumnMatch
22from google.cloud.bigquery.external_config import HivePartitioningOptions
23from google.cloud.bigquery.format_options import ParquetOptions
24from google.cloud.bigquery import _helpers
25from google.cloud.bigquery.schema import SchemaField
26from google.cloud.bigquery.schema import _to_schema_fields
27from google.cloud.bigquery.table import RangePartitioning
28from google.cloud.bigquery.table import TableReference
29from google.cloud.bigquery.table import TimePartitioning
30from google.cloud.bigquery.job.base import _AsyncJob
31from google.cloud.bigquery.job.base import _JobConfig
32from google.cloud.bigquery.job.base import _JobReference
33from google.cloud.bigquery.query import ConnectionProperty
34
35
class ColumnNameCharacterMap:
    """String constants naming the character map applied to column names.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap
    """

    COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED = "COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED"
    """No column name character map was specified."""

    STRICT = "STRICT"
    """Flexible column names are supported; invalid column names are rejected."""

    V1 = "V1"
    """Alphanumeric and underscore characters are supported, and names must
    start with a letter or underscore. Invalid column names are normalized."""

    V2 = "V2"
    """Flexible column names are supported; invalid column names are normalized."""
54
55
class LoadJobConfig(_JobConfig):
    """Configuration options for load jobs.

    Set properties on the constructed configuration by using the property name
    as the name of a keyword argument. Values which are unset or :data:`None`
    use the BigQuery REST API default values. See the `BigQuery REST API
    reference documentation
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad>`_
    for a list of default values.

    Required options differ based on the
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format` value.
    For example, the BigQuery API's default value for
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format` is ``"CSV"``.
    When loading a CSV file, either
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` must be set or
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.autodetect` must be set to
    :data:`True`.
    """

    def __init__(self, **kwargs) -> None:
        # "load" is the job type key under which every sub-property below is
        # stored in the underlying resource dictionary.
        super().__init__("load", **kwargs)

    @property
    def allow_jagged_rows(self):
        """Optional[bool]: Allow missing trailing optional columns (CSV only).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows
        """
        return self._get_sub_prop("allowJaggedRows")

    @allow_jagged_rows.setter
    def allow_jagged_rows(self, value):
        self._set_sub_prop("allowJaggedRows", value)

    @property
    def allow_quoted_newlines(self):
        """Optional[bool]: Allow quoted data containing newline characters (CSV only).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines
        """
        return self._get_sub_prop("allowQuotedNewlines")

    @allow_quoted_newlines.setter
    def allow_quoted_newlines(self, value):
        self._set_sub_prop("allowQuotedNewlines", value)

    @property
    def autodetect(self):
        """Optional[bool]: Automatically infer the schema from a sample of the data.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect
        """
        return self._get_sub_prop("autodetect")

    @autodetect.setter
    def autodetect(self, value):
        self._set_sub_prop("autodetect", value)

    @property
    def clustering_fields(self):
        """Optional[List[str]]: Fields defining clustering for the table

        (Defaults to :data:`None`).

        Clustering fields are immutable after table creation.

        .. note::

           BigQuery supports clustering for both partitioned and
           non-partitioned tables.
        """
        prop = self._get_sub_prop("clustering")
        if prop is None:
            return None
        # Return a fresh list so callers cannot mutate the stored resource.
        return list(prop.get("fields", ()))

    @clustering_fields.setter
    def clustering_fields(self, value):
        """Optional[List[str]]: Fields defining clustering for the table

        (Defaults to :data:`None`). Setting :data:`None` removes the
        ``clustering`` entry from the configuration.
        """
        if value is not None:
            self._set_sub_prop("clustering", {"fields": value})
        else:
            self._del_sub_prop("clustering")

    @property
    def connection_properties(self) -> List[ConnectionProperty]:
        """Connection properties.

        Returns an empty list when no connection properties are set.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.connection_properties

        .. versionadded:: 3.7.0
        """
        resource = self._get_sub_prop("connectionProperties", [])
        return [ConnectionProperty.from_api_repr(prop) for prop in resource]

    @connection_properties.setter
    def connection_properties(self, value: Iterable[ConnectionProperty]):
        self._set_sub_prop(
            "connectionProperties",
            [prop.to_api_repr() for prop in value],
        )

    @property
    def create_disposition(self):
        """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior
        for creating tables.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition
        """
        return self._get_sub_prop("createDisposition")

    @create_disposition.setter
    def create_disposition(self, value):
        self._set_sub_prop("createDisposition", value)

    @property
    def create_session(self) -> Optional[bool]:
        """[Preview] If :data:`True`, creates a new session, where
        :attr:`~google.cloud.bigquery.job.LoadJob.session_info` will contain a
        random server generated session id.

        If :data:`False`, runs load job with an existing ``session_id`` passed in
        :attr:`~google.cloud.bigquery.job.LoadJobConfig.connection_properties`,
        otherwise runs load job in non-session mode.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_session

        .. versionadded:: 3.7.0
        """
        return self._get_sub_prop("createSession")

    @create_session.setter
    def create_session(self, value: Optional[bool]):
        self._set_sub_prop("createSession", value)

    @property
    def decimal_target_types(self) -> Optional[FrozenSet[str]]:
        """Possible SQL data types to which the source decimal values are converted.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.decimal_target_types

        .. versionadded:: 2.21.0
        """
        prop = self._get_sub_prop("decimalTargetTypes")
        if prop is not None:
            prop = frozenset(prop)
        return prop

    @decimal_target_types.setter
    def decimal_target_types(self, value: Optional[Iterable[str]]):
        if value is not None:
            # Stored as a list; the getter converts back to a frozenset.
            self._set_sub_prop("decimalTargetTypes", list(value))
        else:
            self._del_sub_prop("decimalTargetTypes")

    @property
    def destination_encryption_configuration(self):
        """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom
        encryption configuration for the destination table.

        Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
        if using default encryption.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration
        """
        prop = self._get_sub_prop("destinationEncryptionConfiguration")
        if prop is not None:
            prop = EncryptionConfiguration.from_api_repr(prop)
        return prop

    @destination_encryption_configuration.setter
    def destination_encryption_configuration(self, value):
        if value is not None:
            self._set_sub_prop(
                "destinationEncryptionConfiguration", value.to_api_repr()
            )
        else:
            self._del_sub_prop("destinationEncryptionConfiguration")

    @property
    def destination_table_description(self):
        """Optional[str]: Description of the destination table.

        Returns :data:`None` when no description is set, including when
        ``destinationTableProperties`` exists but only carries other keys
        (for example, only a friendly name).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description
        """
        prop = self._get_sub_prop("destinationTableProperties")
        if prop is None:
            return None
        # ``.get`` rather than indexing: the properties dict may exist without
        # a "description" key when only the friendly name has been set.
        return prop.get("description")

    @destination_table_description.setter
    def destination_table_description(self, value):
        keys = [self._job_type, "destinationTableProperties", "description"]
        if value is not None:
            _helpers._set_sub_prop(self._properties, keys, value)
        else:
            _helpers._del_sub_prop(self._properties, keys)

    @property
    def destination_table_friendly_name(self):
        """Optional[str]: Name given to destination table.

        Returns :data:`None` when no friendly name is set, including when
        ``destinationTableProperties`` exists but only carries other keys
        (for example, only a description).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name
        """
        prop = self._get_sub_prop("destinationTableProperties")
        if prop is None:
            return None
        # ``.get`` rather than indexing: the properties dict may exist without
        # a "friendlyName" key when only the description has been set.
        return prop.get("friendlyName")

    @destination_table_friendly_name.setter
    def destination_table_friendly_name(self, value):
        keys = [self._job_type, "destinationTableProperties", "friendlyName"]
        if value is not None:
            _helpers._set_sub_prop(self._properties, keys, value)
        else:
            _helpers._del_sub_prop(self._properties, keys)

    @property
    def encoding(self):
        """Optional[google.cloud.bigquery.job.Encoding]: The character encoding of the
        data.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding
        """
        return self._get_sub_prop("encoding")

    @encoding.setter
    def encoding(self, value):
        self._set_sub_prop("encoding", value)

    @property
    def field_delimiter(self):
        """Optional[str]: The separator for fields in a CSV file.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter
        """
        return self._get_sub_prop("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._set_sub_prop("fieldDelimiter", value)

    @property
    def hive_partitioning(self):
        """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
        it configures hive partitioning support.

        .. note::
            **Experimental**. This feature is experimental and might change or
            have limited support.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options
        """
        prop = self._get_sub_prop("hivePartitioningOptions")
        if prop is None:
            return None
        return HivePartitioningOptions.from_api_repr(prop)

    @hive_partitioning.setter
    def hive_partitioning(self, value):
        """Set the hive partitioning options.

        Raises:
            TypeError:
                If the value is neither a
                :class:`~.external_config.HivePartitioningOptions` instance
                nor :data:`None`.
        """
        if value is not None:
            if isinstance(value, HivePartitioningOptions):
                value = value.to_api_repr()
            else:
                raise TypeError("Expected a HivePartitioningOptions instance or None.")

        self._set_sub_prop("hivePartitioningOptions", value)

    @property
    def ignore_unknown_values(self):
        """Optional[bool]: Ignore extra values not represented in the table schema.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values
        """
        return self._get_sub_prop("ignoreUnknownValues")

    @ignore_unknown_values.setter
    def ignore_unknown_values(self, value):
        self._set_sub_prop("ignoreUnknownValues", value)

    @property
    def json_extension(self):
        """Optional[str]: The extension to use for writing JSON data to BigQuery. Only supports GeoJSON currently.

        See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.json_extension

        """
        return self._get_sub_prop("jsonExtension")

    @json_extension.setter
    def json_extension(self, value):
        self._set_sub_prop("jsonExtension", value)

    @property
    def max_bad_records(self):
        """Optional[int]: Number of invalid rows to ignore.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records
        """
        return _helpers._int_or_none(self._get_sub_prop("maxBadRecords"))

    @max_bad_records.setter
    def max_bad_records(self, value):
        self._set_sub_prop("maxBadRecords", value)

    @property
    def null_marker(self):
        """Optional[str]: Represents a null value (CSV only).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker
        """
        return self._get_sub_prop("nullMarker")

    @null_marker.setter
    def null_marker(self, value):
        self._set_sub_prop("nullMarker", value)

    @property
    def null_markers(self) -> Optional[List[str]]:
        """Optional[List[str]]: A list of strings represented as SQL NULL values in a CSV file.

        .. note::
            null_marker and null_markers can't be set at the same time.
            If null_marker is set, null_markers has to be not set.
            If null_markers is set, null_marker has to be not set.
            If both null_marker and null_markers are set at the same time, a user error would be thrown.
            Any strings listed in null_markers, including empty string would be interpreted as SQL NULL.
            This applies to all column types.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers
        """
        return self._get_sub_prop("nullMarkers")

    @null_markers.setter
    def null_markers(self, value: Optional[List[str]]):
        self._set_sub_prop("nullMarkers", value)

    @property
    def preserve_ascii_control_characters(self):
        """Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.preserve_ascii_control_characters
        """
        return self._get_sub_prop("preserveAsciiControlCharacters")

    @preserve_ascii_control_characters.setter
    def preserve_ascii_control_characters(self, value):
        # The value is coerced with ``bool``, so assigning ``None`` stores False.
        self._set_sub_prop("preserveAsciiControlCharacters", bool(value))

    @property
    def projection_fields(self) -> Optional[List[str]]:
        """Optional[List[str]]: If
        :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format` is set to
        "DATASTORE_BACKUP", indicates which entity properties to load into
        BigQuery from a Cloud Datastore backup.

        Property names are case sensitive and must be top-level properties. If
        no properties are specified, BigQuery loads all properties. If any
        named property isn't found in the Cloud Datastore backup, an invalid
        error is returned in the job result.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.projection_fields
        """
        return self._get_sub_prop("projectionFields")

    @projection_fields.setter
    def projection_fields(self, value: Optional[List[str]]):
        self._set_sub_prop("projectionFields", value)

    @property
    def quote_character(self):
        """Optional[str]: Character used to quote data sections (CSV only).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote
        """
        return self._get_sub_prop("quote")

    @quote_character.setter
    def quote_character(self, value):
        self._set_sub_prop("quote", value)

    @property
    def range_partitioning(self):
        """Optional[google.cloud.bigquery.table.RangePartitioning]:
        Configures range-based partitioning for destination table.

        .. note::
            **Beta**. The integer range partitioning feature is in a
            pre-release state and might change or have limited support.

        Only specify at most one of
        :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or
        :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.

        Raises:
            ValueError:
                If the value is not
                :class:`~google.cloud.bigquery.table.RangePartitioning` or
                :data:`None`.
        """
        resource = self._get_sub_prop("rangePartitioning")
        if resource is None:
            return None
        # The returned object wraps the stored resource dictionary directly.
        return RangePartitioning(_properties=resource)

    @range_partitioning.setter
    def range_partitioning(self, value):
        resource = value
        if isinstance(value, RangePartitioning):
            resource = value._properties
        elif value is not None:
            raise ValueError(
                "Expected value to be RangePartitioning or None, got {}.".format(value)
            )
        self._set_sub_prop("rangePartitioning", resource)

    @property
    def reference_file_schema_uri(self):
        """Optional[str]:
        When creating an external table, the user can provide a reference file with the
        table schema. This is enabled for the following formats:

        AVRO, PARQUET, ORC
        """
        return self._get_sub_prop("referenceFileSchemaUri")

    @reference_file_schema_uri.setter
    def reference_file_schema_uri(self, value):
        # A property setter's return value is discarded, so nothing is returned.
        self._set_sub_prop("referenceFileSchemaUri", value)

    @property
    def schema(self):
        """Optional[Sequence[Union[ \
            :class:`~google.cloud.bigquery.schema.SchemaField`, \
            Mapping[str, Any] \
        ]]]: Schema of the destination table.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema
        """
        schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"])
        if schema is None:
            return None
        return [SchemaField.from_api_repr(field) for field in schema]

    @schema.setter
    def schema(self, value):
        if value is None:
            self._del_sub_prop("schema")
            return

        value = _to_schema_fields(value)

        _helpers._set_sub_prop(
            self._properties,
            ["load", "schema", "fields"],
            [field.to_api_repr() for field in value],
        )

    @property
    def schema_update_options(self):
        """Optional[List[google.cloud.bigquery.job.SchemaUpdateOption]]: Specifies
        updates to the destination table schema to allow as a side effect of
        the load job.
        """
        return self._get_sub_prop("schemaUpdateOptions")

    @schema_update_options.setter
    def schema_update_options(self, values):
        self._set_sub_prop("schemaUpdateOptions", values)

    @property
    def skip_leading_rows(self):
        """Optional[int]: Number of rows to skip when reading data (CSV only).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows
        """
        return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # The value is stored in its string form. ``None`` must be kept as
        # ``None``: ``str(None)`` would store the literal text "None", which is
        # not a valid row count.
        self._set_sub_prop("skipLeadingRows", None if value is None else str(value))

    @property
    def source_format(self):
        """Optional[google.cloud.bigquery.job.SourceFormat]: File format of the data.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format
        """
        return self._get_sub_prop("sourceFormat")

    @source_format.setter
    def source_format(self, value):
        self._set_sub_prop("sourceFormat", value)

    @property
    def source_column_match(self) -> Optional[SourceColumnMatch]:
        """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the
        strategy used to match loaded columns to the schema. If not set, a sensible
        default is chosen based on how the schema is provided. If autodetect is
        used, then columns are matched by name. Otherwise, columns are matched by
        position. This is done to keep the behavior backward-compatible.

        Acceptable values are:

            SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option.
            POSITION: matches by position. This assumes that the columns are ordered
            the same way as the schema.
            NAME: matches by name. This reads the header row as column names and
            reorders columns to match the field names in the schema.

        See:

        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_column_match
        """
        value = self._get_sub_prop("sourceColumnMatch")
        return SourceColumnMatch(value) if value is not None else None

    @source_column_match.setter
    def source_column_match(self, value: Union[SourceColumnMatch, str, None]):
        """Set the column matching strategy.

        Raises:
            TypeError:
                If the value is not a
                :class:`~google.cloud.bigquery.enums.SourceColumnMatch`,
                a :class:`str`, or :data:`None`.
        """
        if value is not None and not isinstance(value, (SourceColumnMatch, str)):
            raise TypeError(
                "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None"
            )
        if isinstance(value, SourceColumnMatch):
            value = value.value
        # Falsy values (``None`` or an empty string) are stored as ``None``.
        self._set_sub_prop("sourceColumnMatch", value if value else None)

    @property
    def date_format(self) -> Optional[str]:
        """Optional[str]: Date format used for parsing DATE values.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.date_format
        """
        return self._get_sub_prop("dateFormat")

    @date_format.setter
    def date_format(self, value: Optional[str]):
        self._set_sub_prop("dateFormat", value)

    @property
    def datetime_format(self) -> Optional[str]:
        """Optional[str]: Date format used for parsing DATETIME values.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.datetime_format
        """
        return self._get_sub_prop("datetimeFormat")

    @datetime_format.setter
    def datetime_format(self, value: Optional[str]):
        self._set_sub_prop("datetimeFormat", value)

    @property
    def time_zone(self) -> Optional[str]:
        """Optional[str]: Default time zone that will apply when parsing timestamp
        values that have no specific time zone.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone
        """
        return self._get_sub_prop("timeZone")

    @time_zone.setter
    def time_zone(self, value: Optional[str]):
        self._set_sub_prop("timeZone", value)

    @property
    def time_format(self) -> Optional[str]:
        """Optional[str]: Date format used for parsing TIME values.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_format
        """
        return self._get_sub_prop("timeFormat")

    @time_format.setter
    def time_format(self, value: Optional[str]):
        self._set_sub_prop("timeFormat", value)

    @property
    def timestamp_format(self) -> Optional[str]:
        """Optional[str]: Date format used for parsing TIMESTAMP values.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.timestamp_format
        """
        return self._get_sub_prop("timestampFormat")

    @timestamp_format.setter
    def timestamp_format(self, value: Optional[str]):
        self._set_sub_prop("timestampFormat", value)

    @property
    def time_partitioning(self):
        """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based
        partitioning for the destination table.

        Only specify at most one of
        :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or
        :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
        """
        prop = self._get_sub_prop("timePartitioning")
        if prop is not None:
            prop = TimePartitioning.from_api_repr(prop)
        return prop

    @time_partitioning.setter
    def time_partitioning(self, value):
        if value is not None:
            self._set_sub_prop("timePartitioning", value.to_api_repr())
        else:
            self._del_sub_prop("timePartitioning")

    @property
    def use_avro_logical_types(self):
        """Optional[bool]: For loads of Avro data, governs whether Avro logical types are
        converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than
        raw types (e.g. INTEGER).
        """
        return self._get_sub_prop("useAvroLogicalTypes")

    @use_avro_logical_types.setter
    def use_avro_logical_types(self, value):
        # The value is coerced with ``bool``, so assigning ``None`` stores False.
        self._set_sub_prop("useAvroLogicalTypes", bool(value))

    @property
    def write_disposition(self):
        """Optional[google.cloud.bigquery.job.WriteDisposition]: Action that occurs if
        the destination table already exists.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition
        """
        return self._get_sub_prop("writeDisposition")

    @write_disposition.setter
    def write_disposition(self, value):
        self._set_sub_prop("writeDisposition", value)

    @property
    def parquet_options(self):
        """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional
        properties to set if ``sourceFormat`` is set to PARQUET.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.parquet_options
        """
        prop = self._get_sub_prop("parquetOptions")
        if prop is not None:
            prop = ParquetOptions.from_api_repr(prop)
        return prop

    @parquet_options.setter
    def parquet_options(self, value):
        if value is not None:
            self._set_sub_prop("parquetOptions", value.to_api_repr())
        else:
            self._del_sub_prop("parquetOptions")

    @property
    def column_name_character_map(self) -> str:
        """Optional[google.cloud.bigquery.job.ColumnNameCharacterMap]:
        Character map supported for column names in CSV/Parquet loads. Defaults
        to STRICT and can be overridden by Project Config Service. Using this
        option with unsupported load formats will result in an error.

        When nothing is stored, this getter returns
        ``COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED``.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.column_name_character_map
        """
        return self._get_sub_prop(
            "columnNameCharacterMap",
            ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
        )

    @column_name_character_map.setter
    def column_name_character_map(self, value: Optional[str]):
        # ``None`` is stored as the explicit "unspecified" constant rather than
        # removing the key.
        if value is None:
            value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
        self._set_sub_prop("columnNameCharacterMap", value)

    @property
    def timestamp_target_precision(self) -> Optional[List[int]]:
        """Optional[list[int]]: [Private Preview] Precisions (maximum number of
            total digits in base 10) for seconds of TIMESTAMP types that are
            allowed to the destination table for autodetection mode.

        Available for the formats: CSV.

        For the CSV Format, Possible values include:
            None, [], or [6]: timestamp(6) for all auto detected TIMESTAMP
                columns.
            [6, 12]: timestamp(6) for all auto detected TIMESTAMP columns that
                have less than 6 digits of subseconds. timestamp(12) for all auto
                detected TIMESTAMP columns that have more than 6 digits of
                subseconds.
            [12]: timestamp(12) for all auto detected TIMESTAMP columns.

        The order of the elements in this array is ignored. Inputs that have
        higher precision than the highest target precision in this array will
        be truncated.
        """
        return self._get_sub_prop("timestampTargetPrecision")

    @timestamp_target_precision.setter
    def timestamp_target_precision(self, value: Optional[List[int]]):
        if value is not None:
            self._set_sub_prop("timestampTargetPrecision", value)
        else:
            self._del_sub_prop("timestampTargetPrecision")
791
792
793class LoadJob(_AsyncJob):
794 """Asynchronous job for loading data into a table.
795
796 Can load from Google Cloud Storage URIs or from a file.
797
798 Args:
799 job_id (str): the job's ID
800
801 source_uris (Optional[Sequence[str]]):
802 URIs of one or more data files to be loaded. See
803 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris
804 for supported URI formats. Pass None for jobs that load from a file.
805
806 destination (google.cloud.bigquery.table.TableReference): reference to table into which data is to be loaded.
807
808 client (google.cloud.bigquery.client.Client):
809 A client which holds credentials and project configuration
810 for the dataset (which requires a project).
811 """
812
813 _JOB_TYPE = "load"
814 _CONFIG_CLASS = LoadJobConfig
815
816 def __init__(self, job_id, source_uris, destination, client, job_config=None):
817 super(LoadJob, self).__init__(job_id, client)
818
819 if job_config is not None:
820 self._properties["configuration"] = job_config._properties
821
822 if source_uris is not None:
823 _helpers._set_sub_prop(
824 self._properties, ["configuration", "load", "sourceUris"], source_uris
825 )
826
827 if destination is not None:
828 _helpers._set_sub_prop(
829 self._properties,
830 ["configuration", "load", "destinationTable"],
831 destination.to_api_repr(),
832 )
833
834 @property
835 def configuration(self) -> LoadJobConfig:
836 """The configuration for this load job."""
837 return typing.cast(LoadJobConfig, super().configuration)
838
839 @property
840 def destination(self):
841 """google.cloud.bigquery.table.TableReference: table where loaded rows are written
842
843 See:
844 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table
845 """
846 dest_config = _helpers._get_sub_prop(
847 self._properties, ["configuration", "load", "destinationTable"]
848 )
849 return TableReference.from_api_repr(dest_config)
850
851 @property
852 def source_uris(self):
853 """Optional[Sequence[str]]: URIs of data files to be loaded. See
854 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris
855 for supported URI formats. None for jobs that load from a file.
856 """
857 return _helpers._get_sub_prop(
858 self._properties, ["configuration", "load", "sourceUris"]
859 )
860
861 @property
862 def allow_jagged_rows(self):
863 """See
864 :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`.
865 """
866 return self.configuration.allow_jagged_rows
867
868 @property
869 def allow_quoted_newlines(self):
870 """See
871 :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`.
872 """
873 return self.configuration.allow_quoted_newlines
874
875 @property
876 def autodetect(self):
877 """See
878 :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`.
879 """
880 return self.configuration.autodetect
881
882 @property
883 def connection_properties(self) -> List[ConnectionProperty]:
884 """See
885 :attr:`google.cloud.bigquery.job.LoadJobConfig.connection_properties`.
886
887 .. versionadded:: 3.7.0
888 """
889 return self.configuration.connection_properties
890
891 @property
892 def create_disposition(self):
893 """See
894 :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`.
895 """
896 return self.configuration.create_disposition
897
898 @property
899 def create_session(self) -> Optional[bool]:
900 """See
901 :attr:`google.cloud.bigquery.job.LoadJobConfig.create_session`.
902
903 .. versionadded:: 3.7.0
904 """
905 return self.configuration.create_session
906
907 @property
908 def encoding(self):
909 """See
910 :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`.
911 """
912 return self.configuration.encoding
913
914 @property
915 def field_delimiter(self):
916 """See
917 :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`.
918 """
919 return self.configuration.field_delimiter
920
921 @property
922 def ignore_unknown_values(self):
923 """See
924 :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`.
925 """
926 return self.configuration.ignore_unknown_values
927
928 @property
929 def max_bad_records(self):
930 """See
931 :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`.
932 """
933 return self.configuration.max_bad_records
934
935 @property
936 def null_marker(self):
937 """See
938 :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`.
939 """
940 return self.configuration.null_marker
941
942 @property
943 def null_markers(self):
944 """See
945 :attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`.
946 """
947 return self.configuration.null_markers
948
949 @property
950 def quote_character(self):
951 """See
952 :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`.
953 """
954 return self.configuration.quote_character
955
956 @property
957 def reference_file_schema_uri(self):
958 """See:
959 attr:`google.cloud.bigquery.job.LoadJobConfig.reference_file_schema_uri`.
960 """
961 return self.configuration.reference_file_schema_uri
962
963 @property
964 def skip_leading_rows(self):
965 """See
966 :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`.
967 """
968 return self.configuration.skip_leading_rows
969
970 @property
971 def source_format(self):
972 """See
973 :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`.
974 """
975 return self.configuration.source_format
976
977 @property
978 def write_disposition(self):
979 """See
980 :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`.
981 """
982 return self.configuration.write_disposition
983
984 @property
985 def schema(self):
986 """See
987 :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`.
988 """
989 return self.configuration.schema
990
991 @property
992 def destination_encryption_configuration(self):
993 """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
994 encryption configuration for the destination table.
995
996 Custom encryption configuration (e.g., Cloud KMS keys)
997 or :data:`None` if using default encryption.
998
999 See
1000 :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`.
1001 """
1002 return self.configuration.destination_encryption_configuration
1003
1004 @property
1005 def destination_table_description(self):
1006 """Optional[str] name given to destination table.
1007
1008 See:
1009 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description
1010 """
1011 return self.configuration.destination_table_description
1012
1013 @property
1014 def destination_table_friendly_name(self):
1015 """Optional[str] name given to destination table.
1016
1017 See:
1018 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name
1019 """
1020 return self.configuration.destination_table_friendly_name
1021
1022 @property
1023 def range_partitioning(self):
1024 """See
1025 :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
1026 """
1027 return self.configuration.range_partitioning
1028
1029 @property
1030 def time_partitioning(self):
1031 """See
1032 :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`.
1033 """
1034 return self.configuration.time_partitioning
1035
1036 @property
1037 def use_avro_logical_types(self):
1038 """See
1039 :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`.
1040 """
1041 return self.configuration.use_avro_logical_types
1042
1043 @property
1044 def clustering_fields(self):
1045 """See
1046 :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`.
1047 """
1048 return self.configuration.clustering_fields
1049
1050 @property
1051 def source_column_match(self) -> Optional[SourceColumnMatch]:
1052 """See
1053 :attr:`google.cloud.bigquery.job.LoadJobConfig.source_column_match`.
1054 """
1055 return self.configuration.source_column_match
1056
1057 @property
1058 def date_format(self):
1059 """See
1060 :attr:`google.cloud.bigquery.job.LoadJobConfig.date_format`.
1061 """
1062 return self.configuration.date_format
1063
1064 @property
1065 def datetime_format(self):
1066 """See
1067 :attr:`google.cloud.bigquery.job.LoadJobConfig.datetime_format`.
1068 """
1069 return self.configuration.datetime_format
1070
1071 @property
1072 def time_zone(self):
1073 """See
1074 :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`.
1075 """
1076 return self.configuration.time_zone
1077
1078 @property
1079 def time_format(self):
1080 """See
1081 :attr:`google.cloud.bigquery.job.LoadJobConfig.time_format`.
1082 """
1083 return self.configuration.time_format
1084
1085 @property
1086 def timestamp_format(self):
1087 """See
1088 :attr:`google.cloud.bigquery.job.LoadJobConfig.timestamp_format`.
1089 """
1090 return self.configuration.timestamp_format
1091
1092 @property
1093 def schema_update_options(self):
1094 """See
1095 :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`.
1096 """
1097 return self.configuration.schema_update_options
1098
1099 @property
1100 def input_file_bytes(self):
1101 """Count of bytes loaded from source files.
1102
1103 Returns:
1104 Optional[int]: the count (None until set from the server).
1105
1106 Raises:
1107 ValueError: for invalid value types.
1108 """
1109 return _helpers._int_or_none(
1110 _helpers._get_sub_prop(
1111 self._properties, ["statistics", "load", "inputFileBytes"]
1112 )
1113 )
1114
1115 @property
1116 def input_files(self):
1117 """Count of source files.
1118
1119 Returns:
1120 Optional[int]: the count (None until set from the server).
1121 """
1122 return _helpers._int_or_none(
1123 _helpers._get_sub_prop(
1124 self._properties, ["statistics", "load", "inputFiles"]
1125 )
1126 )
1127
1128 @property
1129 def output_bytes(self):
1130 """Count of bytes saved to destination table.
1131
1132 Returns:
1133 Optional[int]: the count (None until set from the server).
1134 """
1135 return _helpers._int_or_none(
1136 _helpers._get_sub_prop(
1137 self._properties, ["statistics", "load", "outputBytes"]
1138 )
1139 )
1140
1141 @property
1142 def output_rows(self):
1143 """Count of rows saved to destination table.
1144
1145 Returns:
1146 Optional[int]: the count (None until set from the server).
1147 """
1148 return _helpers._int_or_none(
1149 _helpers._get_sub_prop(
1150 self._properties, ["statistics", "load", "outputRows"]
1151 )
1152 )
1153
1154 def to_api_repr(self):
1155 """Generate a resource for :meth:`_begin`."""
1156 # Exclude statistics, if set.
1157 return {
1158 "jobReference": self._properties["jobReference"],
1159 "configuration": self._properties["configuration"],
1160 }
1161
1162 @classmethod
1163 def from_api_repr(cls, resource: dict, client) -> "LoadJob":
1164 """Factory: construct a job given its API representation
1165
1166 .. note::
1167
1168 This method assumes that the project found in the resource matches
1169 the client's project.
1170
1171 Args:
1172 resource (Dict): dataset job representation returned from the API
1173
1174 client (google.cloud.bigquery.client.Client):
1175 Client which holds credentials and project
1176 configuration for the dataset.
1177
1178 Returns:
1179 google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``.
1180 """
1181 cls._check_resource_config(resource)
1182 job_ref = _JobReference._from_api_repr(resource["jobReference"])
1183 job = cls(job_ref, None, None, client)
1184 job._set_properties(resource)
1185 return job