# -*- coding: utf-8 -*-
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from typing import MutableMapping, MutableSequence

from google.protobuf import timestamp_pb2  # type: ignore
import proto  # type: ignore

from google.cloud.bigquery_storage_v1.types import arrow, avro
from google.cloud.bigquery_storage_v1.types import table as gcbs_table

__protobuf__ = proto.module(
    package="google.cloud.bigquery.storage.v1",
    manifest={
        "DataFormat",
        "WriteStreamView",
        "ReadSession",
        "ReadStream",
        "WriteStream",
    },
)


class DataFormat(proto.Enum):
    r"""Data format for input or output data.

    Values:
        DATA_FORMAT_UNSPECIFIED (0):
            Data format is unspecified.
        AVRO (1):
            Avro is a standard open-source row-based file
            format. See https://avro.apache.org/ for more
            details.
        ARROW (2):
            Arrow is a standard open-source column-based
            message format. See https://arrow.apache.org/
            for more details.
    """
    DATA_FORMAT_UNSPECIFIED = 0
    AVRO = 1
    ARROW = 2


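# A minimal sketch of choosing a ``DataFormat`` when requesting a read
# session. ``BigQueryReadClient`` is exported by
# ``google.cloud.bigquery_storage_v1``; the project and table names below
# are illustrative placeholders.
#
#     from google.cloud.bigquery_storage_v1 import BigQueryReadClient, types
#
#     client = BigQueryReadClient()
#     session = client.create_read_session(
#         parent="projects/my-project",
#         read_session=types.ReadSession(
#             table="projects/my-project/datasets/my_dataset/tables/my_table",
#             data_format=types.DataFormat.ARROW,
#         ),
#         max_stream_count=1,
#     )
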
class WriteStreamView(proto.Enum):
    r"""WriteStreamView is a view enum that controls what details
    about a write stream should be returned.

    Values:
        WRITE_STREAM_VIEW_UNSPECIFIED (0):
            The default / unset value.
        BASIC (1):
            The BASIC projection returns basic metadata
            about a write stream. The basic view does not
            include schema information. This is the default
            view returned by GetWriteStream.
        FULL (2):
            The FULL projection returns all available
            write stream metadata, including the schema.
            CreateWriteStream returns the full projection of
            write stream metadata.
    """
    WRITE_STREAM_VIEW_UNSPECIFIED = 0
    BASIC = 1
    FULL = 2


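# A minimal sketch of requesting the ``FULL`` view of an existing write
# stream. ``BigQueryWriteClient`` is exported by
# ``google.cloud.bigquery_storage_v1``; the stream name below is an
# illustrative placeholder.
#
#     from google.cloud.bigquery_storage_v1 import BigQueryWriteClient, types
#
#     client = BigQueryWriteClient()
#     stream = client.get_write_stream(
#         request={
#             "name": "projects/my-project/datasets/my_dataset"
#             "/tables/my_table/streams/my_stream",
#             "view": types.WriteStreamView.FULL,
#         }
#     )
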
class ReadSession(proto.Message):
    r"""Information about the ReadSession.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        name (str):
            Output only. Unique identifier for the session, in the form
            ``projects/{project_id}/locations/{location}/sessions/{session_id}``.
        expire_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. Time at which the session becomes invalid.
            After this time, subsequent requests to read this Session
            will return errors. The expire_time is automatically
            assigned and currently cannot be specified or updated.
        data_format (google.cloud.bigquery_storage_v1.types.DataFormat):
            Immutable. Data format of the output data.
            DATA_FORMAT_UNSPECIFIED is not supported.
        avro_schema (google.cloud.bigquery_storage_v1.types.AvroSchema):
            Output only. Avro schema.

            This field is a member of `oneof`_ ``schema``.
        arrow_schema (google.cloud.bigquery_storage_v1.types.ArrowSchema):
            Output only. Arrow schema.

            This field is a member of `oneof`_ ``schema``.
        table (str):
            Immutable. Table that this ReadSession is reading from, in
            the form
            ``projects/{project_id}/datasets/{dataset_id}/tables/{table_id}``.
        table_modifiers (google.cloud.bigquery_storage_v1.types.ReadSession.TableModifiers):
            Optional. Any modifiers which are applied
            when reading from the specified table.
        read_options (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions):
            Optional. Read options for this session (e.g.
            column selection, filters).
        streams (MutableSequence[google.cloud.bigquery_storage_v1.types.ReadStream]):
            Output only. A list of streams created with the session.

            At least one stream is created with the session. In the
            future, larger request_stream_count values *may* result in
            this list being unpopulated; in that case, the user will
            need to use a List method to get the streams instead, which
            is not yet available.
        estimated_total_bytes_scanned (int):
            Output only. An estimate of the number of
            bytes this session will scan when all streams
            are completely consumed. This estimate is based
            on metadata from the table, which might be
            incomplete or stale.
        estimated_total_physical_file_size (int):
            Output only. A pre-projected estimate of the
            total physical size of files (in bytes) that
            this session will scan when all streams are
            consumed. This estimate is independent of the
            selected columns and can be based on incomplete
            or stale metadata from the table. This field is
            only set for BigLake tables.
        estimated_row_count (int):
            Output only. An estimate of the number of
            rows present in this session's streams. This
            estimate is based on metadata from the table,
            which might be incomplete or stale.
        trace_id (str):
            Optional. ID set by client to annotate a
            session identity. This does not need to be
            strictly unique, but the same ID should be used
            to group logically connected sessions (e.g.
            using the same ID for all sessions needed to
            complete a Spark SQL query is reasonable).

            Maximum length is 256 bytes.
    """

    class TableModifiers(proto.Message):
        r"""Additional attributes when reading a table.

        Attributes:
            snapshot_time (google.protobuf.timestamp_pb2.Timestamp):
                The snapshot time of the table. If not set,
                interpreted as now.
        """

        snapshot_time: timestamp_pb2.Timestamp = proto.Field(
            proto.MESSAGE,
            number=1,
            message=timestamp_pb2.Timestamp,
        )

    class TableReadOptions(proto.Message):
        r"""Options dictating how we read a table.

        This message has `oneof`_ fields (mutually exclusive fields).
        For each oneof, at most one member field can be set at the same time.
        Setting any member of the oneof automatically clears all other
        members.

        .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

        Attributes:
            selected_fields (MutableSequence[str]):
                Optional. The names of the fields in the table to be
                returned. If no field names are specified, then all fields
                in the table are returned.

                Nested fields -- the child elements of a STRUCT field -- can
                be selected individually using their fully-qualified names,
                and will be returned as record fields containing only the
                selected nested fields. If a STRUCT field is specified in
                the selected fields list, all of the child elements will be
                returned.

                As an example, consider a table with the following schema::

                    {
                        "name": "struct_field",
                        "type": "RECORD",
                        "mode": "NULLABLE",
                        "fields": [
                            {
                                "name": "string_field1",
                                "type": "STRING",
                                "mode": "NULLABLE"
                            },
                            {
                                "name": "string_field2",
                                "type": "STRING",
                                "mode": "NULLABLE"
                            }
                        ]
                    }

                Specifying "struct_field" in the selected fields list will
                result in a read session schema with the following logical
                structure::

                    struct_field {
                        string_field1
                        string_field2
                    }

                Specifying "struct_field.string_field1" in the selected
                fields list will result in a read session schema with the
                following logical structure::

                    struct_field {
                        string_field1
                    }

                The order of the fields in the read session schema is
                derived from the table schema and does not correspond to
                the order in which the fields are specified in this list.
            row_restriction (str):
                SQL text filtering statement, similar to a WHERE clause in
                a query. Aggregates are not supported.

                Examples::

                    "int_field > 5"
                    "date_field = CAST('2014-9-27' as DATE)"
                    "nullable_field is not NULL"
                    "st_equals(geo_field, st_geofromtext('POINT(2, 2)'))"
                    "numeric_field BETWEEN 1.0 AND 5.0"

                Restricted to a maximum length of 1 MB.
            arrow_serialization_options (google.cloud.bigquery_storage_v1.types.ArrowSerializationOptions):
                Optional. Options specific to the Apache
                Arrow output format.

                This field is a member of `oneof`_ ``output_format_serialization_options``.
            avro_serialization_options (google.cloud.bigquery_storage_v1.types.AvroSerializationOptions):
                Optional. Options specific to the Apache
                Avro output format.

                This field is a member of `oneof`_ ``output_format_serialization_options``.
            sample_percentage (float):
                Optional. Specifies a table sampling percentage.
                Specifically, the query planner will use TABLESAMPLE SYSTEM
                (sample_percentage PERCENT). The sampling percentage is
                applied at the data block granularity: it randomly chooses,
                for each data block, whether to read the rows in that data
                block. For more details, see
                https://cloud.google.com/bigquery/docs/table-sampling.

                This field is a member of `oneof`_ ``_sample_percentage``.
            response_compression_codec (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions.ResponseCompressionCodec):
                Optional. Set response_compression_codec when creating a
                read session to enable application-level compression of
                ReadRows responses.

                This field is a member of `oneof`_ ``_response_compression_codec``.
        """

        class ResponseCompressionCodec(proto.Enum):
            r"""Specifies which compression codec to attempt on the entire
            serialized response payload (either Arrow record batch or Avro
            rows). This is not to be confused with the Apache Arrow native
            compression codecs specified in ArrowSerializationOptions. For
            performance reasons, when creating a read session requesting
            Arrow responses, setting both native Arrow compression and
            application-level response compression will not be allowed -
            choose, at most, one kind of compression.

            Values:
                RESPONSE_COMPRESSION_CODEC_UNSPECIFIED (0):
                    Default is no compression.
                RESPONSE_COMPRESSION_CODEC_LZ4 (2):
                    Use raw LZ4 compression.
            """
            RESPONSE_COMPRESSION_CODEC_UNSPECIFIED = 0
            RESPONSE_COMPRESSION_CODEC_LZ4 = 2

        selected_fields: MutableSequence[str] = proto.RepeatedField(
            proto.STRING,
            number=1,
        )
        row_restriction: str = proto.Field(
            proto.STRING,
            number=2,
        )
        arrow_serialization_options: arrow.ArrowSerializationOptions = proto.Field(
            proto.MESSAGE,
            number=3,
            oneof="output_format_serialization_options",
            message=arrow.ArrowSerializationOptions,
        )
        avro_serialization_options: avro.AvroSerializationOptions = proto.Field(
            proto.MESSAGE,
            number=4,
            oneof="output_format_serialization_options",
            message=avro.AvroSerializationOptions,
        )
        sample_percentage: float = proto.Field(
            proto.DOUBLE,
            number=5,
            optional=True,
        )
        response_compression_codec: "ReadSession.TableReadOptions.ResponseCompressionCodec" = proto.Field(
            proto.ENUM,
            number=6,
            optional=True,
            enum="ReadSession.TableReadOptions.ResponseCompressionCodec",
        )

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )
    expire_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=2,
        message=timestamp_pb2.Timestamp,
    )
    data_format: "DataFormat" = proto.Field(
        proto.ENUM,
        number=3,
        enum="DataFormat",
    )
    avro_schema: avro.AvroSchema = proto.Field(
        proto.MESSAGE,
        number=4,
        oneof="schema",
        message=avro.AvroSchema,
    )
    arrow_schema: arrow.ArrowSchema = proto.Field(
        proto.MESSAGE,
        number=5,
        oneof="schema",
        message=arrow.ArrowSchema,
    )
    table: str = proto.Field(
        proto.STRING,
        number=6,
    )
    table_modifiers: TableModifiers = proto.Field(
        proto.MESSAGE,
        number=7,
        message=TableModifiers,
    )
    read_options: TableReadOptions = proto.Field(
        proto.MESSAGE,
        number=8,
        message=TableReadOptions,
    )
    streams: MutableSequence["ReadStream"] = proto.RepeatedField(
        proto.MESSAGE,
        number=10,
        message="ReadStream",
    )
    estimated_total_bytes_scanned: int = proto.Field(
        proto.INT64,
        number=12,
    )
    estimated_total_physical_file_size: int = proto.Field(
        proto.INT64,
        number=15,
    )
    estimated_row_count: int = proto.Field(
        proto.INT64,
        number=14,
    )
    trace_id: str = proto.Field(
        proto.STRING,
        number=13,
    )


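# A minimal sketch of configuring ``TableReadOptions`` and ``TableModifiers``
# on a ``ReadSession`` request: column projection, a row filter, and a
# snapshot time. The client call and resource names are illustrative
# assumptions; proto-plus converts the ``datetime`` to a ``Timestamp``.
#
#     import datetime
#
#     from google.cloud.bigquery_storage_v1 import BigQueryReadClient, types
#
#     requested_session = types.ReadSession(
#         table="projects/my-project/datasets/my_dataset/tables/my_table",
#         data_format=types.DataFormat.AVRO,
#         read_options=types.ReadSession.TableReadOptions(
#             selected_fields=["struct_field.string_field1"],
#             row_restriction="int_field > 5",
#         ),
#         table_modifiers=types.ReadSession.TableModifiers(
#             snapshot_time=datetime.datetime.now(tz=datetime.timezone.utc),
#         ),
#     )
#     session = BigQueryReadClient().create_read_session(
#         parent="projects/my-project",
#         read_session=requested_session,
#     )
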
class ReadStream(proto.Message):
    r"""Information about a single stream that gets data out of the storage
    system. Most of the information about ``ReadStream`` instances is
    aggregated, making ``ReadStream`` lightweight.

    Attributes:
        name (str):
            Output only. Name of the stream, in the form
            ``projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}``.
    """

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )


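# A minimal sketch of consuming the ``ReadStream`` names attached to a
# ``ReadSession``. Assumes ``session`` comes from a prior
# ``create_read_session`` call (see the sketch above); ``process`` is a
# hypothetical row handler.
#
#     reader = BigQueryReadClient().read_rows(session.streams[0].name)
#     for row in reader.rows(session):
#         process(row)
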
class WriteStream(proto.Message):
    r"""Information about a single stream that gets data inside the
    storage system.

    Attributes:
        name (str):
            Output only. Name of the stream, in the form
            ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``.
        type_ (google.cloud.bigquery_storage_v1.types.WriteStream.Type):
            Immutable. Type of the stream.
        create_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. Create time of the stream. For the \_default
            stream, this is the creation_time of the table.
        commit_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. Commit time of the stream. If a stream is of
            ``COMMITTED`` type, then it will have a commit_time equal
            to its ``create_time``. If the stream is of ``PENDING``
            type, an empty commit_time means it is not committed.
        table_schema (google.cloud.bigquery_storage_v1.types.TableSchema):
            Output only. The schema of the destination table. It is
            only returned in the ``CreateWriteStream`` response. The
            caller should generate data that's compatible with this
            schema to send in the initial ``AppendRowsRequest``. The
            table schema could go out of date during the lifetime of
            the stream.
        write_mode (google.cloud.bigquery_storage_v1.types.WriteStream.WriteMode):
            Immutable. Mode of the stream.
        location (str):
            Immutable. The geographic location where the
            stream's dataset resides. See
            https://cloud.google.com/bigquery/docs/locations
            for supported locations.
    """

    class Type(proto.Enum):
        r"""Type enum of the stream.

        Values:
            TYPE_UNSPECIFIED (0):
                Unknown type.
            COMMITTED (1):
                Data will commit automatically and appear as
                soon as the write is acknowledged.
            PENDING (2):
                Data is invisible until the stream is
                committed.
            BUFFERED (3):
                Data is only visible up to the offset to
                which it was flushed.
        """
        TYPE_UNSPECIFIED = 0
        COMMITTED = 1
        PENDING = 2
        BUFFERED = 3

    class WriteMode(proto.Enum):
        r"""Mode enum of the stream.

        Values:
            WRITE_MODE_UNSPECIFIED (0):
                Unknown mode.
            INSERT (1):
                Insert new records into the table.
                This is the default value if customers do not
                specify it.
        """
        WRITE_MODE_UNSPECIFIED = 0
        INSERT = 1

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )
    type_: Type = proto.Field(
        proto.ENUM,
        number=2,
        enum=Type,
    )
    create_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=3,
        message=timestamp_pb2.Timestamp,
    )
    commit_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=4,
        message=timestamp_pb2.Timestamp,
    )
    table_schema: gcbs_table.TableSchema = proto.Field(
        proto.MESSAGE,
        number=5,
        message=gcbs_table.TableSchema,
    )
    write_mode: WriteMode = proto.Field(
        proto.ENUM,
        number=7,
        enum=WriteMode,
    )
    location: str = proto.Field(
        proto.STRING,
        number=8,
    )


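# A minimal sketch of creating a ``PENDING`` write stream, whose data stays
# invisible until the stream is committed. ``BigQueryWriteClient`` is
# exported by ``google.cloud.bigquery_storage_v1``; the parent table path
# below is an illustrative placeholder.
#
#     from google.cloud.bigquery_storage_v1 import BigQueryWriteClient, types
#
#     client = BigQueryWriteClient()
#     stream = client.create_write_stream(
#         parent="projects/my-project/datasets/my_dataset/tables/my_table",
#         write_stream=types.WriteStream(type_=types.WriteStream.Type.PENDING),
#     )
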
__all__ = tuple(sorted(__protobuf__.manifest))