Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/types/stream.py: 100%

60 statements  

# -*- coding: utf-8 -*-
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from typing import MutableMapping, MutableSequence

from google.protobuf import timestamp_pb2  # type: ignore
import proto  # type: ignore

from google.cloud.bigquery_storage_v1.types import arrow, avro
from google.cloud.bigquery_storage_v1.types import table as gcbs_table

__protobuf__ = proto.module(
    package="google.cloud.bigquery.storage.v1",
    manifest={
        "DataFormat",
        "WriteStreamView",
        "ReadSession",
        "ReadStream",
        "WriteStream",
    },
)


class DataFormat(proto.Enum):
    r"""Data format for input or output data.

    Values:
        DATA_FORMAT_UNSPECIFIED (0):
            Data format is unspecified.
        AVRO (1):
            Avro is a standard open source row based file
            format. See https://avro.apache.org/ for more
            details.
        ARROW (2):
            Arrow is a standard open source column-based
            message format. See https://arrow.apache.org/
            for more details.
    """
    DATA_FORMAT_UNSPECIFIED = 0
    AVRO = 1
    ARROW = 2


class WriteStreamView(proto.Enum):
    r"""WriteStreamView is a view enum that controls what details
    about a write stream should be returned.

    Values:
        WRITE_STREAM_VIEW_UNSPECIFIED (0):
            The default / unset value.
        BASIC (1):
            The BASIC projection returns basic metadata
            about a write stream. The basic view does not
            include schema information. This is the default
            view returned by GetWriteStream.
        FULL (2):
            The FULL projection returns all available
            write stream metadata, including the schema.
            CreateWriteStream returns the full projection of
            write stream metadata.
    """
    WRITE_STREAM_VIEW_UNSPECIFIED = 0
    BASIC = 1
    FULL = 2

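# NOTE (editorial sketch, not part of the generated module): the view enum
# above is consumed by the BigQuery Write API client when fetching stream
# metadata. A minimal, hedged example -- assuming the standard GAPIC client in
# this package and a placeholder stream path -- would look like:
#
#     from google.cloud import bigquery_storage_v1
#
#     client = bigquery_storage_v1.BigQueryWriteClient()
#     request = bigquery_storage_v1.types.GetWriteStreamRequest(
#         name="projects/my-project/datasets/my_dataset/tables/my_table/streams/my_stream",
#         view=bigquery_storage_v1.types.WriteStreamView.FULL,
#     )
#     # FULL also returns the schema; BASIC (the default) omits it.
#     stream = client.get_write_stream(request=request)
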

class ReadSession(proto.Message):
    r"""Information about the ReadSession.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        name (str):
            Output only. Unique identifier for the session, in the form
            ``projects/{project_id}/locations/{location}/sessions/{session_id}``.
        expire_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. Time at which the session becomes invalid.
            After this time, subsequent requests to read this Session
            will return errors. The expire_time is automatically
            assigned and currently cannot be specified or updated.
        data_format (google.cloud.bigquery_storage_v1.types.DataFormat):
            Immutable. Data format of the output data.
            DATA_FORMAT_UNSPECIFIED not supported.
        avro_schema (google.cloud.bigquery_storage_v1.types.AvroSchema):
            Output only. Avro schema.

            This field is a member of `oneof`_ ``schema``.
        arrow_schema (google.cloud.bigquery_storage_v1.types.ArrowSchema):
            Output only. Arrow schema.

            This field is a member of `oneof`_ ``schema``.
        table (str):
            Immutable. Table that this ReadSession is reading from, in
            the form
            ``projects/{project_id}/datasets/{dataset_id}/tables/{table_id}``
        table_modifiers (google.cloud.bigquery_storage_v1.types.ReadSession.TableModifiers):
            Optional. Any modifiers which are applied
            when reading from the specified table.
        read_options (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions):
            Optional. Read options for this session (e.g.
            column selection, filters).
        streams (MutableSequence[google.cloud.bigquery_storage_v1.types.ReadStream]):
            Output only. A list of streams created with the session.

            At least one stream is created with the session. In the
            future, larger request_stream_count values *may* result in
            this list being unpopulated. In that case, the user will
            need to use a List method to get the streams instead, which
            is not yet available.
        estimated_total_bytes_scanned (int):
            Output only. An estimate on the number of
            bytes this session will scan when all streams
            are completely consumed. This estimate is based
            on metadata from the table which might be
            incomplete or stale.
        estimated_total_physical_file_size (int):
            Output only. A pre-projected estimate of the
            total physical size of files (in bytes) that
            this session will scan when all streams are
            consumed. This estimate is independent of the
            selected columns and can be based on incomplete
            or stale metadata from the table. This field is
            only set for BigLake tables.
        estimated_row_count (int):
            Output only. An estimate on the number of
            rows present in this session's streams. This
            estimate is based on metadata from the table
            which might be incomplete or stale.
        trace_id (str):
            Optional. ID set by client to annotate a
            session identity. This does not need to be
            strictly unique, but instead the same ID should
            be used to group logically connected sessions
            (e.g., using the same ID for all sessions
            needed to complete a Spark SQL query is
            reasonable).

            Maximum length is 256 bytes.
    """

    class TableModifiers(proto.Message):
        r"""Additional attributes when reading a table.

        Attributes:
            snapshot_time (google.protobuf.timestamp_pb2.Timestamp):
                The snapshot time of the table. If not set,
                interpreted as now.
        """

        snapshot_time: timestamp_pb2.Timestamp = proto.Field(
            proto.MESSAGE,
            number=1,
            message=timestamp_pb2.Timestamp,
        )

    class TableReadOptions(proto.Message):
        r"""Options dictating how we read a table.

        This message has `oneof`_ fields (mutually exclusive fields).
        For each oneof, at most one member field can be set at the same time.
        Setting any member of the oneof automatically clears all other
        members.

        .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

        Attributes:
            selected_fields (MutableSequence[str]):
                Optional. The names of the fields in the table to be
                returned. If no field names are specified, then all fields
                in the table are returned.

                Nested fields -- the child elements of a STRUCT field --
                can be selected individually using their fully-qualified
                names, and will be returned as record fields containing
                only the selected nested fields. If a STRUCT field is
                specified in the selected fields list, all of the child
                elements will be returned.

                As an example, consider a table with the following schema:

                {
                    "name": "struct_field",
                    "type": "RECORD",
                    "mode": "NULLABLE",
                    "fields": [
                        {
                            "name": "string_field1",
                            "type": "STRING",
                            "mode": "NULLABLE"
                        },
                        {
                            "name": "string_field2",
                            "type": "STRING",
                            "mode": "NULLABLE"
                        }
                    ]
                }

                Specifying "struct_field" in the selected fields list will
                result in a read session schema with the following logical
                structure:

                struct_field {
                    string_field1
                    string_field2
                }

                Specifying "struct_field.string_field1" in the selected
                fields list will result in a read session schema with the
                following logical structure:

                struct_field {
                    string_field1
                }

                The order of the fields in the read session schema is
                derived from the table schema and does not correspond to
                the order in which the fields are specified in this list.
            row_restriction (str):
                SQL text filtering statement, similar to a WHERE clause in
                a query. Aggregates are not supported.

                Examples:
                "int_field > 5"
                "date_field = CAST('2014-9-27' as DATE)"
                "nullable_field is not NULL"
                "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))"
                "numeric_field BETWEEN 1.0 AND 5.0"

                Restricted to a maximum length of 1 MB.
            arrow_serialization_options (google.cloud.bigquery_storage_v1.types.ArrowSerializationOptions):
                Optional. Options specific to the Apache
                Arrow output format.

                This field is a member of `oneof`_ ``output_format_serialization_options``.
            avro_serialization_options (google.cloud.bigquery_storage_v1.types.AvroSerializationOptions):
                Optional. Options specific to the Apache Avro
                output format.

                This field is a member of `oneof`_ ``output_format_serialization_options``.
            sample_percentage (float):
                Optional. Specifies a table sampling percentage.
                Specifically, the query planner will use TABLESAMPLE SYSTEM
                (sample_percentage PERCENT). The sampling percentage is
                applied at the data block granularity. It will randomly
                choose for each data block whether to read the rows in that
                data block. For more details, see
                https://cloud.google.com/bigquery/docs/table-sampling

                This field is a member of `oneof`_ ``_sample_percentage``.
            response_compression_codec (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions.ResponseCompressionCodec):
                Optional. Set response_compression_codec when creating a
                read session to enable application-level compression of
                ReadRows responses.

                This field is a member of `oneof`_ ``_response_compression_codec``.
        """

        class ResponseCompressionCodec(proto.Enum):
            r"""Specifies which compression codec to attempt on the entire
            serialized response payload (either Arrow record batch or Avro
            rows). This is not to be confused with the Apache Arrow native
            compression codecs specified in ArrowSerializationOptions. For
            performance reasons, when creating a read session requesting
            Arrow responses, setting both native Arrow compression and
            application-level response compression will not be allowed -
            choose, at most, one kind of compression.

            Values:
                RESPONSE_COMPRESSION_CODEC_UNSPECIFIED (0):
                    Default is no compression.
                RESPONSE_COMPRESSION_CODEC_LZ4 (2):
                    Use raw LZ4 compression.
            """
            RESPONSE_COMPRESSION_CODEC_UNSPECIFIED = 0
            RESPONSE_COMPRESSION_CODEC_LZ4 = 2

        selected_fields: MutableSequence[str] = proto.RepeatedField(
            proto.STRING,
            number=1,
        )
        row_restriction: str = proto.Field(
            proto.STRING,
            number=2,
        )
        arrow_serialization_options: arrow.ArrowSerializationOptions = proto.Field(
            proto.MESSAGE,
            number=3,
            oneof="output_format_serialization_options",
            message=arrow.ArrowSerializationOptions,
        )
        avro_serialization_options: avro.AvroSerializationOptions = proto.Field(
            proto.MESSAGE,
            number=4,
            oneof="output_format_serialization_options",
            message=avro.AvroSerializationOptions,
        )
        sample_percentage: float = proto.Field(
            proto.DOUBLE,
            number=5,
            optional=True,
        )
        response_compression_codec: "ReadSession.TableReadOptions.ResponseCompressionCodec" = proto.Field(
            proto.ENUM,
            number=6,
            optional=True,
            enum="ReadSession.TableReadOptions.ResponseCompressionCodec",
        )

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )
    expire_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=2,
        message=timestamp_pb2.Timestamp,
    )
    data_format: "DataFormat" = proto.Field(
        proto.ENUM,
        number=3,
        enum="DataFormat",
    )
    avro_schema: avro.AvroSchema = proto.Field(
        proto.MESSAGE,
        number=4,
        oneof="schema",
        message=avro.AvroSchema,
    )
    arrow_schema: arrow.ArrowSchema = proto.Field(
        proto.MESSAGE,
        number=5,
        oneof="schema",
        message=arrow.ArrowSchema,
    )
    table: str = proto.Field(
        proto.STRING,
        number=6,
    )
    table_modifiers: TableModifiers = proto.Field(
        proto.MESSAGE,
        number=7,
        message=TableModifiers,
    )
    read_options: TableReadOptions = proto.Field(
        proto.MESSAGE,
        number=8,
        message=TableReadOptions,
    )
    streams: MutableSequence["ReadStream"] = proto.RepeatedField(
        proto.MESSAGE,
        number=10,
        message="ReadStream",
    )
    estimated_total_bytes_scanned: int = proto.Field(
        proto.INT64,
        number=12,
    )
    estimated_total_physical_file_size: int = proto.Field(
        proto.INT64,
        number=15,
    )
    estimated_row_count: int = proto.Field(
        proto.INT64,
        number=14,
    )
    trace_id: str = proto.Field(
        proto.STRING,
        number=13,
    )

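# NOTE (editorial sketch, not part of the generated module): a typical way the
# ReadSession and TableReadOptions messages above are assembled and sent via
# CreateReadSession. Project and table paths are placeholders.
#
#     from google.cloud import bigquery_storage_v1
#
#     types = bigquery_storage_v1.types
#     requested_session = types.ReadSession(
#         table="projects/my-project/datasets/my_dataset/tables/my_table",
#         data_format=types.DataFormat.ARROW,
#         read_options=types.ReadSession.TableReadOptions(
#             selected_fields=["struct_field.string_field1"],
#             row_restriction="int_field > 5",
#         ),
#     )
#     client = bigquery_storage_v1.BigQueryReadClient()
#     session = client.create_read_session(
#         parent="projects/my-project",
#         read_session=requested_session,
#         max_stream_count=1,
#     )
#     # The response populates output-only fields such as streams and arrow_schema.
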

class ReadStream(proto.Message):
    r"""Information about a single stream that gets data out of the storage
    system. Most of the information about ``ReadStream`` instances is
    aggregated, making ``ReadStream`` lightweight.

    Attributes:
        name (str):
            Output only. Name of the stream, in the form
            ``projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}``.
    """

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )


class WriteStream(proto.Message):
    r"""Information about a single stream that gets data inside the
    storage system.

    Attributes:
        name (str):
            Output only. Name of the stream, in the form
            ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``.
        type_ (google.cloud.bigquery_storage_v1.types.WriteStream.Type):
            Immutable. Type of the stream.
        create_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. Create time of the stream. For the \_default
            stream, this is the creation_time of the table.
        commit_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. Commit time of the stream. If a stream is of
            ``COMMITTED`` type, then it will have a commit_time same as
            ``create_time``. If the stream is of ``PENDING`` type, an
            empty commit_time means it is not committed.
        table_schema (google.cloud.bigquery_storage_v1.types.TableSchema):
            Output only. The schema of the destination table. It is
            only returned in the ``CreateWriteStream`` response. Caller
            should generate data that's compatible with this schema to
            send in the initial ``AppendRowsRequest``. The table schema
            could go out of date during the lifetime of the stream.
        write_mode (google.cloud.bigquery_storage_v1.types.WriteStream.WriteMode):
            Immutable. Mode of the stream.
        location (str):
            Immutable. The geographic location where the
            stream's dataset resides. See
            https://cloud.google.com/bigquery/docs/locations
            for supported locations.
    """

    class Type(proto.Enum):
        r"""Type enum of the stream.

        Values:
            TYPE_UNSPECIFIED (0):
                Unknown type.
            COMMITTED (1):
                Data will commit automatically and appear as
                soon as the write is acknowledged.
            PENDING (2):
                Data is invisible until the stream is
                committed.
            BUFFERED (3):
                Data is only visible up to the offset to
                which it was flushed.
        """
        TYPE_UNSPECIFIED = 0
        COMMITTED = 1
        PENDING = 2
        BUFFERED = 3

    class WriteMode(proto.Enum):
        r"""Mode enum of the stream.

        Values:
            WRITE_MODE_UNSPECIFIED (0):
                Unknown type.
            INSERT (1):
                Insert new records into the table.
                It is the default value if customers do not
                specify it.
        """
        WRITE_MODE_UNSPECIFIED = 0
        INSERT = 1

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )
    type_: Type = proto.Field(
        proto.ENUM,
        number=2,
        enum=Type,
    )
    create_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=3,
        message=timestamp_pb2.Timestamp,
    )
    commit_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=4,
        message=timestamp_pb2.Timestamp,
    )
    table_schema: gcbs_table.TableSchema = proto.Field(
        proto.MESSAGE,
        number=5,
        message=gcbs_table.TableSchema,
    )
    write_mode: WriteMode = proto.Field(
        proto.ENUM,
        number=7,
        enum=WriteMode,
    )
    location: str = proto.Field(
        proto.STRING,
        number=8,
    )


__all__ = tuple(sorted(__protobuf__.manifest))
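# NOTE (editorial sketch, not part of the generated module): creating a PENDING
# WriteStream, whose rows stay invisible until the stream is committed. The
# table path is a placeholder.
#
#     from google.cloud import bigquery_storage_v1
#
#     types = bigquery_storage_v1.types
#     client = bigquery_storage_v1.BigQueryWriteClient()
#     stream = client.create_write_stream(
#         parent="projects/my-project/datasets/my_dataset/tables/my_table",
#         write_stream=types.WriteStream(type_=types.WriteStream.Type.PENDING),
#     )
#     # CreateWriteStream returns the FULL projection, so stream.table_schema is set.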