# -*- coding: utf-8 -*-
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from typing import MutableMapping, MutableSequence

from google.protobuf import timestamp_pb2  # type: ignore
import proto  # type: ignore

from google.cloud.bigquery_storage_v1.types import arrow, avro
from google.cloud.bigquery_storage_v1.types import table as gcbs_table

__protobuf__ = proto.module(
    package="google.cloud.bigquery.storage.v1",
    manifest={
        "DataFormat",
        "WriteStreamView",
        "ReadSession",
        "ReadStream",
        "WriteStream",
    },
)


class DataFormat(proto.Enum):
    r"""Data format for input or output data.

    Values:
        DATA_FORMAT_UNSPECIFIED (0):
            Data format is unspecified.
        AVRO (1):
            Avro is a standard open-source row-based file
            format. See https://avro.apache.org/ for more
            details.
        ARROW (2):
            Arrow is a standard open-source column-based
            message format. See https://arrow.apache.org/
            for more details.
    """
    DATA_FORMAT_UNSPECIFIED = 0
    AVRO = 1
    ARROW = 2


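# A minimal sketch of choosing a ``DataFormat`` when requesting a read
# session. ``BigQueryReadClient`` is exported by
# ``google.cloud.bigquery_storage_v1``; the project and table names below
# are illustrative placeholders.
#
#     from google.cloud.bigquery_storage_v1 import BigQueryReadClient, types
#
#     client = BigQueryReadClient()
#     session = client.create_read_session(
#         parent="projects/my-project",
#         read_session=types.ReadSession(
#             table="projects/my-project/datasets/my_dataset/tables/my_table",
#             data_format=types.DataFormat.ARROW,
#         ),
#         max_stream_count=1,
#     )
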
class WriteStreamView(proto.Enum):
    r"""WriteStreamView is a view enum that controls what details
    about a write stream should be returned.

    Values:
        WRITE_STREAM_VIEW_UNSPECIFIED (0):
            The default / unset value.
        BASIC (1):
            The BASIC projection returns basic metadata
            about a write stream. The basic view does not
            include schema information. This is the default
            view returned by GetWriteStream.
        FULL (2):
            The FULL projection returns all available
            write stream metadata, including the schema.
            CreateWriteStream returns the full projection of
            write stream metadata.
    """
    WRITE_STREAM_VIEW_UNSPECIFIED = 0
    BASIC = 1
    FULL = 2


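# A minimal sketch of requesting the ``FULL`` view of an existing write
# stream. ``BigQueryWriteClient`` is exported by
# ``google.cloud.bigquery_storage_v1``; the stream name below is an
# illustrative placeholder.
#
#     from google.cloud.bigquery_storage_v1 import BigQueryWriteClient, types
#
#     client = BigQueryWriteClient()
#     stream = client.get_write_stream(
#         request={
#             "name": "projects/my-project/datasets/my_dataset"
#             "/tables/my_table/streams/my_stream",
#             "view": types.WriteStreamView.FULL,
#         }
#     )
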
class ReadSession(proto.Message):
    r"""Information about the ReadSession.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        name (str):
            Output only. Unique identifier for the session, in the form
            ``projects/{project_id}/locations/{location}/sessions/{session_id}``.
        expire_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. Time at which the session becomes invalid.
            After this time, subsequent requests to read this Session
            will return errors. The expire_time is automatically
            assigned and currently cannot be specified or updated.
        data_format (google.cloud.bigquery_storage_v1.types.DataFormat):
            Immutable. Data format of the output data.
            DATA_FORMAT_UNSPECIFIED is not supported.
        avro_schema (google.cloud.bigquery_storage_v1.types.AvroSchema):
            Output only. Avro schema.

            This field is a member of `oneof`_ ``schema``.
        arrow_schema (google.cloud.bigquery_storage_v1.types.ArrowSchema):
            Output only. Arrow schema.

            This field is a member of `oneof`_ ``schema``.
        table (str):
            Immutable. Table that this ReadSession is reading from, in
            the form
            ``projects/{project_id}/datasets/{dataset_id}/tables/{table_id}``.
        table_modifiers (google.cloud.bigquery_storage_v1.types.ReadSession.TableModifiers):
            Optional. Any modifiers which are applied
            when reading from the specified table.
        read_options (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions):
            Optional. Read options for this session (e.g.
            column selection, filters).
        streams (MutableSequence[google.cloud.bigquery_storage_v1.types.ReadStream]):
            Output only. A list of streams created with the session.

            At least one stream is created with the session. In the
            future, larger request_stream_count values *may* result in
            this list being unpopulated; in that case, the user will
            need to use a List method to get the streams instead, which
            is not yet available.
        estimated_total_bytes_scanned (int):
            Output only. An estimate of the number of
            bytes this session will scan when all streams
            are completely consumed. This estimate is based
            on metadata from the table, which might be
            incomplete or stale.
        estimated_total_physical_file_size (int):
            Output only. A pre-projected estimate of the
            total physical size of files (in bytes) that
            this session will scan when all streams are
            consumed. This estimate is independent of the
            selected columns and can be based on incomplete
            or stale metadata from the table. This field is
            only set for BigLake tables.
        estimated_row_count (int):
            Output only. An estimate of the number of
            rows present in this session's streams. This
            estimate is based on metadata from the table,
            which might be incomplete or stale.
        trace_id (str):
            Optional. ID set by client to annotate a
            session identity. This does not need to be
            strictly unique, but the same ID should be used
            to group logically connected sessions (e.g.
            using the same ID for all sessions needed to
            complete a Spark SQL query is reasonable).

            Maximum length is 256 bytes.
    """

    class TableModifiers(proto.Message):
        r"""Additional attributes when reading a table.

        Attributes:
            snapshot_time (google.protobuf.timestamp_pb2.Timestamp):
                The snapshot time of the table. If not set,
                interpreted as now.
        """

        snapshot_time: timestamp_pb2.Timestamp = proto.Field(
            proto.MESSAGE,
            number=1,
            message=timestamp_pb2.Timestamp,
        )

    class TableReadOptions(proto.Message):
        r"""Options dictating how we read a table.

        This message has `oneof`_ fields (mutually exclusive fields).
        For each oneof, at most one member field can be set at the same time.
        Setting any member of the oneof automatically clears all other
        members.

        .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

        Attributes:
            selected_fields (MutableSequence[str]):
                Optional. The names of the fields in the table to be
                returned. If no field names are specified, then all fields
                in the table are returned.

                Nested fields -- the child elements of a STRUCT field -- can
                be selected individually using their fully-qualified names,
                and will be returned as record fields containing only the
                selected nested fields. If a STRUCT field is specified in
                the selected fields list, all of the child elements will be
                returned.

                As an example, consider a table with the following schema::

                    {
                        "name": "struct_field",
                        "type": "RECORD",
                        "mode": "NULLABLE",
                        "fields": [
                            {
                                "name": "string_field1",
                                "type": "STRING",
                                "mode": "NULLABLE"
                            },
                            {
                                "name": "string_field2",
                                "type": "STRING",
                                "mode": "NULLABLE"
                            }
                        ]
                    }

                Specifying "struct_field" in the selected fields list will
                result in a read session schema with the following logical
                structure::

                    struct_field {
                        string_field1
                        string_field2
                    }

                Specifying "struct_field.string_field1" in the selected
                fields list will result in a read session schema with the
                following logical structure::

                    struct_field {
                        string_field1
                    }

                The order of the fields in the read session schema is
                derived from the table schema and does not correspond to
                the order in which the fields are specified in this list.
            row_restriction (str):
                SQL text filtering statement, similar to a WHERE clause in
                a query. Aggregates are not supported.

                Examples::

                    "int_field > 5"
                    "date_field = CAST('2014-9-27' as DATE)"
                    "nullable_field is not NULL"
                    "st_equals(geo_field, st_geofromtext('POINT(2, 2)'))"
                    "numeric_field BETWEEN 1.0 AND 5.0"

                Restricted to a maximum length of 1 MB.
            arrow_serialization_options (google.cloud.bigquery_storage_v1.types.ArrowSerializationOptions):
                Optional. Options specific to the Apache
                Arrow output format.

                This field is a member of `oneof`_ ``output_format_serialization_options``.
            avro_serialization_options (google.cloud.bigquery_storage_v1.types.AvroSerializationOptions):
                Optional. Options specific to the Apache
                Avro output format.

                This field is a member of `oneof`_ ``output_format_serialization_options``.
            sample_percentage (float):
                Optional. Specifies a table sampling percentage.
                Specifically, the query planner will use TABLESAMPLE SYSTEM
                (sample_percentage PERCENT). The sampling percentage is
                applied at the data block granularity: it randomly chooses,
                for each data block, whether to read the rows in that data
                block. For more details, see
                https://cloud.google.com/bigquery/docs/table-sampling.

                This field is a member of `oneof`_ ``_sample_percentage``.
            response_compression_codec (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions.ResponseCompressionCodec):
                Optional. Set response_compression_codec when creating a
                read session to enable application-level compression of
                ReadRows responses.

                This field is a member of `oneof`_ ``_response_compression_codec``.
        """

        class ResponseCompressionCodec(proto.Enum):
            r"""Specifies which compression codec to attempt on the entire
            serialized response payload (either Arrow record batch or Avro
            rows). This is not to be confused with the Apache Arrow native
            compression codecs specified in ArrowSerializationOptions. For
            performance reasons, when creating a read session requesting
            Arrow responses, setting both native Arrow compression and
            application-level response compression will not be allowed -
            choose, at most, one kind of compression.

            Values:
                RESPONSE_COMPRESSION_CODEC_UNSPECIFIED (0):
                    Default is no compression.
                RESPONSE_COMPRESSION_CODEC_LZ4 (2):
                    Use raw LZ4 compression.
            """
            RESPONSE_COMPRESSION_CODEC_UNSPECIFIED = 0
            RESPONSE_COMPRESSION_CODEC_LZ4 = 2

        selected_fields: MutableSequence[str] = proto.RepeatedField(
            proto.STRING,
            number=1,
        )
        row_restriction: str = proto.Field(
            proto.STRING,
            number=2,
        )
        arrow_serialization_options: arrow.ArrowSerializationOptions = proto.Field(
            proto.MESSAGE,
            number=3,
            oneof="output_format_serialization_options",
            message=arrow.ArrowSerializationOptions,
        )
        avro_serialization_options: avro.AvroSerializationOptions = proto.Field(
            proto.MESSAGE,
            number=4,
            oneof="output_format_serialization_options",
            message=avro.AvroSerializationOptions,
        )
        sample_percentage: float = proto.Field(
            proto.DOUBLE,
            number=5,
            optional=True,
        )
        response_compression_codec: "ReadSession.TableReadOptions.ResponseCompressionCodec" = proto.Field(
            proto.ENUM,
            number=6,
            optional=True,
            enum="ReadSession.TableReadOptions.ResponseCompressionCodec",
        )

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )
    expire_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=2,
        message=timestamp_pb2.Timestamp,
    )
    data_format: "DataFormat" = proto.Field(
        proto.ENUM,
        number=3,
        enum="DataFormat",
    )
    avro_schema: avro.AvroSchema = proto.Field(
        proto.MESSAGE,
        number=4,
        oneof="schema",
        message=avro.AvroSchema,
    )
    arrow_schema: arrow.ArrowSchema = proto.Field(
        proto.MESSAGE,
        number=5,
        oneof="schema",
        message=arrow.ArrowSchema,
    )
    table: str = proto.Field(
        proto.STRING,
        number=6,
    )
    table_modifiers: TableModifiers = proto.Field(
        proto.MESSAGE,
        number=7,
        message=TableModifiers,
    )
    read_options: TableReadOptions = proto.Field(
        proto.MESSAGE,
        number=8,
        message=TableReadOptions,
    )
    streams: MutableSequence["ReadStream"] = proto.RepeatedField(
        proto.MESSAGE,
        number=10,
        message="ReadStream",
    )
    estimated_total_bytes_scanned: int = proto.Field(
        proto.INT64,
        number=12,
    )
    estimated_total_physical_file_size: int = proto.Field(
        proto.INT64,
        number=15,
    )
    estimated_row_count: int = proto.Field(
        proto.INT64,
        number=14,
    )
    trace_id: str = proto.Field(
        proto.STRING,
        number=13,
    )


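# A minimal sketch of configuring ``TableReadOptions`` and ``TableModifiers``
# on a ``ReadSession`` request: column projection, a row filter, and a
# snapshot time. The client call and resource names are illustrative
# assumptions; proto-plus converts the ``datetime`` to a ``Timestamp``.
#
#     import datetime
#
#     from google.cloud.bigquery_storage_v1 import BigQueryReadClient, types
#
#     requested_session = types.ReadSession(
#         table="projects/my-project/datasets/my_dataset/tables/my_table",
#         data_format=types.DataFormat.AVRO,
#         read_options=types.ReadSession.TableReadOptions(
#             selected_fields=["struct_field.string_field1"],
#             row_restriction="int_field > 5",
#         ),
#         table_modifiers=types.ReadSession.TableModifiers(
#             snapshot_time=datetime.datetime.now(tz=datetime.timezone.utc),
#         ),
#     )
#     session = BigQueryReadClient().create_read_session(
#         parent="projects/my-project",
#         read_session=requested_session,
#     )
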
class ReadStream(proto.Message):
    r"""Information about a single stream that gets data out of the storage
    system. Most of the information about ``ReadStream`` instances is
    aggregated, making ``ReadStream`` lightweight.

    Attributes:
        name (str):
            Output only. Name of the stream, in the form
            ``projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}``.
    """

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )


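# A minimal sketch of consuming the ``ReadStream`` names attached to a
# ``ReadSession``. Assumes ``session`` comes from a prior
# ``create_read_session`` call (see the sketch above); ``process`` is a
# hypothetical row handler.
#
#     reader = BigQueryReadClient().read_rows(session.streams[0].name)
#     for row in reader.rows(session):
#         process(row)
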
class WriteStream(proto.Message):
    r"""Information about a single stream that gets data inside the
    storage system.

    Attributes:
        name (str):
            Output only. Name of the stream, in the form
            ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``.
        type_ (google.cloud.bigquery_storage_v1.types.WriteStream.Type):
            Immutable. Type of the stream.
        create_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. Create time of the stream. For the \_default
            stream, this is the creation_time of the table.
        commit_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. Commit time of the stream. If a stream is of
            ``COMMITTED`` type, then it will have a commit_time equal
            to its ``create_time``. If the stream is of ``PENDING``
            type, an empty commit_time means it is not committed.
        table_schema (google.cloud.bigquery_storage_v1.types.TableSchema):
            Output only. The schema of the destination table. It is
            only returned in the ``CreateWriteStream`` response. The
            caller should generate data that's compatible with this
            schema to send in the initial ``AppendRowsRequest``. The
            table schema could go out of date during the lifetime of
            the stream.
        write_mode (google.cloud.bigquery_storage_v1.types.WriteStream.WriteMode):
            Immutable. Mode of the stream.
        location (str):
            Immutable. The geographic location where the
            stream's dataset resides. See
            https://cloud.google.com/bigquery/docs/locations
            for supported locations.
    """

    class Type(proto.Enum):
        r"""Type enum of the stream.

        Values:
            TYPE_UNSPECIFIED (0):
                Unknown type.
            COMMITTED (1):
                Data will commit automatically and appear as
                soon as the write is acknowledged.
            PENDING (2):
                Data is invisible until the stream is
                committed.
            BUFFERED (3):
                Data is only visible up to the offset to
                which it was flushed.
        """
        TYPE_UNSPECIFIED = 0
        COMMITTED = 1
        PENDING = 2
        BUFFERED = 3

    class WriteMode(proto.Enum):
        r"""Mode enum of the stream.

        Values:
            WRITE_MODE_UNSPECIFIED (0):
                Unknown mode.
            INSERT (1):
                Insert new records into the table.
                This is the default value if customers do not
                specify it.
        """
        WRITE_MODE_UNSPECIFIED = 0
        INSERT = 1

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )
    type_: Type = proto.Field(
        proto.ENUM,
        number=2,
        enum=Type,
    )
    create_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=3,
        message=timestamp_pb2.Timestamp,
    )
    commit_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=4,
        message=timestamp_pb2.Timestamp,
    )
    table_schema: gcbs_table.TableSchema = proto.Field(
        proto.MESSAGE,
        number=5,
        message=gcbs_table.TableSchema,
    )
    write_mode: WriteMode = proto.Field(
        proto.ENUM,
        number=7,
        enum=WriteMode,
    )
    location: str = proto.Field(
        proto.STRING,
        number=8,
    )


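# A minimal sketch of creating a ``PENDING`` write stream, whose data stays
# invisible until the stream is committed. ``BigQueryWriteClient`` is
# exported by ``google.cloud.bigquery_storage_v1``; the parent table path
# below is an illustrative placeholder.
#
#     from google.cloud.bigquery_storage_v1 import BigQueryWriteClient, types
#
#     client = BigQueryWriteClient()
#     stream = client.create_write_stream(
#         parent="projects/my-project/datasets/my_dataset/tables/my_table",
#         write_stream=types.WriteStream(type_=types.WriteStream.Type.PENDING),
#     )
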
__all__ = tuple(sorted(__protobuf__.manifest))