/src/duckdb/extension/parquet/include/column_writer.hpp
Line | Count | Source |
1 | | //===----------------------------------------------------------------------===// |
2 | | // DuckDB |
3 | | // |
4 | | // column_writer.hpp |
5 | | // |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #pragma once |
10 | | |
11 | | #include "duckdb.hpp" |
12 | | #include "parquet_types.h" |
13 | | #include "parquet_column_schema.hpp" |
14 | | #include "duckdb/planner/expression/bound_reference_expression.hpp" |
15 | | |
16 | | namespace duckdb { |
17 | | class MemoryStream; |
18 | | class ParquetWriter; |
19 | | class ColumnWriterPageState; |
20 | | class PrimitiveColumnWriterState; |
21 | | struct ChildFieldIDs; |
22 | | struct ShreddingType; |
23 | | class ResizeableBuffer; |
24 | | class ParquetBloomFilter; |
25 | | |
26 | | class ColumnWriterState { |
27 | | public: |
28 | | virtual ~ColumnWriterState(); |
29 | | |
30 | | unsafe_vector<uint16_t> definition_levels; |
31 | | unsafe_vector<uint16_t> repetition_levels; |
32 | | unsafe_vector<uint8_t> is_empty; |
33 | | idx_t parent_null_count = 0; |
34 | | idx_t null_count = 0; |
35 | | |
36 | | public: |
37 | | template <class TARGET> |
38 | 0 | TARGET &Cast() { |
39 | 0 | DynamicCastCheck<TARGET>(this); |
40 | 0 | return reinterpret_cast<TARGET &>(*this); |
41 | 0 | } Unexecuted instantiation: duckdb::ListColumnWriterState& duckdb::ColumnWriterState::Cast<duckdb::ListColumnWriterState>() Unexecuted instantiation: duckdb::PrimitiveColumnWriterState& duckdb::ColumnWriterState::Cast<duckdb::PrimitiveColumnWriterState>() Unexecuted instantiation: duckdb::StructColumnWriterState& duckdb::ColumnWriterState::Cast<duckdb::StructColumnWriterState>() Unexecuted instantiation: duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> >() Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> >() |
42 | | template <class TARGET> |
43 | 0 | const TARGET &Cast() const { |
44 | 0 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
45 | 0 | return reinterpret_cast<const TARGET &>(*this); |
46 | 0 | } Unexecuted instantiation: duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> >() const Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> >() const |
47 | | }; |
48 | | |
49 | | class ColumnWriterPageState { |
50 | | public: |
51 | 0 | virtual ~ColumnWriterPageState() { |
52 | 0 | } |
53 | | |
54 | | public: |
55 | | template <class TARGET> |
56 | 0 | TARGET &Cast() { |
57 | 0 | DynamicCastCheck<TARGET>(this); |
58 | 0 | return reinterpret_cast<TARGET &>(*this); |
59 | 0 | } Unexecuted instantiation: duckdb::BooleanWriterPageState& duckdb::ColumnWriterPageState::Cast<duckdb::BooleanWriterPageState>() Unexecuted instantiation: duckdb::EnumWriterPageState& duckdb::ColumnWriterPageState::Cast<duckdb::EnumWriterPageState>() Unexecuted instantiation: duckdb::StandardWriterPageState<signed char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<signed char, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<short, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<int, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<int, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<long, long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, long, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampNSOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampNSOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampSOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampSOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned char, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned short, int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned int, unsigned int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned int, unsigned int, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned long, unsigned long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned long, unsigned long, duckdb::ParquetCastOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> >() Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> >() |
60 | | template <class TARGET> |
61 | | const TARGET &Cast() const { |
62 | | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
63 | | return reinterpret_cast<const TARGET &>(*this); |
64 | | } |
65 | | }; |
66 | | |
67 | | struct ParquetAnalyzeSchemaState { |
68 | | public: |
69 | 0 | ParquetAnalyzeSchemaState() { |
70 | 0 | } |
71 | 0 | virtual ~ParquetAnalyzeSchemaState() { |
72 | 0 | } |
73 | | |
74 | | public: |
75 | | template <class TARGET> |
76 | 0 | TARGET &Cast() { |
77 | 0 | DynamicCastCheck<TARGET>(this); |
78 | 0 | return reinterpret_cast<TARGET &>(*this); |
79 | 0 | } |
80 | | template <class TARGET> |
81 | 0 | const TARGET &Cast() const { |
82 | 0 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
83 | 0 | return reinterpret_cast<const TARGET &>(*this); |
84 | 0 | } |
85 | | }; |
86 | | |
87 | | class ColumnWriter { |
88 | | protected: |
89 | | static constexpr uint16_t PARQUET_DEFINE_VALID = UINT16_C(65535); |
90 | | |
91 | | public: |
92 | | ColumnWriter(ParquetWriter &writer, ParquetColumnSchema &&column_schema, vector<string> schema_path); |
93 | | virtual ~ColumnWriter(); |
94 | | |
95 | | public: |
96 | 0 | const LogicalType &Type() const { |
97 | 0 | return column_schema.type; |
98 | 0 | } |
99 | 0 | const ParquetColumnSchema &Schema() const { |
100 | 0 | return column_schema; |
101 | 0 | } |
102 | 0 | ParquetColumnSchema &Schema() { |
103 | 0 | return column_schema; |
104 | 0 | } |
105 | 0 | inline idx_t SchemaIndex() const { |
106 | 0 | D_ASSERT(column_schema.schema_index.IsValid()); |
107 | 0 | return column_schema.schema_index.GetIndex(); |
108 | 0 | } |
109 | 0 | inline idx_t MaxDefine() const { |
110 | 0 | return column_schema.max_define; |
111 | 0 | } |
112 | 0 | idx_t MaxRepeat() const { |
113 | 0 | return column_schema.max_repeat; |
114 | 0 | } |
115 | 0 | virtual bool HasTransform() { |
116 | 0 | for (auto &child_writer : child_writers) { |
117 | 0 | if (child_writer->HasTransform()) { |
118 | 0 | throw NotImplementedException("ColumnWriter of type '%s' requires a transform, but is not a root " |
119 | 0 | "column, this isn't supported currently", |
120 | 0 | child_writer->Type()); |
121 | 0 | } |
122 | 0 | } |
123 | 0 | return false; |
124 | 0 | } |
125 | 0 | virtual LogicalType TransformedType() const { |
126 | 0 | throw NotImplementedException("Writer does not have a transformed type"); |
127 | 0 | } |
128 | 0 | virtual unique_ptr<Expression> TransformExpression(unique_ptr<BoundReferenceExpression> expr) { |
129 | 0 | throw NotImplementedException("Writer does not have a transform expression"); |
130 | 0 | } |
131 | | |
132 | 0 | virtual unique_ptr<ParquetAnalyzeSchemaState> AnalyzeSchemaInit() { |
133 | 0 | return nullptr; |
134 | 0 | } |
135 | | |
136 | 0 | const vector<unique_ptr<ColumnWriter>> &ChildWriters() const { |
137 | 0 | return child_writers; |
138 | 0 | } |
139 | | |
140 | 0 | virtual void AnalyzeSchema(ParquetAnalyzeSchemaState &state, Vector &input, idx_t count) { |
141 | 0 | throw NotImplementedException("Writer doesn't require an AnalyzeSchema pass"); |
142 | 0 | } |
143 | | |
144 | 0 | virtual void AnalyzeSchemaFinalize(const ParquetAnalyzeSchemaState &state) { |
145 | 0 | throw NotImplementedException("Writer doesn't require an AnalyzeSchemaFinalize pass"); |
146 | 0 | } |
147 | | |
148 | | virtual idx_t FinalizeSchema(vector<duckdb_parquet::SchemaElement> &schemas) = 0; |
149 | | |
150 | | //! Create the column writer for a specific type recursively |
151 | | static unique_ptr<ColumnWriter> CreateWriterRecursive(ClientContext &context, ParquetWriter &writer, |
152 | | vector<string> path_in_schema, const LogicalType &type, |
153 | | const string &name, bool allow_geometry, |
154 | | optional_ptr<const ChildFieldIDs> field_ids, |
155 | | optional_ptr<const ShreddingType> shredding_types, |
156 | | idx_t max_repeat = 0, idx_t max_define = 1, |
157 | | bool can_have_nulls = true); |
158 | | |
159 | | virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) = 0; |
160 | | |
161 | | //! indicates whether the write need to analyse the data before preparing it |
162 | 0 | virtual bool HasAnalyze() { |
163 | 0 | return false; |
164 | 0 | } |
165 | | |
166 | 0 | virtual void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) { |
167 | 0 | throw NotImplementedException("Writer does not need analysis"); |
168 | 0 | } |
169 | | |
170 | | //! Called after all data has been passed to Analyze |
171 | 0 | virtual void FinalizeAnalyze(ColumnWriterState &state) { |
172 | 0 | throw NotImplementedException("Writer does not need analysis"); |
173 | 0 | } |
174 | | |
175 | | virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count, |
176 | | bool vector_can_span_multiple_pages) = 0; |
177 | | |
178 | | virtual void BeginWrite(ColumnWriterState &state) = 0; |
179 | | virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0; |
180 | | virtual void FinalizeWrite(ColumnWriterState &state) = 0; |
181 | | |
182 | | public: |
183 | | template <class TARGET> |
184 | | TARGET &Cast() { |
185 | | DynamicCastCheck<TARGET>(this); |
186 | | return reinterpret_cast<TARGET &>(*this); |
187 | | } |
188 | | template <class TARGET> |
189 | 0 | const TARGET &Cast() const { |
190 | 0 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
191 | 0 | return reinterpret_cast<const TARGET &>(*this); |
192 | 0 | } |
193 | | |
194 | | protected: |
195 | | void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity, |
196 | | const idx_t count, const uint16_t define_value, const uint16_t null_value) const; |
197 | | void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count) const; |
198 | | |
199 | | void CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data, |
200 | | AllocatedData &compressed_buf); |
201 | | |
202 | | public: |
203 | | ParquetWriter &writer; |
204 | | //! The parent writer (if this is a nested field) |
205 | | optional_ptr<ColumnWriter> parent; |
206 | | ParquetColumnSchema column_schema; |
207 | | vector<string> schema_path; |
208 | | bool can_have_nulls; |
209 | | |
210 | | protected: |
211 | | vector<unique_ptr<ColumnWriter>> child_writers; |
212 | | }; |
213 | | |
214 | | } // namespace duckdb |