Coverage Report

Created: 2026-06-30 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/duckdb/extension/parquet/include/column_writer.hpp
Line
Count
Source
1
//===----------------------------------------------------------------------===//
2
//                         DuckDB
3
//
4
// column_writer.hpp
5
//
6
//
7
//===----------------------------------------------------------------------===//
8
9
#pragma once
10
11
#include <stddef.h>
12
#include <stdint.h>
13
#include <string>
14
#include <vector>
15
16
#include "duckdb.hpp"
17
#include "parquet_types.h"
18
#include "parquet_column_schema.hpp"
19
#include "duckdb/planner/expression/bound_reference_expression.hpp"
20
#include "duckdb/common/assert.hpp"
21
#include "duckdb/common/exception.hpp"
22
#include "duckdb/common/helper.hpp"
23
#include "duckdb/common/optional_idx.hpp"
24
#include "duckdb/common/optional_ptr.hpp"
25
#include "duckdb/common/string.hpp"
26
#include "duckdb/common/typedefs.hpp"
27
#include "duckdb/common/types.hpp"
28
#include "duckdb/common/unique_ptr.hpp"
29
#include "duckdb/common/vector.hpp"
30
31
namespace duckdb_parquet {
32
class RowGroup;
33
class SchemaElement;
34
} // namespace duckdb_parquet
35
36
namespace duckdb {
37
class MemoryStream;
38
class ParquetWriter;
39
class ColumnWriterPageState;
40
class PrimitiveColumnWriterState;
41
struct ChildFieldIDs;
42
struct ShreddingType;
43
class ResizeableBuffer;
44
class ParquetBloomFilter;
45
class AllocatedData;
46
class BoundReferenceExpression;
47
class ClientContext;
48
class Expression;
49
class Vector;
50
struct ValidityMask;
51
52
class ColumnWriterState {
53
public:
54
  virtual ~ColumnWriterState();
55
56
  unsafe_vector<uint16_t> definition_levels;
57
  unsafe_vector<uint16_t> repetition_levels;
58
  unsafe_vector<uint8_t> is_empty;
59
  idx_t parent_null_count = 0;
60
  idx_t null_count = 0;
61
62
public:
63
  template <class TARGET>
64
0
  TARGET &Cast() {
65
0
    DynamicCastCheck<TARGET>(this);
66
0
    return reinterpret_cast<TARGET &>(*this);
67
0
  }
Unexecuted instantiation: duckdb::ListColumnWriterState& duckdb::ColumnWriterState::Cast<duckdb::ListColumnWriterState>()
Unexecuted instantiation: duckdb::PrimitiveColumnWriterState& duckdb::ColumnWriterState::Cast<duckdb::PrimitiveColumnWriterState>()
Unexecuted instantiation: duckdb::StructColumnWriterState& duckdb::ColumnWriterState::Cast<duckdb::StructColumnWriterState>()
Unexecuted instantiation: duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> >()
68
  template <class TARGET>
69
0
  const TARGET &Cast() const {
70
0
    D_ASSERT(dynamic_cast<const TARGET *>(this));
71
0
    return reinterpret_cast<const TARGET &>(*this);
72
0
  }
Unexecuted instantiation: duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> >() const
73
};
74
75
class ColumnWriterPageState {
76
public:
77
0
  virtual ~ColumnWriterPageState() {
78
0
  }
79
80
public:
81
  template <class TARGET>
82
0
  TARGET &Cast() {
83
0
    DynamicCastCheck<TARGET>(this);
84
0
    return reinterpret_cast<TARGET &>(*this);
85
0
  }
Unexecuted instantiation: duckdb::BooleanWriterPageState& duckdb::ColumnWriterPageState::Cast<duckdb::BooleanWriterPageState>()
Unexecuted instantiation: duckdb::EnumWriterPageState& duckdb::ColumnWriterPageState::Cast<duckdb::EnumWriterPageState>()
Unexecuted instantiation: duckdb::StandardWriterPageState<signed char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<signed char, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<short, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<int, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<int, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, long, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampNSOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampNSOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampSOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampSOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned char, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned short, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned int, unsigned int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned int, unsigned int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned long, unsigned long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned long, unsigned long, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> >()
86
  template <class TARGET>
87
  const TARGET &Cast() const {
88
    D_ASSERT(dynamic_cast<const TARGET *>(this));
89
    return reinterpret_cast<const TARGET &>(*this);
90
  }
91
};
92
93
struct ParquetAnalyzeSchemaState {
94
public:
95
0
  ParquetAnalyzeSchemaState() {
96
0
  }
97
0
  virtual ~ParquetAnalyzeSchemaState() {
98
0
  }
99
100
public:
101
  template <class TARGET>
102
0
  TARGET &Cast() {
103
0
    DynamicCastCheck<TARGET>(this);
104
0
    return reinterpret_cast<TARGET &>(*this);
105
0
  }
106
  template <class TARGET>
107
0
  const TARGET &Cast() const {
108
0
    D_ASSERT(dynamic_cast<const TARGET *>(this));
109
0
    return reinterpret_cast<const TARGET &>(*this);
110
0
  }
111
};
112
113
class ColumnWriter {
114
protected:
115
  static constexpr uint16_t PARQUET_DEFINE_VALID = UINT16_C(65535);
116
117
public:
118
  ColumnWriter(ParquetWriter &writer, ParquetColumnSchema &&column_schema, vector<Identifier> schema_path);
119
  virtual ~ColumnWriter();
120
121
public:
122
0
  const LogicalType &Type() const {
123
0
    return column_schema.type;
124
0
  }
125
0
  const ParquetColumnSchema &Schema() const {
126
0
    return column_schema;
127
0
  }
128
0
  ParquetColumnSchema &Schema() {
129
0
    return column_schema;
130
0
  }
131
0
  inline idx_t SchemaIndex() const {
132
0
    D_ASSERT(column_schema.schema_index.IsValid());
133
0
    return column_schema.schema_index.GetIndex();
134
0
  }
135
0
  inline idx_t MaxDefine() const {
136
0
    return column_schema.max_define;
137
0
  }
138
0
  idx_t MaxRepeat() const {
139
0
    return column_schema.max_repeat;
140
0
  }
141
0
  virtual bool HasTransform() {
142
0
    for (auto &child_writer : child_writers) {
143
0
      if (child_writer->HasTransform()) {
144
0
        throw NotImplementedException("ColumnWriter of type '%s' requires a transform, but is not a root "
145
0
                                      "column, this isn't supported currently",
146
0
                                      child_writer->Type());
147
0
      }
148
0
    }
149
0
    return false;
150
0
  }
151
0
  virtual LogicalType TransformedType() const {
152
0
    throw NotImplementedException("Writer does not have a transformed type");
153
0
  }
154
0
  virtual unique_ptr<Expression> TransformExpression(unique_ptr<BoundReferenceExpression> expr) {
155
0
    throw NotImplementedException("Writer does not have a transform expression");
156
0
  }
157
158
0
  virtual unique_ptr<ParquetAnalyzeSchemaState> AnalyzeSchemaInit() {
159
0
    return nullptr;
160
0
  }
161
162
0
  const vector<unique_ptr<ColumnWriter>> &ChildWriters() const {
163
0
    return child_writers;
164
0
  }
165
166
0
  virtual void AnalyzeSchema(ParquetAnalyzeSchemaState &state, Vector &input, idx_t count) {
167
0
    throw NotImplementedException("Writer doesn't require an AnalyzeSchema pass");
168
0
  }
169
170
0
  virtual void AnalyzeSchemaFinalize(const ParquetAnalyzeSchemaState &state) {
171
0
    throw NotImplementedException("Writer doesn't require an AnalyzeSchemaFinalize pass");
172
0
  }
173
174
  virtual bool TryExportPreparedShreddingType(ShreddingType &result) const;
175
176
  virtual idx_t FinalizeSchema(vector<duckdb_parquet::SchemaElement> &schemas) = 0;
177
178
  //! Create the column writer for a specific type recursively
179
  static unique_ptr<ColumnWriter> CreateWriterRecursive(ClientContext &context, ParquetWriter &writer,
180
                                                        vector<Identifier> path_in_schema, const LogicalType &type,
181
                                                        const Identifier &name, bool allow_geometry,
182
                                                        optional_ptr<const ChildFieldIDs> field_ids,
183
                                                        optional_ptr<const ShreddingType> shredding_types,
184
                                                        idx_t max_repeat = 0, idx_t max_define = 1,
185
                                                        bool can_have_nulls = true);
186
187
  virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) = 0;
188
189
  //! Indicates whether the writer needs to analyse the data before preparing it
190
0
  virtual bool HasAnalyze() {
191
0
    return false;
192
0
  }
193
194
0
  virtual void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) {
195
0
    throw NotImplementedException("Writer does not need analysis");
196
0
  }
197
198
  //! Called after all data has been passed to Analyze
199
0
  virtual void FinalizeAnalyze(ColumnWriterState &state) {
200
0
    throw NotImplementedException("Writer does not need analysis");
201
0
  }
202
203
  virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count,
204
                       bool vector_can_span_multiple_pages) = 0;
205
206
  virtual void BeginWrite(ColumnWriterState &state) = 0;
207
  virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0;
208
  virtual void PrepareWrite(ColumnWriterState &state) = 0;
209
  virtual void FinalizeWrite(ColumnWriterState &state) = 0;
210
211
public:
212
  template <class TARGET>
213
  TARGET &Cast() {
214
    DynamicCastCheck<TARGET>(this);
215
    return reinterpret_cast<TARGET &>(*this);
216
  }
217
  template <class TARGET>
218
0
  const TARGET &Cast() const {
219
0
    D_ASSERT(dynamic_cast<const TARGET *>(this));
220
0
    return reinterpret_cast<const TARGET &>(*this);
221
0
  }
222
223
protected:
224
  void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity,
225
                          const idx_t count, const uint16_t define_value, const uint16_t null_value) const;
226
  void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count) const;
227
228
  void CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
229
                    AllocatedData &compressed_buf);
230
231
public:
232
  ParquetWriter &writer;
233
  //! The parent writer (if this is a nested field)
234
  optional_ptr<ColumnWriter> parent;
235
  ParquetColumnSchema column_schema;
236
  vector<Identifier> schema_path;
237
  bool can_have_nulls;
238
239
protected:
240
  vector<unique_ptr<ColumnWriter>> child_writers;
241
};
242
243
} // namespace duckdb