Coverage Report

Created: 2026-03-31 07:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/duckdb/extension/parquet/include/column_writer.hpp
Line
Count
Source
1
//===----------------------------------------------------------------------===//
2
//                         DuckDB
3
//
4
// column_writer.hpp
5
//
6
//
7
//===----------------------------------------------------------------------===//
8
9
#pragma once
10
11
#include "duckdb.hpp"
12
#include "parquet_types.h"
13
#include "parquet_column_schema.hpp"
14
#include "duckdb/planner/expression/bound_reference_expression.hpp"
15
16
namespace duckdb {
17
class MemoryStream;
18
class ParquetWriter;
19
class ColumnWriterPageState;
20
class PrimitiveColumnWriterState;
21
struct ChildFieldIDs;
22
struct ShreddingType;
23
class ResizeableBuffer;
24
class ParquetBloomFilter;
25
26
class ColumnWriterState {
27
public:
28
  virtual ~ColumnWriterState();
29
30
  unsafe_vector<uint16_t> definition_levels;
31
  unsafe_vector<uint16_t> repetition_levels;
32
  unsafe_vector<uint8_t> is_empty;
33
  idx_t parent_null_count = 0;
34
  idx_t null_count = 0;
35
36
public:
37
  template <class TARGET>
38
0
  TARGET &Cast() {
39
0
    DynamicCastCheck<TARGET>(this);
40
0
    return reinterpret_cast<TARGET &>(*this);
41
0
  }
Unexecuted instantiation: duckdb::ListColumnWriterState& duckdb::ColumnWriterState::Cast<duckdb::ListColumnWriterState>()
Unexecuted instantiation: duckdb::PrimitiveColumnWriterState& duckdb::ColumnWriterState::Cast<duckdb::PrimitiveColumnWriterState>()
Unexecuted instantiation: duckdb::StructColumnWriterState& duckdb::ColumnWriterState::Cast<duckdb::StructColumnWriterState>()
Unexecuted instantiation: duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> >()
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator>& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> >()
42
  template <class TARGET>
43
0
  const TARGET &Cast() const {
44
0
    D_ASSERT(dynamic_cast<const TARGET *>(this));
45
0
    return reinterpret_cast<const TARGET &>(*this);
46
0
  }
Unexecuted instantiation: duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<signed char, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<short, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<int, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampNSOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<long, long, duckdb::ParquetTimestampSOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned char, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned short, int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned int, unsigned int, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<unsigned long, unsigned long, duckdb::ParquetCastOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> >() const
Unexecuted instantiation: duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> const& duckdb::ColumnWriterState::Cast<duckdb::StandardColumnWriterState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> >() const
47
};
48
49
class ColumnWriterPageState {
50
public:
51
0
  virtual ~ColumnWriterPageState() {
52
0
  }
53
54
public:
55
  template <class TARGET>
56
0
  TARGET &Cast() {
57
0
    DynamicCastCheck<TARGET>(this);
58
0
    return reinterpret_cast<TARGET &>(*this);
59
0
  }
Unexecuted instantiation: duckdb::BooleanWriterPageState& duckdb::ColumnWriterPageState::Cast<duckdb::BooleanWriterPageState>()
Unexecuted instantiation: duckdb::EnumWriterPageState& duckdb::ColumnWriterPageState::Cast<duckdb::EnumWriterPageState>()
Unexecuted instantiation: duckdb::StandardWriterPageState<signed char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<signed char, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<short, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<int, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<int, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, long, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampInt96Operator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampMSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::dtime_tz_t, long, duckdb::ParquetTimeTZOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::hugeint_t, double, duckdb::ParquetHugeintOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::uhugeint_t, double, duckdb::ParquetUhugeintOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampNSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampNSOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampNSOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, duckdb::Int96, duckdb::ParquetTimestampSInt96Operator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampSOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<long, long, duckdb::ParquetTimestampSOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned char, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned char, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned short, int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned short, int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned int, unsigned int, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned int, unsigned int, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<unsigned long, unsigned long, duckdb::ParquetCastOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<unsigned long, unsigned long, duckdb::ParquetCastOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::float_na_equal, float, duckdb::FloatingPointOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::double_na_equal, double, duckdb::FloatingPointOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetBlobOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetGeometryOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::string_t, duckdb::string_t, duckdb::ParquetStringOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::hugeint_t, duckdb::ParquetUUIDTargetType, duckdb::ParquetUUIDOperator> >()
Unexecuted instantiation: duckdb::StandardWriterPageState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator>& duckdb::ColumnWriterPageState::Cast<duckdb::StandardWriterPageState<duckdb::interval_t, duckdb::ParquetIntervalTargetType, duckdb::ParquetIntervalOperator> >()
60
  template <class TARGET>
61
  const TARGET &Cast() const {
62
    D_ASSERT(dynamic_cast<const TARGET *>(this));
63
    return reinterpret_cast<const TARGET &>(*this);
64
  }
65
};
66
67
struct ParquetAnalyzeSchemaState {
68
public:
69
0
  ParquetAnalyzeSchemaState() {
70
0
  }
71
0
  virtual ~ParquetAnalyzeSchemaState() {
72
0
  }
73
74
public:
75
  template <class TARGET>
76
0
  TARGET &Cast() {
77
0
    DynamicCastCheck<TARGET>(this);
78
0
    return reinterpret_cast<TARGET &>(*this);
79
0
  }
80
  template <class TARGET>
81
0
  const TARGET &Cast() const {
82
0
    D_ASSERT(dynamic_cast<const TARGET *>(this));
83
0
    return reinterpret_cast<const TARGET &>(*this);
84
0
  }
85
};
86
87
class ColumnWriter {
88
protected:
89
  static constexpr uint16_t PARQUET_DEFINE_VALID = UINT16_C(65535);
90
91
public:
92
  ColumnWriter(ParquetWriter &writer, ParquetColumnSchema &&column_schema, vector<string> schema_path);
93
  virtual ~ColumnWriter();
94
95
public:
96
0
  const LogicalType &Type() const {
97
0
    return column_schema.type;
98
0
  }
99
0
  const ParquetColumnSchema &Schema() const {
100
0
    return column_schema;
101
0
  }
102
0
  ParquetColumnSchema &Schema() {
103
0
    return column_schema;
104
0
  }
105
0
  inline idx_t SchemaIndex() const {
106
0
    D_ASSERT(column_schema.schema_index.IsValid());
107
0
    return column_schema.schema_index.GetIndex();
108
0
  }
109
0
  inline idx_t MaxDefine() const {
110
0
    return column_schema.max_define;
111
0
  }
112
0
  idx_t MaxRepeat() const {
113
0
    return column_schema.max_repeat;
114
0
  }
115
0
  virtual bool HasTransform() {
116
0
    for (auto &child_writer : child_writers) {
117
0
      if (child_writer->HasTransform()) {
118
0
        throw NotImplementedException("ColumnWriter of type '%s' requires a transform, but is not a root "
119
0
                                      "column, this isn't supported currently",
120
0
                                      child_writer->Type());
121
0
      }
122
0
    }
123
0
    return false;
124
0
  }
125
0
  virtual LogicalType TransformedType() const {
126
0
    throw NotImplementedException("Writer does not have a transformed type");
127
0
  }
128
0
  virtual unique_ptr<Expression> TransformExpression(unique_ptr<BoundReferenceExpression> expr) {
129
0
    throw NotImplementedException("Writer does not have a transform expression");
130
0
  }
131
132
0
  virtual unique_ptr<ParquetAnalyzeSchemaState> AnalyzeSchemaInit() {
133
0
    return nullptr;
134
0
  }
135
136
0
  const vector<unique_ptr<ColumnWriter>> &ChildWriters() const {
137
0
    return child_writers;
138
0
  }
139
140
0
  virtual void AnalyzeSchema(ParquetAnalyzeSchemaState &state, Vector &input, idx_t count) {
141
0
    throw NotImplementedException("Writer doesn't require an AnalyzeSchema pass");
142
0
  }
143
144
0
  virtual void AnalyzeSchemaFinalize(const ParquetAnalyzeSchemaState &state) {
145
0
    throw NotImplementedException("Writer doesn't require an AnalyzeSchemaFinalize pass");
146
0
  }
147
148
  virtual idx_t FinalizeSchema(vector<duckdb_parquet::SchemaElement> &schemas) = 0;
149
150
  //! Create the column writer for a specific type recursively
151
  static unique_ptr<ColumnWriter> CreateWriterRecursive(ClientContext &context, ParquetWriter &writer,
152
                                                        vector<string> path_in_schema, const LogicalType &type,
153
                                                        const string &name, bool allow_geometry,
154
                                                        optional_ptr<const ChildFieldIDs> field_ids,
155
                                                        optional_ptr<const ShreddingType> shredding_types,
156
                                                        idx_t max_repeat = 0, idx_t max_define = 1,
157
                                                        bool can_have_nulls = true);
158
159
  virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) = 0;
160
161
  //! Indicates whether the writer needs to analyse the data before preparing it
162
0
  virtual bool HasAnalyze() {
163
0
    return false;
164
0
  }
165
166
0
  virtual void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) {
167
0
    throw NotImplementedException("Writer does not need analysis");
168
0
  }
169
170
  //! Called after all data has been passed to Analyze
171
0
  virtual void FinalizeAnalyze(ColumnWriterState &state) {
172
0
    throw NotImplementedException("Writer does not need analysis");
173
0
  }
174
175
  virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count,
176
                       bool vector_can_span_multiple_pages) = 0;
177
178
  virtual void BeginWrite(ColumnWriterState &state) = 0;
179
  virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0;
180
  virtual void FinalizeWrite(ColumnWriterState &state) = 0;
181
182
public:
183
  template <class TARGET>
184
  TARGET &Cast() {
185
    DynamicCastCheck<TARGET>(this);
186
    return reinterpret_cast<TARGET &>(*this);
187
  }
188
  template <class TARGET>
189
0
  const TARGET &Cast() const {
190
0
    D_ASSERT(dynamic_cast<const TARGET *>(this));
191
0
    return reinterpret_cast<const TARGET &>(*this);
192
0
  }
193
194
protected:
195
  void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity,
196
                          const idx_t count, const uint16_t define_value, const uint16_t null_value) const;
197
  void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count) const;
198
199
  void CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
200
                    AllocatedData &compressed_buf);
201
202
public:
203
  ParquetWriter &writer;
204
  //! The parent writer (if this is a nested field)
205
  optional_ptr<ColumnWriter> parent;
206
  ParquetColumnSchema column_schema;
207
  vector<string> schema_path;
208
  bool can_have_nulls;
209
210
protected:
211
  vector<unique_ptr<ColumnWriter>> child_writers;
212
};
213
214
} // namespace duckdb