/src/duckdb/extension/parquet/writer/struct_column_writer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | #include "writer/struct_column_writer.hpp" |
2 | | |
3 | | namespace duckdb { |
4 | | |
5 | | class StructColumnWriterState : public ColumnWriterState { |
6 | | public: |
7 | | StructColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx) |
8 | 0 | : row_group(row_group), col_idx(col_idx) { |
9 | 0 | } |
10 | 0 | ~StructColumnWriterState() override = default; |
11 | | |
12 | | duckdb_parquet::RowGroup &row_group; |
13 | | idx_t col_idx; |
14 | | vector<unique_ptr<ColumnWriterState>> child_states; |
15 | | }; |
16 | | |
17 | 0 | unique_ptr<ColumnWriterState> StructColumnWriter::InitializeWriteState(duckdb_parquet::RowGroup &row_group) { |
18 | 0 | auto result = make_uniq<StructColumnWriterState>(row_group, row_group.columns.size()); |
19 | |
|
20 | 0 | result->child_states.reserve(child_writers.size()); |
21 | 0 | for (auto &child_writer : child_writers) { |
22 | 0 | result->child_states.push_back(child_writer->InitializeWriteState(row_group)); |
23 | 0 | } |
24 | 0 | return std::move(result); |
25 | 0 | } |
26 | | |
27 | 0 | bool StructColumnWriter::HasAnalyze() { |
28 | 0 | for (auto &child_writer : child_writers) { |
29 | 0 | if (child_writer->HasAnalyze()) { |
30 | 0 | return true; |
31 | 0 | } |
32 | 0 | } |
33 | 0 | return false; |
34 | 0 | } |
35 | | |
36 | 0 | void StructColumnWriter::Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { |
37 | 0 | auto &state = state_p.Cast<StructColumnWriterState>(); |
38 | 0 | auto &child_vectors = StructVector::GetEntries(vector); |
39 | 0 | for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { |
40 | | // Need to check again. It might be that just one child needs it but the rest not |
41 | 0 | if (child_writers[child_idx]->HasAnalyze()) { |
42 | 0 | child_writers[child_idx]->Analyze(*state.child_states[child_idx], &state_p, *child_vectors[child_idx], |
43 | 0 | count); |
44 | 0 | } |
45 | 0 | } |
46 | 0 | } |
47 | | |
48 | 0 | void StructColumnWriter::FinalizeAnalyze(ColumnWriterState &state_p) { |
49 | 0 | auto &state = state_p.Cast<StructColumnWriterState>(); |
50 | 0 | for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { |
51 | | // Need to check again. It might be that just one child needs it but the rest not |
52 | 0 | if (child_writers[child_idx]->HasAnalyze()) { |
53 | 0 | child_writers[child_idx]->FinalizeAnalyze(*state.child_states[child_idx]); |
54 | 0 | } |
55 | 0 | } |
56 | 0 | } |
57 | | |
58 | 0 | void StructColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { |
59 | 0 | auto &state = state_p.Cast<StructColumnWriterState>(); |
60 | |
|
61 | 0 | auto &validity = FlatVector::Validity(vector); |
62 | 0 | if (parent) { |
63 | | // propagate empty entries from the parent |
64 | 0 | while (state.is_empty.size() < parent->is_empty.size()) { |
65 | 0 | state.is_empty.push_back(parent->is_empty[state.is_empty.size()]); |
66 | 0 | } |
67 | 0 | } |
68 | 0 | HandleRepeatLevels(state_p, parent, count, MaxRepeat()); |
69 | 0 | HandleDefineLevels(state_p, parent, validity, count, PARQUET_DEFINE_VALID, MaxDefine() - 1); |
70 | 0 | auto &child_vectors = StructVector::GetEntries(vector); |
71 | 0 | for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { |
72 | 0 | child_writers[child_idx]->Prepare(*state.child_states[child_idx], &state_p, *child_vectors[child_idx], count); |
73 | 0 | } |
74 | 0 | } |
75 | | |
76 | 0 | void StructColumnWriter::BeginWrite(ColumnWriterState &state_p) { |
77 | 0 | auto &state = state_p.Cast<StructColumnWriterState>(); |
78 | 0 | for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { |
79 | 0 | child_writers[child_idx]->BeginWrite(*state.child_states[child_idx]); |
80 | 0 | } |
81 | 0 | } |
82 | | |
83 | 0 | void StructColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) { |
84 | 0 | auto &state = state_p.Cast<StructColumnWriterState>(); |
85 | 0 | auto &child_vectors = StructVector::GetEntries(vector); |
86 | 0 | for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { |
87 | 0 | child_writers[child_idx]->Write(*state.child_states[child_idx], *child_vectors[child_idx], count); |
88 | 0 | } |
89 | 0 | } |
90 | | |
91 | 0 | void StructColumnWriter::FinalizeWrite(ColumnWriterState &state_p) { |
92 | 0 | auto &state = state_p.Cast<StructColumnWriterState>(); |
93 | 0 | for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { |
94 | | // we add the null count of the struct to the null count of the children |
95 | 0 | state.child_states[child_idx]->null_count += state_p.null_count; |
96 | 0 | child_writers[child_idx]->FinalizeWrite(*state.child_states[child_idx]); |
97 | 0 | } |
98 | 0 | } |
99 | | |
100 | | } // namespace duckdb |