/src/duckdb/extension/parquet/writer/array_column_writer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | #include "writer/array_column_writer.hpp" |
2 | | |
3 | | namespace duckdb { |
4 | | |
5 | 0 | void ArrayColumnWriter::Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { |
6 | 0 | auto &state = state_p.Cast<ListColumnWriterState>(); |
7 | 0 | auto &array_child = ArrayVector::GetEntry(vector); |
8 | 0 | auto array_size = ArrayType::GetSize(vector.GetType()); |
9 | 0 | child_writer->Analyze(*state.child_state, &state_p, array_child, array_size * count); |
10 | 0 | } |
11 | | |
12 | | void ArrayColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count, |
13 | 0 | bool vector_can_span_multiple_pages) { |
14 | 0 | auto &state = state_p.Cast<ListColumnWriterState>(); |
15 | |
|
16 | 0 | auto array_size = ArrayType::GetSize(vector.GetType()); |
17 | 0 | auto &validity = FlatVector::Validity(vector); |
18 | | |
19 | | // write definition levels and repeats |
20 | | // the main difference between this and ListColumnWriter::Prepare is that we need to make sure to write out |
21 | | // repetition levels and definitions for the child elements of the array even if the array itself is NULL. |
22 | 0 | idx_t start = 0; |
23 | 0 | idx_t vcount = parent ? parent->definition_levels.size() - state.parent_index : count; |
24 | 0 | idx_t vector_index = 0; |
25 | 0 | for (idx_t i = start; i < vcount; i++) { |
26 | 0 | idx_t parent_index = state.parent_index + i; |
27 | 0 | if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index]) { |
28 | 0 | state.definition_levels.push_back(parent->definition_levels[parent_index]); |
29 | 0 | state.repetition_levels.push_back(parent->repetition_levels[parent_index]); |
30 | 0 | state.is_empty.push_back(true); |
31 | 0 | continue; |
32 | 0 | } |
33 | 0 | auto first_repeat_level = |
34 | 0 | parent && !parent->repetition_levels.empty() ? parent->repetition_levels[parent_index] : MaxRepeat(); |
35 | 0 | if (parent && parent->definition_levels[parent_index] != PARQUET_DEFINE_VALID) { |
36 | 0 | state.definition_levels.push_back(parent->definition_levels[parent_index]); |
37 | 0 | state.repetition_levels.push_back(first_repeat_level); |
38 | 0 | state.is_empty.push_back(false); |
39 | 0 | for (idx_t k = 1; k < array_size; k++) { |
40 | 0 | state.repetition_levels.push_back(MaxRepeat() + 1); |
41 | 0 | state.definition_levels.push_back(parent->definition_levels[parent_index]); |
42 | 0 | state.is_empty.push_back(false); |
43 | 0 | } |
44 | 0 | } else if (validity.RowIsValid(vector_index)) { |
45 | | // push the repetition levels |
46 | 0 | state.definition_levels.push_back(PARQUET_DEFINE_VALID); |
47 | 0 | state.is_empty.push_back(false); |
48 | |
|
49 | 0 | state.repetition_levels.push_back(first_repeat_level); |
50 | 0 | for (idx_t k = 1; k < array_size; k++) { |
51 | 0 | state.repetition_levels.push_back(MaxRepeat() + 1); |
52 | 0 | state.definition_levels.push_back(PARQUET_DEFINE_VALID); |
53 | 0 | state.is_empty.push_back(false); |
54 | 0 | } |
55 | 0 | } else { |
56 | 0 | state.definition_levels.push_back(MaxDefine() - 1); |
57 | 0 | state.repetition_levels.push_back(first_repeat_level); |
58 | 0 | state.is_empty.push_back(false); |
59 | 0 | for (idx_t k = 1; k < array_size; k++) { |
60 | 0 | state.repetition_levels.push_back(MaxRepeat() + 1); |
61 | 0 | state.definition_levels.push_back(MaxDefine() - 1); |
62 | 0 | state.is_empty.push_back(false); |
63 | 0 | } |
64 | 0 | } |
65 | 0 | vector_index++; |
66 | 0 | } |
67 | 0 | state.parent_index += vcount; |
68 | |
|
69 | 0 | auto &array_child = ArrayVector::GetEntry(vector); |
70 | | // The elements of a single array should not span multiple Parquet pages |
71 | | // So, we force the entire vector to fit on a single page by setting "vector_can_span_multiple_pages=false" |
72 | 0 | child_writer->Prepare(*state.child_state, &state_p, array_child, count * array_size, false); |
73 | 0 | } |
74 | | |
75 | 0 | void ArrayColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) { |
76 | 0 | auto &state = state_p.Cast<ListColumnWriterState>(); |
77 | 0 | auto array_size = ArrayType::GetSize(vector.GetType()); |
78 | 0 | auto &array_child = ArrayVector::GetEntry(vector); |
79 | 0 | child_writer->Write(*state.child_state, array_child, count * array_size); |
80 | 0 | } |
81 | | |
82 | | } // namespace duckdb |