Coverage Report

Created: 2025-09-05 08:05

/src/duckdb/extension/parquet/writer/array_column_writer.cpp
Line
Count
Source (jump to first uncovered line)
1
#include "writer/array_column_writer.hpp"
2
3
namespace duckdb {
4
5
0
//! Forwards the analysis pass to the child writer.
//! The child vector of the array is analyzed as a run of (array size * count) values, with this writer's
//! state passed along as the child's parent state. The `parent` argument is not used here.
void ArrayColumnWriter::Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) {
  auto &list_state = state_p.Cast<ListColumnWriterState>();
  auto &elements = ArrayVector::GetEntry(vector);
  const auto elements_per_array = ArrayType::GetSize(vector.GetType());
  const auto element_count = elements_per_array * count;
  child_writer->Analyze(*list_state.child_state, &state_p, elements, element_count);
}
11
12
void ArrayColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count,
13
0
                                bool vector_can_span_multiple_pages) {
14
0
  auto &state = state_p.Cast<ListColumnWriterState>();
15
16
0
  auto array_size = ArrayType::GetSize(vector.GetType());
17
0
  auto &validity = FlatVector::Validity(vector);
18
19
  // write definition levels and repeats
20
  // the main difference between this and ListColumnWriter::Prepare is that we need to make sure to write out
21
  // repetition levels and definitions for the child elements of the array even if the array itself is NULL.
22
0
  idx_t start = 0;
23
0
  idx_t vcount = parent ? parent->definition_levels.size() - state.parent_index : count;
24
0
  idx_t vector_index = 0;
25
0
  for (idx_t i = start; i < vcount; i++) {
26
0
    idx_t parent_index = state.parent_index + i;
27
0
    if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index]) {
28
0
      state.definition_levels.push_back(parent->definition_levels[parent_index]);
29
0
      state.repetition_levels.push_back(parent->repetition_levels[parent_index]);
30
0
      state.is_empty.push_back(true);
31
0
      continue;
32
0
    }
33
0
    auto first_repeat_level =
34
0
        parent && !parent->repetition_levels.empty() ? parent->repetition_levels[parent_index] : MaxRepeat();
35
0
    if (parent && parent->definition_levels[parent_index] != PARQUET_DEFINE_VALID) {
36
0
      state.definition_levels.push_back(parent->definition_levels[parent_index]);
37
0
      state.repetition_levels.push_back(first_repeat_level);
38
0
      state.is_empty.push_back(false);
39
0
      for (idx_t k = 1; k < array_size; k++) {
40
0
        state.repetition_levels.push_back(MaxRepeat() + 1);
41
0
        state.definition_levels.push_back(parent->definition_levels[parent_index]);
42
0
        state.is_empty.push_back(false);
43
0
      }
44
0
    } else if (validity.RowIsValid(vector_index)) {
45
      // push the repetition levels
46
0
      state.definition_levels.push_back(PARQUET_DEFINE_VALID);
47
0
      state.is_empty.push_back(false);
48
49
0
      state.repetition_levels.push_back(first_repeat_level);
50
0
      for (idx_t k = 1; k < array_size; k++) {
51
0
        state.repetition_levels.push_back(MaxRepeat() + 1);
52
0
        state.definition_levels.push_back(PARQUET_DEFINE_VALID);
53
0
        state.is_empty.push_back(false);
54
0
      }
55
0
    } else {
56
0
      state.definition_levels.push_back(MaxDefine() - 1);
57
0
      state.repetition_levels.push_back(first_repeat_level);
58
0
      state.is_empty.push_back(false);
59
0
      for (idx_t k = 1; k < array_size; k++) {
60
0
        state.repetition_levels.push_back(MaxRepeat() + 1);
61
0
        state.definition_levels.push_back(MaxDefine() - 1);
62
0
        state.is_empty.push_back(false);
63
0
      }
64
0
    }
65
0
    vector_index++;
66
0
  }
67
0
  state.parent_index += vcount;
68
69
0
  auto &array_child = ArrayVector::GetEntry(vector);
70
  // The elements of a single array should not span multiple Parquet pages
71
  // So, we force the entire vector to fit on a single page by setting "vector_can_span_multiple_pages=false"
72
0
  child_writer->Prepare(*state.child_state, &state_p, array_child, count * array_size, false);
73
0
}
74
75
0
//! Forwards the write to the child writer.
//! The child vector of the array is written as a run of (count * array size) values using the child state.
void ArrayColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) {
  auto &list_state = state_p.Cast<ListColumnWriterState>();
  const auto elements_per_array = ArrayType::GetSize(vector.GetType());
  auto &elements = ArrayVector::GetEntry(vector);
  const auto element_count = count * elements_per_array;
  child_writer->Write(*list_state.child_state, elements, element_count);
}
81
82
} // namespace duckdb