Coverage Report

Created: 2025-08-28 07:58

/src/duckdb/extension/parquet/writer/boolean_column_writer.cpp
Line
Count
Source (jump to first uncovered line)
1
#include "writer/boolean_column_writer.hpp"
2
3
namespace duckdb {
4
5
class BooleanStatisticsState : public ColumnWriterStatistics {
6
public:
7
0
  BooleanStatisticsState() : min(true), max(false) {
8
0
  }
9
10
  bool min;
11
  bool max;
12
13
public:
14
0
  bool HasStats() override {
15
0
    return !(min && !max);
16
0
  }
17
18
0
  string GetMin() override {
19
0
    return GetMinValue();
20
0
  }
21
0
  string GetMax() override {
22
0
    return GetMaxValue();
23
0
  }
24
0
  string GetMinValue() override {
25
0
    return HasStats() ? string(const_char_ptr_cast(&min), sizeof(bool)) : string();
26
0
  }
27
0
  string GetMaxValue() override {
28
0
    return HasStats() ? string(const_char_ptr_cast(&max), sizeof(bool)) : string();
29
0
  }
30
};
31
32
// Per-page bit-packing state: booleans are accumulated into a single byte before being written.
class BooleanWriterPageState : public ColumnWriterPageState {
public:
  // Byte currently being filled; bits at positions >= byte_pos are still zero.
  uint8_t byte {0};
  // Position of the next bit to set in `byte`; reset to 0 whenever the byte is written out.
  uint8_t byte_pos {0};
};
37
38
// Forwards every argument to PrimitiveColumnWriter; no boolean-specific setup happens here.
BooleanColumnWriter::BooleanColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema,
                                         vector<string> schema_path_p, bool can_have_nulls)
    : PrimitiveColumnWriter(writer, column_schema, std::move(schema_path_p), can_have_nulls) {
  // Intentionally empty.
}
42
43
0
// Creates a fresh BooleanStatisticsState (which starts in its "no stats yet" form).
unique_ptr<ColumnWriterStatistics> BooleanColumnWriter::InitializeStatsState() {
  unique_ptr<ColumnWriterStatistics> fresh_stats = make_uniq<BooleanStatisticsState>();
  return fresh_stats;
}
46
47
void BooleanColumnWriter::WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p,
48
                                      ColumnWriterPageState *state_p, Vector &input_column, idx_t chunk_start,
49
0
                                      idx_t chunk_end) {
50
0
  auto &stats = stats_p->Cast<BooleanStatisticsState>();
51
0
  auto &state = state_p->Cast<BooleanWriterPageState>();
52
0
  const auto &mask = FlatVector::Validity(input_column);
53
54
0
  const auto *const ptr = FlatVector::GetData<bool>(input_column);
55
0
  if (stats.max && !stats.min && mask.AllValid()) {
56
    // Fast path: stats have already been set, and there's no NULLs
57
0
    for (idx_t r = chunk_start; r < chunk_end; r++) {
58
0
      const auto &val = ptr[r];
59
0
      state.byte |= val << state.byte_pos;
60
0
      if (++state.byte_pos == 8) {
61
0
        temp_writer.Write(state.byte);
62
0
        state.byte = 0;
63
0
        state.byte_pos = 0;
64
0
      }
65
0
    }
66
0
  } else {
67
0
    for (idx_t r = chunk_start; r < chunk_end; r++) {
68
0
      if (!mask.RowIsValid(r)) {
69
0
        continue;
70
0
      }
71
0
      const auto &val = ptr[r];
72
73
0
      stats.max |= val;
74
0
      stats.min &= val;
75
0
      state.byte |= val << state.byte_pos;
76
77
0
      if (++state.byte_pos == 8) {
78
0
        temp_writer.Write(state.byte);
79
0
        state.byte = 0;
80
0
        state.byte_pos = 0;
81
0
      }
82
0
    }
83
0
  }
84
0
}
85
86
// Creates an empty bit-packing state; neither `state` nor `page_idx` is consulted.
unique_ptr<ColumnWriterPageState> BooleanColumnWriter::InitializePageState(PrimitiveColumnWriterState &state,
                                                                           idx_t page_idx) {
  unique_ptr<ColumnWriterPageState> fresh_page_state = make_uniq<BooleanWriterPageState>();
  return fresh_page_state;
}
90
91
0
// Writes out the partially filled byte, if any, and resets the bit-packing state.
// Does nothing when no bits are pending.
void BooleanColumnWriter::FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) {
  auto &page_state = state_p->Cast<BooleanWriterPageState>();
  if (page_state.byte_pos == 0) {
    // No pending bits.
    return;
  }
  // Unused high bits of the pending byte are zero.
  temp_writer.Write<uint8_t>(page_state.byte);
  page_state.byte = 0;
  page_state.byte_pos = 0;
}
99
100
// Reports a fixed size of sizeof(bool) per row; none of the arguments are inspected.
// NOTE(review): WriteVector emits one bit per value, so this is larger than the bytes
// actually written per row — presumably only used as an estimate; confirm against callers.
idx_t BooleanColumnWriter::GetRowSize(const Vector &vector, const idx_t index,
                                      const PrimitiveColumnWriterState &state) const {
  constexpr idx_t bytes_per_row = sizeof(bool);
  return bytes_per_row;
}
104
105
} // namespace duckdb