/src/duckdb/extension/parquet/writer/boolean_column_writer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | #include "writer/boolean_column_writer.hpp" |
2 | | |
3 | | namespace duckdb { |
4 | | |
5 | | class BooleanStatisticsState : public ColumnWriterStatistics { |
6 | | public: |
7 | 0 | BooleanStatisticsState() : min(true), max(false) { |
8 | 0 | } |
9 | | |
10 | | bool min; |
11 | | bool max; |
12 | | |
13 | | public: |
14 | 0 | bool HasStats() override { |
15 | 0 | return !(min && !max); |
16 | 0 | } |
17 | | |
18 | 0 | string GetMin() override { |
19 | 0 | return GetMinValue(); |
20 | 0 | } |
21 | 0 | string GetMax() override { |
22 | 0 | return GetMaxValue(); |
23 | 0 | } |
24 | 0 | string GetMinValue() override { |
25 | 0 | return HasStats() ? string(const_char_ptr_cast(&min), sizeof(bool)) : string(); |
26 | 0 | } |
27 | 0 | string GetMaxValue() override { |
28 | 0 | return HasStats() ? string(const_char_ptr_cast(&max), sizeof(bool)) : string(); |
29 | 0 | } |
30 | | }; |
31 | | |
32 | | class BooleanWriterPageState : public ColumnWriterPageState { |
33 | | public: |
34 | | uint8_t byte = 0; |
35 | | uint8_t byte_pos = 0; |
36 | | }; |
37 | | |
38 | | BooleanColumnWriter::BooleanColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, |
39 | | vector<string> schema_path_p, bool can_have_nulls) |
40 | 0 | : PrimitiveColumnWriter(writer, column_schema, std::move(schema_path_p), can_have_nulls) { |
41 | 0 | } |
42 | | |
43 | 0 | unique_ptr<ColumnWriterStatistics> BooleanColumnWriter::InitializeStatsState() { |
44 | 0 | return make_uniq<BooleanStatisticsState>(); |
45 | 0 | } |
46 | | |
47 | | void BooleanColumnWriter::WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, |
48 | | ColumnWriterPageState *state_p, Vector &input_column, idx_t chunk_start, |
49 | 0 | idx_t chunk_end) { |
50 | 0 | auto &stats = stats_p->Cast<BooleanStatisticsState>(); |
51 | 0 | auto &state = state_p->Cast<BooleanWriterPageState>(); |
52 | 0 | const auto &mask = FlatVector::Validity(input_column); |
53 | |
|
54 | 0 | const auto *const ptr = FlatVector::GetData<bool>(input_column); |
55 | 0 | if (stats.max && !stats.min && mask.AllValid()) { |
56 | | // Fast path: stats have already been set, and there's no NULLs |
57 | 0 | for (idx_t r = chunk_start; r < chunk_end; r++) { |
58 | 0 | const auto &val = ptr[r]; |
59 | 0 | state.byte |= val << state.byte_pos; |
60 | 0 | if (++state.byte_pos == 8) { |
61 | 0 | temp_writer.Write(state.byte); |
62 | 0 | state.byte = 0; |
63 | 0 | state.byte_pos = 0; |
64 | 0 | } |
65 | 0 | } |
66 | 0 | } else { |
67 | 0 | for (idx_t r = chunk_start; r < chunk_end; r++) { |
68 | 0 | if (!mask.RowIsValid(r)) { |
69 | 0 | continue; |
70 | 0 | } |
71 | 0 | const auto &val = ptr[r]; |
72 | |
|
73 | 0 | stats.max |= val; |
74 | 0 | stats.min &= val; |
75 | 0 | state.byte |= val << state.byte_pos; |
76 | |
|
77 | 0 | if (++state.byte_pos == 8) { |
78 | 0 | temp_writer.Write(state.byte); |
79 | 0 | state.byte = 0; |
80 | 0 | state.byte_pos = 0; |
81 | 0 | } |
82 | 0 | } |
83 | 0 | } |
84 | 0 | } |
85 | | |
86 | | unique_ptr<ColumnWriterPageState> BooleanColumnWriter::InitializePageState(PrimitiveColumnWriterState &state, |
87 | 0 | idx_t page_idx) { |
88 | 0 | return make_uniq<BooleanWriterPageState>(); |
89 | 0 | } |
90 | | |
91 | 0 | void BooleanColumnWriter::FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) { |
92 | 0 | auto &state = state_p->Cast<BooleanWriterPageState>(); |
93 | 0 | if (state.byte_pos > 0) { |
94 | 0 | temp_writer.Write<uint8_t>(state.byte); |
95 | 0 | state.byte = 0; |
96 | 0 | state.byte_pos = 0; |
97 | 0 | } |
98 | 0 | } |
99 | | |
100 | | idx_t BooleanColumnWriter::GetRowSize(const Vector &vector, const idx_t index, |
101 | 0 | const PrimitiveColumnWriterState &state) const { |
102 | 0 | return sizeof(bool); |
103 | 0 | } |
104 | | |
105 | | } // namespace duckdb |