/src/duckdb/extension/parquet/writer/decimal_column_writer.cpp
Line | Count | Source |
1 | | #include "writer/decimal_column_writer.hpp" |
2 | | |
3 | | namespace duckdb { |
4 | | |
5 | 0 | static void WriteParquetDecimal(hugeint_t input, data_ptr_t result) { |
6 | 0 | bool positive = input >= 0; |
7 | | // numbers are stored as two's complement so some muckery is required |
8 | 0 | if (!positive) { |
9 | 0 | input = NumericLimits<hugeint_t>::Maximum() + input + 1; |
10 | 0 | } |
11 | 0 | uint64_t high_bytes = uint64_t(input.upper); |
12 | 0 | uint64_t low_bytes = input.lower; |
13 | |
|
14 | 0 | for (idx_t i = 0; i < sizeof(uint64_t); i++) { |
15 | 0 | auto shift_count = (sizeof(uint64_t) - i - 1) * 8; |
16 | 0 | result[i] = (high_bytes >> shift_count) & 0xFF; |
17 | 0 | } |
18 | 0 | for (idx_t i = 0; i < sizeof(uint64_t); i++) { |
19 | 0 | auto shift_count = (sizeof(uint64_t) - i - 1) * 8; |
20 | 0 | result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF; |
21 | 0 | } |
22 | 0 | if (!positive) { |
23 | 0 | result[0] |= 0x80; |
24 | 0 | } |
25 | 0 | } |
26 | | |
27 | | class FixedDecimalStatistics : public ColumnWriterStatistics { |
28 | | public: |
29 | 0 | FixedDecimalStatistics() : min(NumericLimits<hugeint_t>::Maximum()), max(NumericLimits<hugeint_t>::Minimum()) { |
30 | 0 | } |
31 | | |
32 | | hugeint_t min; |
33 | | hugeint_t max; |
34 | | |
35 | | public: |
36 | 0 | string GetStats(hugeint_t &input) { |
37 | 0 | data_t buffer[16]; |
38 | 0 | WriteParquetDecimal(input, buffer); |
39 | 0 | return string(const_char_ptr_cast(buffer), 16); |
40 | 0 | } |
41 | | |
42 | 0 | bool HasStats() override { |
43 | 0 | return min <= max; |
44 | 0 | } |
45 | | |
46 | 0 | void Update(hugeint_t &val) { |
47 | 0 | if (LessThan::Operation(val, min)) { |
48 | 0 | min = val; |
49 | 0 | } |
50 | 0 | if (GreaterThan::Operation(val, max)) { |
51 | 0 | max = val; |
52 | 0 | } |
53 | 0 | } |
54 | | |
55 | 0 | string GetMin() override { |
56 | 0 | return GetMinValue(); |
57 | 0 | } |
58 | 0 | string GetMax() override { |
59 | 0 | return GetMaxValue(); |
60 | 0 | } |
61 | 0 | string GetMinValue() override { |
62 | 0 | return HasStats() ? GetStats(min) : string(); |
63 | 0 | } |
64 | 0 | string GetMaxValue() override { |
65 | 0 | return HasStats() ? GetStats(max) : string(); |
66 | 0 | } |
67 | | }; |
68 | | |
69 | | FixedDecimalColumnWriter::FixedDecimalColumnWriter(ParquetWriter &writer, ParquetColumnSchema &&column_schema, |
70 | | vector<string> schema_path_p) |
71 | 0 | : PrimitiveColumnWriter(writer, std::move(column_schema), std::move(schema_path_p)) { |
72 | 0 | } |
73 | | |
74 | 0 | unique_ptr<ColumnWriterStatistics> FixedDecimalColumnWriter::InitializeStatsState() { |
75 | 0 | return make_uniq<FixedDecimalStatistics>(); |
76 | 0 | } |
77 | | |
78 | | void FixedDecimalColumnWriter::WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, |
79 | | ColumnWriterPageState *page_state, Vector &input_column, idx_t chunk_start, |
80 | 0 | idx_t chunk_end) { |
81 | 0 | auto &mask = FlatVector::Validity(input_column); |
82 | 0 | auto *ptr = FlatVector::GetData<hugeint_t>(input_column); |
83 | 0 | auto &stats = stats_p->Cast<FixedDecimalStatistics>(); |
84 | |
|
85 | 0 | data_t temp_buffer[16]; |
86 | 0 | for (idx_t r = chunk_start; r < chunk_end; r++) { |
87 | 0 | if (mask.RowIsValid(r)) { |
88 | 0 | stats.Update(ptr[r]); |
89 | 0 | WriteParquetDecimal(ptr[r], temp_buffer); |
90 | 0 | temp_writer.WriteData(temp_buffer, 16); |
91 | 0 | } |
92 | 0 | } |
93 | 0 | } |
94 | | |
95 | | idx_t FixedDecimalColumnWriter::GetRowSize(const Vector &vector, const idx_t index, |
96 | 0 | const PrimitiveColumnWriterState &state) const { |
97 | 0 | return sizeof(hugeint_t); |
98 | 0 | } |
99 | | |
100 | | } // namespace duckdb |