/src/duckdb/extension/parquet/writer/decimal_column_writer.cpp
Line | Count | Source |
1 | | #include "writer/decimal_column_writer.hpp" |
2 | | |
3 | | #include <stdint.h> |
4 | | #include <utility> |
5 | | |
6 | | #include "duckdb/common/helper.hpp" |
7 | | #include "duckdb/common/hugeint.hpp" |
8 | | #include "duckdb/common/limits.hpp" |
9 | | #include "duckdb/common/operator/comparison_operators.hpp" |
10 | | #include "duckdb/common/serializer/write_stream.hpp" |
11 | | #include "duckdb/common/types/validity_mask.hpp" |
12 | | #include "duckdb/common/vector/flat_vector.hpp" |
13 | | #include "parquet_column_schema.hpp" |
14 | | |
15 | | namespace duckdb { |
16 | | class ColumnWriterPageState; |
17 | | class ParquetWriter; |
18 | | class Vector; |
19 | | |
20 | 0 | static void WriteParquetDecimal(hugeint_t input, data_ptr_t result) { |
21 | 0 | bool positive = input >= 0; |
22 | | // numbers are stored as two's complement so some muckery is required |
23 | 0 | if (!positive) { |
24 | 0 | input = NumericLimits<hugeint_t>::Maximum() + input + 1; |
25 | 0 | } |
26 | 0 | uint64_t high_bytes = uint64_t(input.upper); |
27 | 0 | uint64_t low_bytes = input.lower; |
28 | |
|
29 | 0 | for (idx_t i = 0; i < sizeof(uint64_t); i++) { |
30 | 0 | auto shift_count = (sizeof(uint64_t) - i - 1) * 8; |
31 | 0 | result[i] = (high_bytes >> shift_count) & 0xFF; |
32 | 0 | } |
33 | 0 | for (idx_t i = 0; i < sizeof(uint64_t); i++) { |
34 | 0 | auto shift_count = (sizeof(uint64_t) - i - 1) * 8; |
35 | 0 | result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF; |
36 | 0 | } |
37 | 0 | if (!positive) { |
38 | 0 | result[0] |= 0x80; |
39 | 0 | } |
40 | 0 | } |
41 | | |
42 | | class FixedDecimalStatistics : public ColumnWriterStatistics { |
43 | | public: |
44 | 0 | FixedDecimalStatistics() : min(NumericLimits<hugeint_t>::Maximum()), max(NumericLimits<hugeint_t>::Minimum()) { |
45 | 0 | } |
46 | | |
47 | | hugeint_t min; |
48 | | hugeint_t max; |
49 | | |
50 | | public: |
51 | 0 | string GetStats(hugeint_t &input) { |
52 | 0 | data_t buffer[16]; |
53 | 0 | WriteParquetDecimal(input, buffer); |
54 | 0 | return string(const_char_ptr_cast(buffer), 16); |
55 | 0 | } |
56 | | |
57 | 0 | bool HasStats() override { |
58 | 0 | return min <= max; |
59 | 0 | } |
60 | | |
61 | 0 | void Update(const hugeint_t &val) { |
62 | 0 | if (LessThan::Operation(val, min)) { |
63 | 0 | min = val; |
64 | 0 | } |
65 | 0 | if (GreaterThan::Operation(val, max)) { |
66 | 0 | max = val; |
67 | 0 | } |
68 | 0 | } |
69 | | |
70 | 0 | string GetMin() override { |
71 | 0 | return GetMinValue(); |
72 | 0 | } |
73 | 0 | string GetMax() override { |
74 | 0 | return GetMaxValue(); |
75 | 0 | } |
76 | 0 | string GetMinValue() override { |
77 | 0 | return HasStats() ? GetStats(min) : string(); |
78 | 0 | } |
79 | 0 | string GetMaxValue() override { |
80 | 0 | return HasStats() ? GetStats(max) : string(); |
81 | 0 | } |
82 | | }; |
83 | | |
84 | | FixedDecimalColumnWriter::FixedDecimalColumnWriter(ParquetWriter &writer, ParquetColumnSchema &&column_schema, |
85 | | vector<Identifier> schema_path_p) |
86 | 0 | : PrimitiveColumnWriter(writer, std::move(column_schema), std::move(schema_path_p)) { |
87 | 0 | } |
88 | | |
89 | 0 | unique_ptr<ColumnWriterStatistics> FixedDecimalColumnWriter::InitializeStatsState() { |
90 | 0 | return make_uniq<FixedDecimalStatistics>(); |
91 | 0 | } |
92 | | |
93 | | void FixedDecimalColumnWriter::WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, |
94 | | ColumnWriterPageState *page_state, Vector &input_column, idx_t chunk_start, |
95 | 0 | idx_t chunk_end) { |
96 | 0 | auto &mask = FlatVector::ValidityMutable(input_column); |
97 | 0 | auto *ptr = FlatVector::GetData<hugeint_t>(input_column); |
98 | 0 | auto &stats = stats_p->Cast<FixedDecimalStatistics>(); |
99 | |
|
100 | 0 | data_t temp_buffer[16]; |
101 | 0 | for (idx_t r = chunk_start; r < chunk_end; r++) { |
102 | 0 | if (mask.RowIsValid(r)) { |
103 | 0 | stats.Update(ptr[r]); |
104 | 0 | WriteParquetDecimal(ptr[r], temp_buffer); |
105 | 0 | temp_writer.WriteData(temp_buffer, 16); |
106 | 0 | } |
107 | 0 | } |
108 | 0 | } |
109 | | |
110 | | idx_t FixedDecimalColumnWriter::GetRowSize(const Vector &vector, const idx_t index, |
111 | 0 | const PrimitiveColumnWriterState &state) const { |
112 | 0 | return sizeof(hugeint_t); |
113 | 0 | } |
114 | | |
115 | | } // namespace duckdb |