/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp
Line | Count | Source |
1 | | #include "duckdb/optimizer/statistics_propagator.hpp" |
2 | | #include "duckdb/planner/expression/bound_cast_expression.hpp" |
3 | | |
4 | | namespace duckdb { |
5 | | |
6 | | static unique_ptr<BaseStatistics> StatisticsOperationsNumericNumericCast(const BaseStatistics &input, |
7 | 3.55k | const LogicalType &target) { |
8 | 3.55k | if (!NumericStats::HasMinMax(input)) { |
9 | 454 | return nullptr; |
10 | 454 | } |
11 | 3.09k | Value min = NumericStats::Min(input); |
12 | 3.09k | Value max = NumericStats::Max(input); |
13 | 3.09k | if (!min.DefaultTryCastAs(target) || !max.DefaultTryCastAs(target)) { |
14 | | // overflow in cast: bailout |
15 | 1.11k | return nullptr; |
16 | 1.11k | } |
17 | 1.98k | auto result = NumericStats::CreateEmpty(target); |
18 | 1.98k | result.CopyBase(input); |
19 | 1.98k | NumericStats::SetMin(result, min); |
20 | 1.98k | NumericStats::SetMax(result, max); |
21 | 1.98k | return result.ToUnique(); |
22 | 3.09k | } |
23 | | |
24 | 3.92k | bool StatisticsPropagator::CanPropagateCast(const LogicalType &source, const LogicalType &target) { |
25 | 3.92k | if (source == target) { |
26 | 0 | return true; |
27 | 0 | } |
28 | | // we can only propagate numeric -> numeric |
29 | 3.92k | switch (source.InternalType()) { |
30 | 0 | case PhysicalType::INT8: |
31 | 65 | case PhysicalType::INT16: |
32 | 1.97k | case PhysicalType::INT32: |
33 | 3.01k | case PhysicalType::INT64: |
34 | 3.57k | case PhysicalType::INT128: |
35 | 3.57k | case PhysicalType::FLOAT: |
36 | 3.58k | case PhysicalType::DOUBLE: |
37 | 3.58k | break; |
38 | 346 | default: |
39 | 346 | return false; |
40 | 3.92k | } |
41 | 3.58k | switch (target.InternalType()) { |
42 | 0 | case PhysicalType::INT8: |
43 | 0 | case PhysicalType::INT16: |
44 | 0 | case PhysicalType::INT32: |
45 | 843 | case PhysicalType::INT64: |
46 | 1.53k | case PhysicalType::INT128: |
47 | 1.53k | case PhysicalType::FLOAT: |
48 | 3.55k | case PhysicalType::DOUBLE: |
49 | 3.55k | break; |
50 | 27 | default: |
51 | 27 | return false; |
52 | 3.58k | } |
53 | | // for time/timestamps/dates - there are various limitations on what we can propagate |
54 | | // Downcasting timestamps to times is not a truncation operation |
55 | 3.55k | switch (target.id()) { |
56 | 0 | case LogicalTypeId::TIME: { |
57 | 0 | switch (source.id()) { |
58 | 0 | case LogicalTypeId::TIMESTAMP: |
59 | 0 | case LogicalTypeId::TIMESTAMP_SEC: |
60 | 0 | case LogicalTypeId::TIMESTAMP_MS: |
61 | 0 | case LogicalTypeId::TIMESTAMP_NS: |
62 | 0 | case LogicalTypeId::TIMESTAMP_TZ: |
63 | 0 | return false; |
64 | 0 | default: |
65 | 0 | break; |
66 | 0 | } |
67 | 0 | break; |
68 | 0 | } |
69 | | // FIXME: perform actual stats propagation for these casts |
70 | 0 | case LogicalTypeId::TIMESTAMP: |
71 | 0 | case LogicalTypeId::TIMESTAMP_TZ: { |
72 | 0 | const bool to_timestamp = target.id() == LogicalTypeId::TIMESTAMP; |
73 | 0 | const bool to_timestamp_tz = target.id() == LogicalTypeId::TIMESTAMP_TZ; |
74 | | // Casting to timestamp[_tz] (us) from a different unit can not re-use stats |
75 | 0 | switch (source.id()) { |
76 | 0 | case LogicalTypeId::TIMESTAMP_NS: |
77 | 0 | case LogicalTypeId::TIMESTAMP_MS: |
78 | 0 | case LogicalTypeId::TIMESTAMP_SEC: |
79 | 0 | return false; |
80 | 0 | case LogicalTypeId::TIMESTAMP: { |
81 | 0 | if (to_timestamp_tz) { |
82 | | // Both use INT64 physical type, but should not be treated equal |
83 | 0 | return false; |
84 | 0 | } |
85 | 0 | break; |
86 | 0 | } |
87 | 0 | case LogicalTypeId::TIMESTAMP_TZ: { |
88 | 0 | if (to_timestamp) { |
89 | | // Both use INT64 physical type, but should not be treated equal |
90 | 0 | return false; |
91 | 0 | } |
92 | 0 | break; |
93 | 0 | } |
94 | 0 | default: |
95 | 0 | break; |
96 | 0 | } |
97 | 0 | break; |
98 | 0 | } |
99 | 0 | case LogicalTypeId::TIMESTAMP_NS: { |
100 | | // Same as above ^ |
101 | 0 | switch (source.id()) { |
102 | 0 | case LogicalTypeId::TIMESTAMP: |
103 | 0 | case LogicalTypeId::TIMESTAMP_TZ: |
104 | 0 | case LogicalTypeId::TIMESTAMP_MS: |
105 | 0 | case LogicalTypeId::TIMESTAMP_SEC: |
106 | 0 | return false; |
107 | 0 | default: |
108 | 0 | break; |
109 | 0 | } |
110 | 0 | break; |
111 | 0 | } |
112 | 0 | case LogicalTypeId::TIMESTAMP_MS: { |
113 | | // Same as above ^ |
114 | 0 | switch (source.id()) { |
115 | 0 | case LogicalTypeId::TIMESTAMP: |
116 | 0 | case LogicalTypeId::TIMESTAMP_TZ: |
117 | 0 | case LogicalTypeId::TIMESTAMP_NS: |
118 | 0 | case LogicalTypeId::TIMESTAMP_SEC: |
119 | 0 | return false; |
120 | 0 | default: |
121 | 0 | break; |
122 | 0 | } |
123 | 0 | break; |
124 | 0 | } |
125 | 0 | case LogicalTypeId::TIMESTAMP_SEC: { |
126 | | // Same as above ^ |
127 | 0 | switch (source.id()) { |
128 | 0 | case LogicalTypeId::TIMESTAMP: |
129 | 0 | case LogicalTypeId::TIMESTAMP_TZ: |
130 | 0 | case LogicalTypeId::TIMESTAMP_NS: |
131 | 0 | case LogicalTypeId::TIMESTAMP_MS: |
132 | 0 | return false; |
133 | 0 | default: |
134 | 0 | break; |
135 | 0 | } |
136 | 0 | break; |
137 | 0 | } |
138 | 3.55k | default: |
139 | 3.55k | break; |
140 | 3.55k | } |
141 | | // we can propagate! |
142 | 3.55k | return true; |
143 | 3.55k | } |
144 | | |
145 | | unique_ptr<BaseStatistics> StatisticsPropagator::TryPropagateCast(BaseStatistics &stats, const LogicalType &source, |
146 | 3.92k | const LogicalType &target) { |
147 | 3.92k | if (!CanPropagateCast(source, target)) { |
148 | 373 | return nullptr; |
149 | 373 | } |
150 | 3.55k | return StatisticsOperationsNumericNumericCast(stats, target); |
151 | 3.92k | } |
152 | | |
153 | | unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundCastExpression &cast, |
154 | 6.58k | unique_ptr<Expression> &expr_ptr) { |
155 | 6.58k | auto child_stats = PropagateExpression(cast.child); |
156 | 6.58k | if (!child_stats) { |
157 | 2.66k | return nullptr; |
158 | 2.66k | } |
159 | 3.92k | auto result_stats = TryPropagateCast(*child_stats, cast.child->return_type, cast.return_type); |
160 | 3.92k | if (cast.try_cast && result_stats) { |
161 | 0 | result_stats->Set(StatsInfo::CAN_HAVE_NULL_VALUES); |
162 | 0 | } |
163 | 3.92k | return result_stats; |
164 | 6.58k | } |
165 | | |
166 | | } // namespace duckdb |