/src/mysql-server/libs/mysql/serialization/variable_length_integers.h
Line | Count | Source |
1 | | // Copyright (c) 2023, 2025, Oracle and/or its affiliates. |
2 | | // |
3 | | // This program is free software; you can redistribute it and/or modify |
4 | | // it under the terms of the GNU General Public License, version 2.0, |
5 | | // as published by the Free Software Foundation. |
6 | | // |
7 | | // This program is designed to work with certain software (including |
8 | | // but not limited to OpenSSL) that is licensed under separate terms, |
9 | | // as designated in a particular file or component or in included license |
10 | | // documentation. The authors of MySQL hereby grant you an additional |
11 | | // permission to link the program and your derivative works with the |
12 | | // separately licensed software that they have either included with |
13 | | // the program or referenced in the documentation. |
14 | | // |
15 | | // This program is distributed in the hope that it will be useful, |
16 | | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | | // GNU General Public License, version 2.0, for more details. |
19 | | // |
20 | | // You should have received a copy of the GNU General Public License |
21 | | // along with this program; if not, write to the Free Software |
22 | | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
23 | | |
24 | | #ifndef MYSQL_SERIALIZATION_VARIABLE_LENGTH_INTEGERS_H |
25 | | #define MYSQL_SERIALIZATION_VARIABLE_LENGTH_INTEGERS_H |
26 | | |
27 | | /// @file |
28 | | /// Experimental API header |
29 | | /// @details This file contains low-level internal functions used to store/load |
30 | | /// variable-length integers to/from the memory |
31 | | /// |
32 | | /// Please refer to the readme.md of the mysql_serialization library to find |
33 | | /// more information about the format |
34 | | |
35 | | #include <bit> |
36 | | #include <concepts> |
37 | | #include <limits> |
38 | | #include "my_byteorder.h" |
39 | | |
40 | | /// @addtogroup GroupLibsMysqlSerialization |
41 | | /// @{ |
42 | | |
43 | | namespace mysql::serialization::detail { |
44 | | |
45 | | /// @brief Calculates the number of bytes necessary to store data |
46 | | /// @tparam Type Integer type |
47 | | /// @param data The number to be stored into the memory |
48 | | /// @return The number of bytes necessary to store data. |
49 | | size_t get_size_integer_varlen_unsigned( |
50 | 0 | const std::unsigned_integral auto &data) { |
51 | 0 | // @details When bit_width(data) == N, the output buffer uses: |
52 | 0 | // * 1 byte, if N==0; |
53 | 0 | // * 1 + ceil((N-1)/7) bytes, if 1<=N<=63; |
54 | 0 | // * 9 bytes, if N==64. |
55 | 0 | // For the case 1<=N<=63, the function follows a straight line. It |
56 | 0 | // is a little above that line when N==0 and a little below that |
57 | 0 | // line when N==63. Therefore, it can be approximated by a line with |
58 | 0 | // slightly lower slope. The slope 575/4096 gives correct results |
59 | 0 | // for all values between 0 and 64, inclusive, and can be computed |
60 | 0 | // with just 1 multiplication and 1 shift. |
61 | 0 | int bits_in_number = std::bit_width(data); |
62 | 0 | return ((bits_in_number * 575) >> 12) + 1; |
63 | 0 | } |
64 | | |
65 | | /// @copydoc get_size_integer_varlen_unsigned |
66 | | /// @details Version for signed integers |
67 | | size_t get_size_integer_varlen_signed(const std::signed_integral auto &data) { |
68 | | // sign_mask = (data < 0) ? ~0 : 0 |
69 | | auto sign_mask = data >> (sizeof(data) * 8 - 1); |
70 | | return get_size_integer_varlen_unsigned(uint64_t(data ^ sign_mask) << 1); |
71 | | } |
72 | | |
73 | | /// @copydoc get_size_integer_varlen_unsigned |
74 | | /// @details Enabled for unsigned integers |
75 | 0 | size_t get_size_integer_varlen(const std::unsigned_integral auto &data) { |
76 | 0 | return get_size_integer_varlen_unsigned(data); |
77 | 0 | } |
78 | | |
79 | | /// @copydoc get_size_integer_varlen_unsigned |
80 | | /// @details Enabled for signed integers |
81 | | size_t get_size_integer_varlen(const std::signed_integral auto &data) { |
82 | | return get_size_integer_varlen_signed(data); |
83 | | } |
84 | | |
85 | | /// @brief Writes variable-length integer to the stream |
86 | | /// @param[in] stream Encoded data stream |
87 | | /// @param[out] data Integer to write |
88 | | /// @return Number of bytes written to the stream |
89 | | size_t write_varlen_bytes_unsigned(unsigned char *stream, |
90 | | const std::unsigned_integral auto &data) { |
91 | | uint64_t data_cpy = data; |
92 | | int byte_count = get_size_integer_varlen_unsigned(data); |
93 | | stream[0] = ((1 << (byte_count - 1)) - 1) | |
94 | | static_cast<uint8_t>(data_cpy << byte_count); |
95 | | // memcpy won't accept 0 bytes |
96 | | if (byte_count == 1) { |
97 | | return byte_count; |
98 | | } |
99 | | // If byte_count <= 8, shift right by 8 - byte_count. |
100 | | // If byte_count == 9, shift right by 8 - 9 + 1 = 0. |
101 | | data_cpy >>= (8 - byte_count + ((byte_count + 7) >> 4)); |
102 | | // reverse endianess for BIG ENDIAN archs |
103 | | data_cpy = htole64(data_cpy); |
104 | | memcpy(&stream[1], &data_cpy, byte_count - 1); |
105 | | return byte_count; |
106 | | } |
107 | | |
108 | | /// @copydoc write_varlen_bytes_unsigned |
109 | | /// @details Version for signed integers |
110 | | size_t write_varlen_bytes_signed(unsigned char *stream, |
111 | | const std::signed_integral auto &data) { |
112 | | // convert negatives into positive numbers |
113 | | // sign_mask is 0 if data >= 0 and ~0 if data < 0 |
114 | | auto sign_mask = (data >> (sizeof(data) * 8 - 1)); |
115 | | uint64_t data_cpy = (data ^ sign_mask); |
116 | | // insert sign bit as least significant bit |
117 | | data_cpy = (data_cpy << 1) | (sign_mask & 1); |
118 | | return write_varlen_bytes_unsigned(stream, data_cpy); |
119 | | } |
120 | | |
121 | | /// @copydoc write_varlen_bytes_unsigned |
122 | | /// @details Enabled for unsigned integers |
123 | | size_t write_varlen_bytes(unsigned char *stream, |
124 | | const std::unsigned_integral auto &data) { |
125 | | return write_varlen_bytes_unsigned(stream, data); |
126 | | } |
127 | | |
128 | | /// @copydoc write_varlen_bytes_unsigned |
129 | | /// @details Enabled for signed integers |
130 | | size_t write_varlen_bytes(unsigned char *stream, |
131 | | const std::signed_integral auto &data) { |
132 | | return write_varlen_bytes_signed(stream, data); |
133 | | } |
134 | | |
135 | | /// @brief Reads variable-length integer from the stream |
136 | | /// @param[in] stream Encoded data |
137 | | /// @param[in] stream_bytes Number of bytes in the stream |
138 | | /// @param[out] data Result value |
139 | | /// @return Number of bytes read from the stream or 0 on error. Error occurs |
140 | | /// if the stream ends before or in the middle of the encoded numbers. |
141 | | template <typename Type> |
142 | | size_t read_varlen_bytes_unsigned(const unsigned char *stream, |
143 | | std::size_t stream_bytes, Type &data) |
144 | | requires std::unsigned_integral<Type> |
145 | | { |
146 | | if (stream_bytes == 0) { |
147 | | return stream_bytes; |
148 | | } |
149 | | uint8_t first_byte = stream[0]; |
150 | | std::size_t num_bytes = std::countr_one(first_byte) + 1; |
151 | | if (num_bytes > stream_bytes) { |
152 | | return 0; |
153 | | } |
154 | | Type data_cpy = first_byte >> num_bytes; |
155 | | if (num_bytes == 1) { |
156 | | data = data_cpy; |
157 | | return num_bytes; |
158 | | } |
159 | | uint64_t data_tmp = 0; |
160 | | memcpy(&data_tmp, &stream[1], num_bytes - 1); |
161 | | data_tmp = le64toh(data_tmp); |
162 | | // If num_bytes <= 8, shift left by 8 - num_bytes. |
163 | | // If num_bytes == 9, shift left by 8 - 9 + 1 = 0. |
164 | | data_tmp <<= (8 - num_bytes + ((num_bytes + 7) >> 4)); |
165 | | if (data_tmp > std::numeric_limits<Type>::max()) { |
166 | | return 0; |
167 | | } |
168 | | data_cpy |= data_tmp; |
169 | | data = data_cpy; |
170 | | return num_bytes; |
171 | | } |
172 | | |
173 | | /// @copydoc read_varlen_bytes_unsigned |
174 | | template <typename Type> |
175 | | size_t read_varlen_bytes_signed(const unsigned char *stream, |
176 | | std::size_t stream_bytes, Type &data) |
177 | | requires std::signed_integral<Type> |
178 | | { |
179 | | using Type_unsigned = std::make_unsigned_t<Type>; |
180 | | Type_unsigned data_tmp = 0; |
181 | | std::size_t num_bytes = |
182 | | read_varlen_bytes_unsigned(stream, stream_bytes, data_tmp); |
183 | | // 0 if positive, ~0 if negative |
184 | | // static_cast is needed to avoid compilation warning on Windows. |
185 | | Type_unsigned sign_mask = -static_cast<Type>(data_tmp & 1); |
186 | | // the result if it is nonnegative, or -(result + 1) if it is negative. |
187 | | data_tmp = data_tmp >> 1; |
188 | | // the result |
189 | | data_tmp = data_tmp ^ sign_mask; |
190 | | data = Type(data_tmp); |
191 | | return num_bytes; |
192 | | } |
193 | | |
194 | | /// @copydoc read_varlen_bytes_unsigned |
195 | | size_t read_varlen_bytes(const unsigned char *stream, std::size_t stream_bytes, |
196 | | std::unsigned_integral auto &data) { |
197 | | return read_varlen_bytes_unsigned(stream, stream_bytes, data); |
198 | | } |
199 | | |
200 | | /// @copydoc read_varlen_bytes_unsigned |
201 | | size_t read_varlen_bytes(const unsigned char *stream, std::size_t stream_bytes, |
202 | | std::signed_integral auto &data) { |
203 | | return read_varlen_bytes_signed(stream, stream_bytes, data); |
204 | | } |
205 | | |
206 | | } // namespace mysql::serialization::detail |
207 | | |
208 | | /// @} |
209 | | |
210 | | #endif // MYSQL_SERIALIZATION_VARIABLE_LENGTH_INTEGERS_H |