/src/abseil-cpp/absl/strings/internal/escaping.cc
Line | Count | Source |
1 | | // Copyright 2020 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/strings/internal/escaping.h" |
16 | | |
17 | | #include <limits> |
18 | | |
19 | | #include "absl/base/internal/endian.h" |
20 | | #include "absl/base/internal/raw_logging.h" |
21 | | |
22 | | namespace absl { |
23 | | ABSL_NAMESPACE_BEGIN |
24 | | namespace strings_internal { |
25 | | |
26 | | // The two strings below provide maps from normal 6-bit characters to their |
27 | | // base64-escaped equivalent. |
28 | | // For the inverse case, see kUn(WebSafe)Base64 in the external |
29 | | // escaping.cc. |
30 | | ABSL_CONST_INIT const char kBase64Chars[] = |
31 | | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
32 | | |
33 | | ABSL_CONST_INIT const char kWebSafeBase64Chars[] = |
34 | | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; |
35 | | |
36 | 0 | size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { |
37 | | // Base64 encodes three bytes of input at a time. If the input is not |
38 | | // divisible by three, we pad as appropriate. |
39 | | // |
40 | | // Base64 encodes each three bytes of input into four bytes of output. |
41 | 0 | constexpr size_t kMaxSize = (std::numeric_limits<size_t>::max() - 1) / 4 * 3; |
42 | 0 | ABSL_INTERNAL_CHECK(input_len <= kMaxSize, |
43 | 0 | "CalculateBase64EscapedLenInternal() overflow"); |
44 | 0 | size_t len = (input_len / 3) * 4; |
45 | | |
46 | | // Since all base 64 input is an integral number of octets, only the following |
47 | | // cases can arise: |
48 | 0 | if (input_len % 3 == 0) { |
49 | | // (from https://tools.ietf.org/html/rfc3548) |
50 | | // (1) the final quantum of encoding input is an integral multiple of 24 |
51 | | // bits; here, the final unit of encoded output will be an integral |
52 | | // multiple of 4 characters with no "=" padding, |
53 | 0 | } else if (input_len % 3 == 1) { |
54 | | // (from https://tools.ietf.org/html/rfc3548) |
55 | | // (2) the final quantum of encoding input is exactly 8 bits; here, the |
56 | | // final unit of encoded output will be two characters followed by two |
57 | | // "=" padding characters, or |
58 | 0 | len += 2; |
59 | 0 | if (do_padding) { |
60 | 0 | len += 2; |
61 | 0 | } |
62 | 0 | } else { // (input_len % 3 == 2) |
63 | | // (from https://tools.ietf.org/html/rfc3548) |
64 | | // (3) the final quantum of encoding input is exactly 16 bits; here, the |
65 | | // final unit of encoded output will be three characters followed by one |
66 | | // "=" padding character. |
67 | 0 | len += 3; |
68 | 0 | if (do_padding) { |
69 | 0 | len += 1; |
70 | 0 | } |
71 | 0 | } |
72 | |
|
73 | 0 | return len; |
74 | 0 | } |
75 | | |
76 | | // ---------------------------------------------------------------------- |
77 | | // Take the input in groups of 4 characters and turn each |
78 | | // character into a code 0 to 63 thus: |
79 | | // A-Z map to 0 to 25 |
80 | | // a-z map to 26 to 51 |
81 | | // 0-9 map to 52 to 61 |
82 | | // +(- for WebSafe) maps to 62 |
83 | | // /(_ for WebSafe) maps to 63 |
84 | | // There will be four numbers, all less than 64 which can be represented |
85 | | // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). |
86 | | // Arrange the 6 digit binary numbers into three bytes as such: |
87 | | // aaaaaabb bbbbcccc ccdddddd |
88 | | // Equals signs (one or two) are used at the end of the encoded block to |
89 | | // indicate that the text was not an integer multiple of three bytes long. |
90 | | // ---------------------------------------------------------------------- |
91 | | size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, |
92 | | size_t szdest, const char* base64, |
93 | 0 | bool do_padding) { |
94 | 0 | static const char kPad64 = '='; |
95 | |
|
96 | 0 | if (szsrc * 4 > szdest * 3) return 0; |
97 | | |
98 | 0 | char* cur_dest = dest; |
99 | 0 | const unsigned char* cur_src = src; |
100 | |
|
101 | 0 | char* const limit_dest = dest + szdest; |
102 | 0 | const unsigned char* const limit_src = src + szsrc; |
103 | | |
104 | | // (from https://tools.ietf.org/html/rfc3548) |
105 | | // Special processing is performed if fewer than 24 bits are available |
106 | | // at the end of the data being encoded. A full encoding quantum is |
107 | | // always completed at the end of a quantity. When fewer than 24 input |
108 | | // bits are available in an input group, zero bits are added (on the |
109 | | // right) to form an integral number of 6-bit groups. |
110 | | // |
111 | | // If do_padding is true, padding at the end of the data is performed. This |
112 | | // output padding uses the '=' character. |
113 | | |
114 | | // Three bytes of data encodes to four characters of cyphertext. |
115 | | // So we can pump through three-byte chunks atomically. |
116 | 0 | if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. |
117 | 0 | while (cur_src < limit_src - 3) { // While we have >= 32 bits. |
118 | 0 | uint32_t in = absl::big_endian::Load32(cur_src) >> 8; |
119 | |
|
120 | 0 | cur_dest[0] = base64[in >> 18]; |
121 | 0 | in &= 0x3FFFF; |
122 | 0 | cur_dest[1] = base64[in >> 12]; |
123 | 0 | in &= 0xFFF; |
124 | 0 | cur_dest[2] = base64[in >> 6]; |
125 | 0 | in &= 0x3F; |
126 | 0 | cur_dest[3] = base64[in]; |
127 | |
|
128 | 0 | cur_dest += 4; |
129 | 0 | cur_src += 3; |
130 | 0 | } |
131 | 0 | } |
132 | | // To save time, we didn't update szdest or szsrc in the loop. So do it now. |
133 | 0 | szdest = static_cast<size_t>(limit_dest - cur_dest); |
134 | 0 | szsrc = static_cast<size_t>(limit_src - cur_src); |
135 | | |
136 | | /* now deal with the tail (<=3 bytes) */ |
137 | 0 | switch (szsrc) { |
138 | 0 | case 0: |
139 | | // Nothing left; nothing more to do. |
140 | 0 | break; |
141 | 0 | case 1: { |
142 | | // One byte left: this encodes to two characters, and (optionally) |
143 | | // two pad characters to round out the four-character cypherblock. |
144 | 0 | if (szdest < 2) return 0; |
145 | 0 | uint32_t in = cur_src[0]; |
146 | 0 | cur_dest[0] = base64[in >> 2]; |
147 | 0 | in &= 0x3; |
148 | 0 | cur_dest[1] = base64[in << 4]; |
149 | 0 | cur_dest += 2; |
150 | 0 | szdest -= 2; |
151 | 0 | if (do_padding) { |
152 | 0 | if (szdest < 2) return 0; |
153 | 0 | cur_dest[0] = kPad64; |
154 | 0 | cur_dest[1] = kPad64; |
155 | 0 | cur_dest += 2; |
156 | 0 | szdest -= 2; |
157 | 0 | } |
158 | 0 | break; |
159 | 0 | } |
160 | 0 | case 2: { |
161 | | // Two bytes left: this encodes to three characters, and (optionally) |
162 | | // one pad character to round out the four-character cypherblock. |
163 | 0 | if (szdest < 3) return 0; |
164 | 0 | uint32_t in = absl::big_endian::Load16(cur_src); |
165 | 0 | cur_dest[0] = base64[in >> 10]; |
166 | 0 | in &= 0x3FF; |
167 | 0 | cur_dest[1] = base64[in >> 4]; |
168 | 0 | in &= 0x00F; |
169 | 0 | cur_dest[2] = base64[in << 2]; |
170 | 0 | cur_dest += 3; |
171 | 0 | szdest -= 3; |
172 | 0 | if (do_padding) { |
173 | 0 | if (szdest < 1) return 0; |
174 | 0 | cur_dest[0] = kPad64; |
175 | 0 | cur_dest += 1; |
176 | 0 | szdest -= 1; |
177 | 0 | } |
178 | 0 | break; |
179 | 0 | } |
180 | 0 | case 3: { |
181 | | // Three bytes left: same as in the big loop above. We can't do this in |
182 | | // the loop because the loop above always reads 4 bytes, and the fourth |
183 | | // byte is past the end of the input. |
184 | 0 | if (szdest < 4) return 0; |
185 | 0 | uint32_t in = |
186 | 0 | (uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1); |
187 | 0 | cur_dest[0] = base64[in >> 18]; |
188 | 0 | in &= 0x3FFFF; |
189 | 0 | cur_dest[1] = base64[in >> 12]; |
190 | 0 | in &= 0xFFF; |
191 | 0 | cur_dest[2] = base64[in >> 6]; |
192 | 0 | in &= 0x3F; |
193 | 0 | cur_dest[3] = base64[in]; |
194 | 0 | cur_dest += 4; |
195 | 0 | szdest -= 4; |
196 | 0 | break; |
197 | 0 | } |
198 | 0 | default: |
199 | | // Should not be reached: blocks of 4 bytes are handled |
200 | | // in the while loop before this switch statement. |
201 | 0 | ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); |
202 | 0 | break; |
203 | 0 | } |
204 | 0 | return static_cast<size_t>(cur_dest - dest); |
205 | 0 | } |
206 | | |
207 | | } // namespace strings_internal |
208 | | ABSL_NAMESPACE_END |
209 | | } // namespace absl |