Coverage Report

Created: 2026-05-30 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/abseil-cpp/absl/strings/internal/escaping.cc
Line
Count
Source
1
// Copyright 2020 The Abseil Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "absl/strings/internal/escaping.h"
16
17
#include <limits>
18
19
#include "absl/base/internal/endian.h"
20
#include "absl/base/internal/raw_logging.h"
21
22
namespace absl {
23
ABSL_NAMESPACE_BEGIN
24
namespace strings_internal {
25
26
// The two strings below provide maps from normal 6-bit characters to their
27
// base64-escaped equivalent.
28
// For the inverse case, see kUn(WebSafe)Base64 in the external
29
// escaping.cc.
30
// Standard base64 alphabet: the character at index i is the encoding of the
// 6-bit value i. Values 62 and 63 map to '+' and '/'. The array also holds the
// string literal's trailing NUL, so it has 65 elements.
ABSL_CONST_INIT const char kBase64Chars[] =
31
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
32
33
// "Web safe" base64 alphabet: identical to kBase64Chars for values 0..61, but
// values 62 and 63 map to '-' and '_' instead of '+' and '/'.
ABSL_CONST_INIT const char kWebSafeBase64Chars[] =
34
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
35
36
0
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
37
  // Base64 encodes three bytes of input at a time. If the input is not
38
  // divisible by three, we pad as appropriate.
39
  //
40
  // Base64 encodes each three bytes of input into four bytes of output.
41
0
  constexpr size_t kMaxSize = (std::numeric_limits<size_t>::max() - 1) / 4 * 3;
42
0
  ABSL_INTERNAL_CHECK(input_len <= kMaxSize,
43
0
                      "CalculateBase64EscapedLenInternal() overflow");
44
0
  size_t len = (input_len / 3) * 4;
45
46
  // Since all base 64 input is an integral number of octets, only the following
47
  // cases can arise:
48
0
  if (input_len % 3 == 0) {
49
    // (from https://tools.ietf.org/html/rfc3548)
50
    // (1) the final quantum of encoding input is an integral multiple of 24
51
    // bits; here, the final unit of encoded output will be an integral
52
    // multiple of 4 characters with no "=" padding,
53
0
  } else if (input_len % 3 == 1) {
54
    // (from https://tools.ietf.org/html/rfc3548)
55
    // (2) the final quantum of encoding input is exactly 8 bits; here, the
56
    // final unit of encoded output will be two characters followed by two
57
    // "=" padding characters, or
58
0
    len += 2;
59
0
    if (do_padding) {
60
0
      len += 2;
61
0
    }
62
0
  } else {  // (input_len % 3 == 2)
63
    // (from https://tools.ietf.org/html/rfc3548)
64
    // (3) the final quantum of encoding input is exactly 16 bits; here, the
65
    // final unit of encoded output will be three characters followed by one
66
    // "=" padding character.
67
0
    len += 3;
68
0
    if (do_padding) {
69
0
      len += 1;
70
0
    }
71
0
  }
72
73
0
  return len;
74
0
}
75
76
// ----------------------------------------------------------------------
77
//   Take the input in groups of 4 characters and turn each
78
//   character into a code 0 to 63 thus:
79
//           A-Z map to 0 to 25
80
//           a-z map to 26 to 51
81
//           0-9 map to 52 to 61
82
//           +(- for WebSafe) maps to 62
83
//           /(_ for WebSafe) maps to 63
84
//   There will be four numbers, all less than 64 which can be represented
85
//   by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
86
//   Arrange the 6 digit binary numbers into three bytes as such:
87
//   aaaaaabb bbbbcccc ccdddddd
88
//   Equals signs (one or two) are used at the end of the encoded block to
89
//   indicate that the text was not an integer multiple of three bytes long.
90
// ----------------------------------------------------------------------
91
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
92
                            size_t szdest, const char* base64,
93
0
                            bool do_padding) {
94
0
  static const char kPad64 = '=';
95
96
0
  if (szsrc * 4 > szdest * 3) return 0;
97
98
0
  char* cur_dest = dest;
99
0
  const unsigned char* cur_src = src;
100
101
0
  char* const limit_dest = dest + szdest;
102
0
  const unsigned char* const limit_src = src + szsrc;
103
104
  // (from https://tools.ietf.org/html/rfc3548)
105
  // Special processing is performed if fewer than 24 bits are available
106
  // at the end of the data being encoded.  A full encoding quantum is
107
  // always completed at the end of a quantity.  When fewer than 24 input
108
  // bits are available in an input group, zero bits are added (on the
109
  // right) to form an integral number of 6-bit groups.
110
  //
111
  // If do_padding is true, padding at the end of the data is performed. This
112
  // output padding uses the '=' character.
113
114
  // Three bytes of data encodes to four characters of cyphertext.
115
  // So we can pump through three-byte chunks atomically.
116
0
  if (szsrc >= 3) {                    // "limit_src - 3" is UB if szsrc < 3.
117
0
    while (cur_src < limit_src - 3) {  // While we have >= 32 bits.
118
0
      uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
119
120
0
      cur_dest[0] = base64[in >> 18];
121
0
      in &= 0x3FFFF;
122
0
      cur_dest[1] = base64[in >> 12];
123
0
      in &= 0xFFF;
124
0
      cur_dest[2] = base64[in >> 6];
125
0
      in &= 0x3F;
126
0
      cur_dest[3] = base64[in];
127
128
0
      cur_dest += 4;
129
0
      cur_src += 3;
130
0
    }
131
0
  }
132
  // To save time, we didn't update szdest or szsrc in the loop.  So do it now.
133
0
  szdest = static_cast<size_t>(limit_dest - cur_dest);
134
0
  szsrc = static_cast<size_t>(limit_src - cur_src);
135
136
  /* now deal with the tail (<=3 bytes) */
137
0
  switch (szsrc) {
138
0
    case 0:
139
      // Nothing left; nothing more to do.
140
0
      break;
141
0
    case 1: {
142
      // One byte left: this encodes to two characters, and (optionally)
143
      // two pad characters to round out the four-character cypherblock.
144
0
      if (szdest < 2) return 0;
145
0
      uint32_t in = cur_src[0];
146
0
      cur_dest[0] = base64[in >> 2];
147
0
      in &= 0x3;
148
0
      cur_dest[1] = base64[in << 4];
149
0
      cur_dest += 2;
150
0
      szdest -= 2;
151
0
      if (do_padding) {
152
0
        if (szdest < 2) return 0;
153
0
        cur_dest[0] = kPad64;
154
0
        cur_dest[1] = kPad64;
155
0
        cur_dest += 2;
156
0
        szdest -= 2;
157
0
      }
158
0
      break;
159
0
    }
160
0
    case 2: {
161
      // Two bytes left: this encodes to three characters, and (optionally)
162
      // one pad character to round out the four-character cypherblock.
163
0
      if (szdest < 3) return 0;
164
0
      uint32_t in = absl::big_endian::Load16(cur_src);
165
0
      cur_dest[0] = base64[in >> 10];
166
0
      in &= 0x3FF;
167
0
      cur_dest[1] = base64[in >> 4];
168
0
      in &= 0x00F;
169
0
      cur_dest[2] = base64[in << 2];
170
0
      cur_dest += 3;
171
0
      szdest -= 3;
172
0
      if (do_padding) {
173
0
        if (szdest < 1) return 0;
174
0
        cur_dest[0] = kPad64;
175
0
        cur_dest += 1;
176
0
        szdest -= 1;
177
0
      }
178
0
      break;
179
0
    }
180
0
    case 3: {
181
      // Three bytes left: same as in the big loop above.  We can't do this in
182
      // the loop because the loop above always reads 4 bytes, and the fourth
183
      // byte is past the end of the input.
184
0
      if (szdest < 4) return 0;
185
0
      uint32_t in =
186
0
          (uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1);
187
0
      cur_dest[0] = base64[in >> 18];
188
0
      in &= 0x3FFFF;
189
0
      cur_dest[1] = base64[in >> 12];
190
0
      in &= 0xFFF;
191
0
      cur_dest[2] = base64[in >> 6];
192
0
      in &= 0x3F;
193
0
      cur_dest[3] = base64[in];
194
0
      cur_dest += 4;
195
0
      szdest -= 4;
196
0
      break;
197
0
    }
198
0
    default:
199
      // Should not be reached: blocks of 4 bytes are handled
200
      // in the while loop before this switch statement.
201
0
      ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
202
0
      break;
203
0
  }
204
0
  return static_cast<size_t>(cur_dest - dest);
205
0
}
206
207
}  // namespace strings_internal
208
ABSL_NAMESPACE_END
209
}  // namespace absl