/src/serenity/Userland/Libraries/LibWeb/Infra/Base64.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2022-2023, the SerenityOS developers. |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/Base64.h> |
8 | | #include <AK/ByteBuffer.h> |
9 | | #include <AK/CharacterTypes.h> |
10 | | #include <AK/Error.h> |
11 | | #include <AK/StringBuilder.h> |
12 | | #include <AK/StringView.h> |
13 | | #include <AK/Vector.h> |
14 | | #include <LibWeb/Infra/Base64.h> |
15 | | #include <LibWeb/Infra/CharacterTypes.h> |
16 | | |
17 | | namespace Web::Infra { |
18 | | |
19 | | // https://infra.spec.whatwg.org/#forgiving-base64 |
20 | | ErrorOr<ByteBuffer> decode_forgiving_base64(StringView input) |
21 | 0 | { |
22 | | // 1. Remove all ASCII whitespace from data. |
23 | | // FIXME: It is possible to avoid copying input here, it's just a bit tricky to remove the equal signs |
24 | 0 | StringBuilder builder; |
25 | 0 | for (auto character : input) { |
26 | 0 | if (!is_ascii_whitespace(character)) |
27 | 0 | TRY(builder.try_append(character)); |
28 | 0 | } |
29 | 0 | auto data = builder.string_view(); |
30 | | |
31 | | // 2. If data’s code point length divides by 4 leaving no remainder, then: |
32 | 0 | if (data.length() % 4 == 0) { |
33 | | // If data ends with one or two U+003D (=) code points, then remove them from data. |
34 | 0 | if (data.ends_with("=="sv)) |
35 | 0 | data = data.substring_view(0, data.length() - 2); |
36 | 0 | else if (data.ends_with('=')) |
37 | 0 | data = data.substring_view(0, data.length() - 1); |
38 | 0 | } |
39 | | |
40 | | // 3. If data’s code point length divides by 4 leaving a remainder of 1, then return failure. |
41 | 0 | if (data.length() % 4 == 1) |
42 | 0 | return Error::from_string_literal("Invalid input length in forgiving base64 decode"); |
43 | | |
44 | | // 4. If data contains a code point that is not one of |
45 | | // U+002B (+), U+002F (/), ASCII alphanumeric |
46 | | // then return failure. |
47 | 0 | for (auto point : data) { |
48 | 0 | if (point != '+' && point != '/' && !is_ascii_alphanumeric(point)) |
49 | 0 | return Error::from_string_literal("Invalid character in forgiving base64 decode"); |
50 | 0 | } |
51 | | |
52 | | // 5. Let output be an empty byte sequence. |
53 | | // 6. Let buffer be an empty buffer that can have bits appended to it. |
54 | 0 | Vector<u8> output; |
55 | 0 | u32 buffer = 0; |
56 | 0 | auto accumulated_bits = 0; |
57 | |
|
58 | 0 | auto add_to_buffer = [&](u8 number) { |
59 | 0 | VERIFY(number < 64); |
60 | 0 | u32 buffer_mask = number; |
61 | |
|
62 | 0 | if (accumulated_bits == 0) |
63 | 0 | buffer_mask <<= 18; |
64 | 0 | else if (accumulated_bits == 6) |
65 | 0 | buffer_mask <<= 12; |
66 | 0 | else if (accumulated_bits == 12) |
67 | 0 | buffer_mask <<= 6; |
68 | 0 | else if (accumulated_bits == 18) |
69 | 0 | buffer_mask <<= 0; |
70 | |
|
71 | 0 | buffer |= buffer_mask; |
72 | |
|
73 | 0 | accumulated_bits += 6; |
74 | 0 | }; |
75 | |
|
76 | 0 | auto append_bytes = [&]() { |
77 | 0 | output.append(static_cast<u8>((buffer & 0xff0000) >> 16)); |
78 | 0 | output.append(static_cast<u8>((buffer & 0xff00) >> 8)); |
79 | 0 | output.append(static_cast<u8>(buffer & 0xff)); |
80 | |
|
81 | 0 | buffer = 0; |
82 | 0 | accumulated_bits = 0; |
83 | 0 | }; |
84 | |
|
85 | 0 | auto alphabet_lookup_table = AK::base64_lookup_table(); |
86 | | |
87 | | // 7. Let position be a position variable for data, initially pointing at the start of data. |
88 | | // 8. While position does not point past the end of data: |
89 | 0 | for (auto point : data) { |
90 | | // 1. Find the code point pointed to by position in the second column of Table 1: The Base 64 Alphabet of RFC 4648. |
91 | | // Let n be the number given in the first cell of the same row. [RFC4648] |
92 | 0 | auto n = alphabet_lookup_table[point]; |
93 | 0 | VERIFY(n >= 0); |
94 | | |
95 | | // 2. Append the six bits corresponding to n, most significant bit first, to buffer. |
96 | 0 | add_to_buffer(static_cast<u8>(n)); |
97 | | |
98 | | // 3. buffer has accumulated 24 bits, |
99 | 0 | if (accumulated_bits == 24) { |
100 | | // interpret them as three 8-bit big-endian numbers. |
101 | | // Append three bytes with values equal to those numbers to output, in the same order, and then empty buffer |
102 | 0 | append_bytes(); |
103 | 0 | } |
104 | 0 | } |
105 | | |
106 | | // 9. If buffer is not empty, it contains either 12 or 18 bits. |
107 | 0 | VERIFY(accumulated_bits == 0 || accumulated_bits == 12 || accumulated_bits == 18); |
108 | | |
109 | | // If it contains 12 bits, then discard the last four and interpret the remaining eight as an 8-bit big-endian number. |
110 | 0 | if (accumulated_bits == 12) |
111 | 0 | output.append(static_cast<u8>((buffer & 0xff0000) >> 16)); |
112 | | |
113 | | // If it contains 18 bits, then discard the last two and interpret the remaining 16 as two 8-bit big-endian numbers. |
114 | | // Append the one or two bytes with values equal to those one or two numbers to output, in the same order. |
115 | 0 | if (accumulated_bits == 18) { |
116 | 0 | output.append(static_cast<u8>((buffer & 0xff0000) >> 16)); |
117 | 0 | output.append(static_cast<u8>((buffer & 0xff00) >> 8)); |
118 | 0 | } |
119 | |
|
120 | 0 | return ByteBuffer::copy(output); |
121 | 0 | } |
122 | | |
123 | | } |