Coverage Report

Created: 2025-11-16 07:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibWeb/Infra/Base64.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2022-2023, the SerenityOS developers.
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include <AK/Base64.h>
8
#include <AK/ByteBuffer.h>
9
#include <AK/CharacterTypes.h>
10
#include <AK/Error.h>
11
#include <AK/StringBuilder.h>
12
#include <AK/StringView.h>
13
#include <AK/Vector.h>
14
#include <LibWeb/Infra/Base64.h>
15
#include <LibWeb/Infra/CharacterTypes.h>
16
17
namespace Web::Infra {
18
19
// https://infra.spec.whatwg.org/#forgiving-base64
20
ErrorOr<ByteBuffer> decode_forgiving_base64(StringView input)
21
0
{
22
    // 1. Remove all ASCII whitespace from data.
23
    // FIXME: It is possible to avoid copying input here, it's just a bit tricky to remove the equal signs
24
0
    StringBuilder builder;
25
0
    for (auto character : input) {
26
0
        if (!is_ascii_whitespace(character))
27
0
            TRY(builder.try_append(character));
28
0
    }
29
0
    auto data = builder.string_view();
30
31
    // 2. If data’s code point length divides by 4 leaving no remainder, then:
32
0
    if (data.length() % 4 == 0) {
33
        // If data ends with one or two U+003D (=) code points, then remove them from data.
34
0
        if (data.ends_with("=="sv))
35
0
            data = data.substring_view(0, data.length() - 2);
36
0
        else if (data.ends_with('='))
37
0
            data = data.substring_view(0, data.length() - 1);
38
0
    }
39
40
    // 3. If data’s code point length divides by 4 leaving a remainder of 1, then return failure.
41
0
    if (data.length() % 4 == 1)
42
0
        return Error::from_string_literal("Invalid input length in forgiving base64 decode");
43
44
    // 4. If data contains a code point that is not one of
45
    //     U+002B (+), U+002F (/), ASCII alphanumeric
46
    // then return failure.
47
0
    for (auto point : data) {
48
0
        if (point != '+' && point != '/' && !is_ascii_alphanumeric(point))
49
0
            return Error::from_string_literal("Invalid character in forgiving base64 decode");
50
0
    }
51
52
    // 5. Let output be an empty byte sequence.
53
    // 6. Let buffer be an empty buffer that can have bits appended to it.
54
0
    Vector<u8> output;
55
0
    u32 buffer = 0;
56
0
    auto accumulated_bits = 0;
57
58
0
    auto add_to_buffer = [&](u8 number) {
59
0
        VERIFY(number < 64);
60
0
        u32 buffer_mask = number;
61
62
0
        if (accumulated_bits == 0)
63
0
            buffer_mask <<= 18;
64
0
        else if (accumulated_bits == 6)
65
0
            buffer_mask <<= 12;
66
0
        else if (accumulated_bits == 12)
67
0
            buffer_mask <<= 6;
68
0
        else if (accumulated_bits == 18)
69
0
            buffer_mask <<= 0;
70
71
0
        buffer |= buffer_mask;
72
73
0
        accumulated_bits += 6;
74
0
    };
75
76
0
    auto append_bytes = [&]() {
77
0
        output.append(static_cast<u8>((buffer & 0xff0000) >> 16));
78
0
        output.append(static_cast<u8>((buffer & 0xff00) >> 8));
79
0
        output.append(static_cast<u8>(buffer & 0xff));
80
81
0
        buffer = 0;
82
0
        accumulated_bits = 0;
83
0
    };
84
85
0
    auto alphabet_lookup_table = AK::base64_lookup_table();
86
87
    // 7. Let position be a position variable for data, initially pointing at the start of data.
88
    // 8. While position does not point past the end of data:
89
0
    for (auto point : data) {
90
        // 1. Find the code point pointed to by position in the second column of Table 1: The Base 64 Alphabet of RFC 4648.
91
        //    Let n be the number given in the first cell of the same row. [RFC4648]
92
0
        auto n = alphabet_lookup_table[point];
93
0
        VERIFY(n >= 0);
94
95
        // 2. Append the six bits corresponding to n, most significant bit first, to buffer.
96
0
        add_to_buffer(static_cast<u8>(n));
97
98
        // 3. buffer has accumulated 24 bits,
99
0
        if (accumulated_bits == 24) {
100
            // interpret them as three 8-bit big-endian numbers.
101
            // Append three bytes with values equal to those numbers to output, in the same order, and then empty buffer
102
0
            append_bytes();
103
0
        }
104
0
    }
105
106
    // 9. If buffer is not empty, it contains either 12 or 18 bits.
107
0
    VERIFY(accumulated_bits == 0 || accumulated_bits == 12 || accumulated_bits == 18);
108
109
    // If it contains 12 bits, then discard the last four and interpret the remaining eight as an 8-bit big-endian number.
110
0
    if (accumulated_bits == 12)
111
0
        output.append(static_cast<u8>((buffer & 0xff0000) >> 16));
112
113
    // If it contains 18 bits, then discard the last two and interpret the remaining 16 as two 8-bit big-endian numbers.
114
    // Append the one or two bytes with values equal to those one or two numbers to output, in the same order.
115
0
    if (accumulated_bits == 18) {
116
0
        output.append(static_cast<u8>((buffer & 0xff0000) >> 16));
117
0
        output.append(static_cast<u8>((buffer & 0xff00) >> 8));
118
0
    }
119
120
0
    return ByteBuffer::copy(output);
121
0
}
122
123
}