/src/serenity/Userland/Libraries/LibCrypto/Checksum/CRC32.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2020-2022, the SerenityOS developers. |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/Array.h> |
8 | | #include <AK/NumericLimits.h> |
9 | | #include <AK/Span.h> |
10 | | #include <AK/Types.h> |
11 | | #include <LibCrypto/Checksum/CRC32.h> |
12 | | |
13 | | #ifdef __ARM_ACLE |
14 | | # include <arm_acle.h> |
15 | | #endif |
16 | | |
17 | | namespace Crypto::Checksum { |
18 | | |
19 | | #if defined(__ARM_ACLE) && __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRC32) |
// Hardware-accelerated path: uses the ARMv8 CRC32 extension via ACLE intrinsics.
void CRC32::update(ReadonlyBytes span)
{
    // FIXME: Does this require runtime checking on rpi?
    //        (Maybe the instruction is present on the rpi4 but not on the rpi3?)

    u8 const* data = span.data();
    size_t size = span.size();

    // Consume leading bytes one at a time until the pointer is 8-byte aligned,
    // so the 64-bit loop below only performs aligned loads.
    while (size > 0 && (reinterpret_cast<FlatPtr>(data) & 7) != 0) {
        m_state = __crc32b(m_state, *data);
        ++data;
        --size;
    }

    // Bulk of the work: fold 8 bytes per iteration with the 64-bit CRC32 instruction.
    auto* data64 = reinterpret_cast<u64 const*>(data);
    while (size >= 8) {
        m_state = __crc32d(m_state, *data64);
        ++data64;
        size -= 8;
    }

    // Tail: fewer than 8 bytes remain; finish byte-by-byte.
    data = reinterpret_cast<u8 const*>(data64);
    while (size > 0) {
        m_state = __crc32b(m_state, *data);
        ++data;
        --size;
    }
}
48 | | |
49 | | // FIXME: On Intel, use _mm_crc32_u8 / _mm_crc32_u64 if available (SSE 4.2). |
50 | | |
51 | | #else |
52 | | |
53 | | static constexpr size_t ethernet_polynomial = 0xEDB88320; |
54 | | |
55 | | # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
56 | | |
57 | | // This implements Intel's slicing-by-8 algorithm. Their original paper is no longer on their website, |
58 | | // but their source code is still available for reference: |
59 | | // https://sourceforge.net/projects/slicing-by-8/ |
60 | | static constexpr auto generate_table() |
61 | 0 | { |
62 | 0 | Array<Array<u32, 256>, 8> data {}; |
63 | 0 |
|
64 | 0 | for (u32 i = 0; i < 256; ++i) { |
65 | 0 | auto value = i; |
66 | 0 |
|
67 | 0 | for (size_t j = 0; j < 8; ++j) |
68 | 0 | value = (value >> 1) ^ ((value & 1) * ethernet_polynomial); |
69 | 0 |
|
70 | 0 | data[0][i] = value; |
71 | 0 | } |
72 | 0 |
|
73 | 0 | for (u32 i = 0; i < 256; ++i) { |
74 | 0 | for (size_t j = 1; j < 8; ++j) |
75 | 0 | data[j][i] = (data[j - 1][i] >> 8) ^ data[0][data[j - 1][i] & 0xff]; |
76 | 0 | } |
77 | 0 |
|
78 | 0 | return data; |
79 | 0 | } |
80 | | |
// The fully precomputed slicing-by-8 lookup tables, built at compile time.
static constexpr auto table = generate_table();

// Result of split_bytes_for_alignment(): an input span partitioned into a
// (possibly empty) unaligned prefix and the remaining aligned portion.
struct AlignmentData {
    ReadonlyBytes misaligned;
    ReadonlyBytes aligned;
};
87 | | |
88 | | static AlignmentData split_bytes_for_alignment(ReadonlyBytes data, size_t alignment) |
89 | 2.12M | { |
90 | 2.12M | auto address = reinterpret_cast<uintptr_t>(data.data()); |
91 | 2.12M | auto offset = alignment - address % alignment; |
92 | | |
93 | 2.12M | if (offset == alignment) |
94 | 2.09M | return { {}, data }; |
95 | | |
96 | 27.5k | if (data.size() < alignment) |
97 | 839 | return { data, {} }; |
98 | | |
99 | 26.7k | return { data.trim(offset), data.slice(offset) }; |
100 | 27.5k | } |
101 | | |
102 | | static constexpr u32 single_byte_crc(u32 crc, u8 byte) |
103 | 132k | { |
104 | 132k | return (crc >> 8) ^ table[0][(crc & 0xff) ^ byte]; |
105 | 132k | } |
106 | | |
107 | | void CRC32::update(ReadonlyBytes data) |
108 | 2.12M | { |
109 | | // The provided data may not be aligned to a 4-byte boundary, required to reinterpret its address |
110 | | // into a u32 in the loop below. So we split the bytes into two segments: the misaligned bytes |
111 | | // (which undergo the standard 1-byte-at-a-time algorithm) and remaining aligned bytes. |
112 | 2.12M | auto [misaligned_data, aligned_data] = split_bytes_for_alignment(data, alignof(u32)); |
113 | | |
114 | 2.12M | for (auto byte : misaligned_data) |
115 | 54.7k | m_state = single_byte_crc(m_state, byte); |
116 | | |
117 | 1.09G | while (aligned_data.size() >= 8) { |
118 | 1.09G | auto const* segment = reinterpret_cast<u32 const*>(aligned_data.data()); |
119 | 1.09G | auto low = *segment ^ m_state; |
120 | 1.09G | auto high = *(++segment); |
121 | | |
122 | 1.09G | m_state = table[0][(high >> 24) & 0xff] |
123 | 1.09G | ^ table[1][(high >> 16) & 0xff] |
124 | 1.09G | ^ table[2][(high >> 8) & 0xff] |
125 | 1.09G | ^ table[3][high & 0xff] |
126 | 1.09G | ^ table[4][(low >> 24) & 0xff] |
127 | 1.09G | ^ table[5][(low >> 16) & 0xff] |
128 | 1.09G | ^ table[6][(low >> 8) & 0xff] |
129 | 1.09G | ^ table[7][low & 0xff]; |
130 | | |
131 | 1.09G | aligned_data = aligned_data.slice(8); |
132 | 1.09G | } |
133 | | |
134 | 2.12M | for (auto byte : aligned_data) |
135 | 77.7k | m_state = single_byte_crc(m_state, byte); |
136 | 2.12M | } |
137 | | |
138 | | # else |
139 | | |
140 | | // FIXME: Implement the slicing-by-8 algorithm for big endian CPUs. |
141 | | static constexpr auto generate_table() |
142 | | { |
143 | | Array<u32, 256> data {}; |
144 | | for (auto i = 0u; i < data.size(); i++) { |
145 | | u32 value = i; |
146 | | |
147 | | for (auto j = 0; j < 8; j++) { |
148 | | if (value & 1) { |
149 | | value = ethernet_polynomial ^ (value >> 1); |
150 | | } else { |
151 | | value = value >> 1; |
152 | | } |
153 | | } |
154 | | |
155 | | data[i] = value; |
156 | | } |
157 | | return data; |
158 | | } |
159 | | |
160 | | static constexpr auto table = generate_table(); |
161 | | |
162 | | void CRC32::update(ReadonlyBytes data) |
163 | | { |
164 | | for (size_t i = 0; i < data.size(); i++) { |
165 | | m_state = table[(m_state ^ data.at(i)) & 0xFF] ^ (m_state >> 8); |
166 | | } |
167 | | } |
168 | | |
169 | | # endif |
170 | | #endif |
171 | | |
u32 CRC32::digest()
{
    // Final XOR (one's complement) step of standard CRC-32. Presumably m_state
    // starts out as ~0 per convention — see the initializer in CRC32.h to confirm.
    return ~m_state;
}
176 | | |
177 | | } |