Coverage Report

Created: 2025-10-28 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zlib-ng/arch/generic/crc32_braid_c.c
Line
Count
Source
1
/* crc32_braid.c -- compute the CRC-32 of a data stream
2
 * Copyright (C) 1995-2022 Mark Adler
3
 * For conditions of distribution and use, see copyright notice in zlib.h
4
 *
5
 * This interleaved implementation of a CRC makes use of pipelined multiple
6
 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7
 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8
 */
9
10
#include "zbuild.h"
11
#include "crc32_braid_p.h"
12
#include "crc32_braid_tbl.h"
13
14
/*
15
  A CRC of a message is computed on BRAID_N braids of words in the message, where
16
  each word consists of BRAID_W bytes (4 or 8). If BRAID_N is 3, for example, then
17
  three running sparse CRCs are calculated respectively on each braid, at these
18
  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
19
  This is done starting at a word boundary, and continues until as many blocks of
20
  BRAID_N * BRAID_W bytes as are available have been processed. The results are
21
  combined into a single CRC at the end. For this code, BRAID_N must be in the
22
  range 1..6 and BRAID_W must be 4 or 8. The upper limit on BRAID_N can be increased
23
  if desired by adding more #if blocks, extending the patterns apparent in the code.
24
  In addition, crc32 tables would need to be regenerated, if the maximum BRAID_N
25
  value is increased.
26
27
  BRAID_N and BRAID_W are chosen empirically by benchmarking the execution time
28
  on a given processor. The choices for BRAID_N and BRAID_W below were based on
29
  testing on Intel Kaby Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC
30
  POWER9, and MIPS64 Octeon II processors.
31
  The Intel, AMD, and ARM processors were all fastest with BRAID_N=5, BRAID_W=8.
32
  The Sparc, PowerPC, and MIPS64 were all fastest at BRAID_N=5, BRAID_W=4.
33
  They were all tested with either gcc or clang, all using the -O3 optimization
34
  level. Your mileage may vary.
35
*/
36
37
/* ========================================================================= */
38
#ifdef BRAID_W
39
/*
40
  Return the CRC of the BRAID_W bytes in the word_t data, taking the
41
  least-significant byte of the word as the first byte of data, without any pre
42
  or post conditioning. This is used to combine the CRCs of each braid.
43
 */
44
#  if BYTE_ORDER == LITTLE_ENDIAN
45
0
static uint32_t crc_word(z_word_t data) {
46
0
    int k;
47
0
    for (k = 0; k < BRAID_W; k++)
48
0
        data = (data >> 8) ^ crc_table[data & 0xff];
49
0
    return (uint32_t)data;
50
0
}
51
#  elif BYTE_ORDER == BIG_ENDIAN
52
static z_word_t crc_word(z_word_t data) {
53
    int k;
54
    for (k = 0; k < BRAID_W; k++)
55
        data = (data << 8) ^
56
            crc_big_table[(data >> ((BRAID_W - 1) << 3)) & 0xff];
57
    return data;
58
}
59
#  endif /* BYTE_ORDER */
60
#endif /* BRAID_W */
61
62
/* ========================================================================= */
63
0
Z_INTERNAL uint32_t crc32_braid_internal(uint32_t c, const uint8_t *buf, size_t len) {
64
65
0
#ifdef BRAID_W
66
    /* If provided enough bytes, do a braided CRC calculation. */
67
0
    if (len >= BRAID_N * BRAID_W + BRAID_W - 1) {
68
0
        size_t blks;
69
0
        z_word_t const *words;
70
0
        int k;
71
72
        /* Compute the CRC up to a z_word_t boundary. */
73
0
        while (len && ((uintptr_t)buf & (BRAID_W - 1)) != 0) {
74
0
            len--;
75
0
            CRC_DO1;
76
0
        }
77
78
        /* Compute the CRC on as many BRAID_N z_word_t blocks as are available. */
79
0
        blks = len / (BRAID_N * BRAID_W);
80
0
        len -= blks * BRAID_N * BRAID_W;
81
0
        words = (z_word_t const *)buf;
82
83
0
        z_word_t crc0, word0, comb;
84
0
#if BRAID_N > 1
85
0
        z_word_t crc1, word1;
86
0
#if BRAID_N > 2
87
0
        z_word_t crc2, word2;
88
0
#if BRAID_N > 3
89
0
        z_word_t crc3, word3;
90
0
#if BRAID_N > 4
91
0
        z_word_t crc4, word4;
92
#if BRAID_N > 5
93
        z_word_t crc5, word5;
94
#endif
95
0
#endif
96
0
#endif
97
0
#endif
98
0
#endif
99
        /* Initialize the CRC for each braid. */
100
0
        crc0 = ZSWAPWORD(c);
101
0
#if BRAID_N > 1
102
0
        crc1 = 0;
103
0
#if BRAID_N > 2
104
0
        crc2 = 0;
105
0
#if BRAID_N > 3
106
0
        crc3 = 0;
107
0
#if BRAID_N > 4
108
0
        crc4 = 0;
109
#if BRAID_N > 5
110
        crc5 = 0;
111
#endif
112
0
#endif
113
0
#endif
114
0
#endif
115
0
#endif
116
        /* Process the first blks-1 blocks, computing the CRCs on each braid independently. */
117
0
        while (--blks) {
118
            /* Load the word for each braid into registers. */
119
0
            word0 = crc0 ^ words[0];
120
0
#if BRAID_N > 1
121
0
            word1 = crc1 ^ words[1];
122
0
#if BRAID_N > 2
123
0
            word2 = crc2 ^ words[2];
124
0
#if BRAID_N > 3
125
0
            word3 = crc3 ^ words[3];
126
0
#if BRAID_N > 4
127
0
            word4 = crc4 ^ words[4];
128
#if BRAID_N > 5
129
            word5 = crc5 ^ words[5];
130
#endif
131
0
#endif
132
0
#endif
133
0
#endif
134
0
#endif
135
0
            words += BRAID_N;
136
137
            /* Compute and update the CRC for each word. The loop should get unrolled. */
138
0
            crc0 = BRAID_TABLE[0][word0 & 0xff];
139
0
#if BRAID_N > 1
140
0
            crc1 = BRAID_TABLE[0][word1 & 0xff];
141
0
#if BRAID_N > 2
142
0
            crc2 = BRAID_TABLE[0][word2 & 0xff];
143
0
#if BRAID_N > 3
144
0
            crc3 = BRAID_TABLE[0][word3 & 0xff];
145
0
#if BRAID_N > 4
146
0
            crc4 = BRAID_TABLE[0][word4 & 0xff];
147
#if BRAID_N > 5
148
            crc5 = BRAID_TABLE[0][word5 & 0xff];
149
#endif
150
0
#endif
151
0
#endif
152
0
#endif
153
0
#endif
154
0
            for (k = 1; k < BRAID_W; k++) {
155
0
                crc0 ^= BRAID_TABLE[k][(word0 >> (k << 3)) & 0xff];
156
0
#if BRAID_N > 1
157
0
                crc1 ^= BRAID_TABLE[k][(word1 >> (k << 3)) & 0xff];
158
0
#if BRAID_N > 2
159
0
                crc2 ^= BRAID_TABLE[k][(word2 >> (k << 3)) & 0xff];
160
0
#if BRAID_N > 3
161
0
                crc3 ^= BRAID_TABLE[k][(word3 >> (k << 3)) & 0xff];
162
0
#if BRAID_N > 4
163
0
                crc4 ^= BRAID_TABLE[k][(word4 >> (k << 3)) & 0xff];
164
#if BRAID_N > 5
165
                crc5 ^= BRAID_TABLE[k][(word5 >> (k << 3)) & 0xff];
166
#endif
167
0
#endif
168
0
#endif
169
0
#endif
170
0
#endif
171
0
            }
172
0
        }
173
174
        /* Process the last block, combining the CRCs of the BRAID_N braids at the same time. */
175
0
        comb = crc_word(crc0 ^ words[0]);
176
0
#if BRAID_N > 1
177
0
        comb = crc_word(crc1 ^ words[1] ^ comb);
178
0
#if BRAID_N > 2
179
0
        comb = crc_word(crc2 ^ words[2] ^ comb);
180
0
#if BRAID_N > 3
181
0
        comb = crc_word(crc3 ^ words[3] ^ comb);
182
0
#if BRAID_N > 4
183
0
        comb = crc_word(crc4 ^ words[4] ^ comb);
184
#if BRAID_N > 5
185
        comb = crc_word(crc5 ^ words[5] ^ comb);
186
#endif
187
0
#endif
188
0
#endif
189
0
#endif
190
0
#endif
191
0
        words += BRAID_N;
192
0
        Assert(comb <= UINT32_MAX, "comb should fit in uint32_t");
193
0
        c = (uint32_t)ZSWAPWORD(comb);
194
195
        /* Update the pointer to the remaining bytes to process. */
196
0
        buf = (const unsigned char *)words;
197
0
    }
198
199
0
#endif /* BRAID_W */
200
201
    /* Complete the computation of the CRC on any remaining bytes. */
202
0
    while (len >= 8) {
203
0
        len -= 8;
204
0
        CRC_DO8;
205
0
    }
206
0
    while (len) {
207
0
        len--;
208
0
        CRC_DO1;
209
0
    }
210
211
    /* Return the CRC, post-conditioned. */
212
0
    return c;
213
0
}
214
215
0
Z_INTERNAL uint32_t crc32_braid(uint32_t c, const uint8_t *buf, size_t len) {
216
0
    c = (~c) & 0xffffffff;
217
218
0
    c = crc32_braid_internal(c, buf, len);
219
220
    /* Return the CRC, post-conditioned. */
221
0
    return c ^ 0xffffffff;
222
0
}