Coverage Report

Created: 2025-06-24 06:45

/src/binutils-gdb/zlib/crc32.c
Line
Count
Source (jump to first uncovered line)
1
/* crc32.c -- compute the CRC-32 of a data stream
2
 * Copyright (C) 1995-2022 Mark Adler
3
 * For conditions of distribution and use, see copyright notice in zlib.h
4
 *
5
 * This interleaved implementation of a CRC makes use of pipelined multiple
6
 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7
 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8
 */
9
10
/* @(#) $Id$ */
11
12
/*
13
  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14
  protection on the static variables used to control the first-use generation
15
  of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16
  first call get_crc_table() to initialize the tables before allowing more than
17
  one thread to use crc32().
18
19
  MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20
  produced, so that this one source file can be compiled to an executable.
21
 */
22
23
#ifdef MAKECRCH
24
#  include <stdio.h>
25
#  ifndef DYNAMIC_CRC_TABLE
26
#    define DYNAMIC_CRC_TABLE
27
#  endif /* !DYNAMIC_CRC_TABLE */
28
#endif /* MAKECRCH */
29
30
#include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31
32
 /*
33
  A CRC of a message is computed on N braids of words in the message, where
34
  each word consists of W bytes (4 or 8). If N is 3, for example, then three
35
  running sparse CRCs are calculated respectively on each braid, at these
36
  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37
  This is done starting at a word boundary, and continues until as many blocks
38
  of N * W bytes as are available have been processed. The results are combined
39
  into a single CRC at the end. For this code, N must be in the range 1..6 and
40
  W must be 4 or 8. The upper limit on N can be increased if desired by adding
41
  more #if blocks, extending the patterns apparent in the code. In addition,
42
  crc32.h would need to be regenerated, if the maximum N value is increased.
43
44
  N and W are chosen empirically by benchmarking the execution time on a given
45
  processor. The choices for N and W below were based on testing on Intel Kaby
46
  Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47
  Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48
  with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49
  They were all tested with either gcc or clang, all using the -O3 optimization
50
  level. Your mileage may vary.
51
 */
52
53
/* Define N */
54
#ifdef Z_TESTN
55
#  define N Z_TESTN
56
#else
57
0
#  define N 5
58
#endif
59
#if N < 1 || N > 6
60
#  error N must be in 1..6
61
#endif
62
63
/*
64
  z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65
  z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66
  that bytes are eight bits.
67
 */
68
69
/*
70
  Define W and the associated z_word_t type. If W is not defined, then a
71
  braided calculation is not used, and the associated tables and code are not
72
  compiled.
73
 */
74
#ifdef Z_TESTW
75
#  if Z_TESTW-1 != -1
76
#    define W Z_TESTW
77
#  endif
78
#else
79
#  ifdef MAKECRCH
80
#    define W 8         /* required for MAKECRCH */
81
#  else
82
#    if defined(__x86_64__) || defined(__aarch64__)
83
0
#      define W 8
84
#    else
85
#      define W 4
86
#    endif
87
#  endif
88
#endif
89
#ifdef W
90
#  if W == 8 && defined(Z_U8)
91
     typedef Z_U8 z_word_t;
92
#  elif defined(Z_U4)
93
#    undef W
94
#    define W 4
95
     typedef Z_U4 z_word_t;
96
#  else
97
#    undef W
98
#  endif
99
#endif
100
101
/* Local functions. */
102
local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
103
local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
104
105
/* If available, use the ARM processor CRC32 instruction. */
106
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
107
#  define ARMCRC32
108
#endif
109
110
#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111
/*
112
  Swap the bytes in a z_word_t to convert between little and big endian. Any
113
  self-respecting compiler will optimize this to a single machine byte-swap
114
  instruction, if one is available. This assumes that word_t is either 32 bits
115
  or 64 bits.
116
 */
117
local z_word_t byte_swap(word)
118
    z_word_t word;
119
0
{
120
0
#  if W == 8
121
0
    return
122
0
        (word & 0xff00000000000000) >> 56 |
123
0
        (word & 0xff000000000000) >> 40 |
124
0
        (word & 0xff0000000000) >> 24 |
125
0
        (word & 0xff00000000) >> 8 |
126
0
        (word & 0xff000000) << 8 |
127
0
        (word & 0xff0000) << 24 |
128
0
        (word & 0xff00) << 40 |
129
0
        (word & 0xff) << 56;
130
#  else   /* W == 4 */
131
    return
132
        (word & 0xff000000) >> 24 |
133
        (word & 0xff0000) >> 8 |
134
        (word & 0xff00) << 8 |
135
        (word & 0xff) << 24;
136
#  endif
137
0
}
138
#endif
139
140
/* CRC polynomial. */
141
0
#define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
142
143
#ifdef DYNAMIC_CRC_TABLE
144
145
local z_crc_t FAR crc_table[256];
146
local z_crc_t FAR x2n_table[32];
147
local void make_crc_table OF((void));
148
#ifdef W
149
   local z_word_t FAR crc_big_table[256];
150
   local z_crc_t FAR crc_braid_table[W][256];
151
   local z_word_t FAR crc_braid_big_table[W][256];
152
   local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
153
#endif
154
#ifdef MAKECRCH
155
   local void write_table OF((FILE *, const z_crc_t FAR *, int));
156
   local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
157
   local void write_table64 OF((FILE *, const z_word_t FAR *, int));
158
#endif /* MAKECRCH */
159
160
/*
161
  Define a once() function depending on the availability of atomics. If this is
162
  compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
163
  multiple threads, and if atomics are not available, then get_crc_table() must
164
  be called to initialize the tables and must return before any threads are
165
  allowed to compute or combine CRCs.
166
 */
167
168
/* Definition of once functionality. */
169
typedef struct once_s once_t;
170
local void once OF((once_t *, void (*)(void)));
171
172
/* Check for the availability of atomics. */
173
#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
174
    !defined(__STDC_NO_ATOMICS__)
175
176
#include <stdatomic.h>
177
178
/* Structure for once(), which must be initialized with ONCE_INIT. */
179
struct once_s {
180
    atomic_flag begun;
181
    atomic_int done;
182
};
183
#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
184
185
/*
186
  Run the provided init() function exactly once, even if multiple threads
187
  invoke once() at the same time. The state must be a once_t initialized with
188
  ONCE_INIT.
189
 */
190
local void once(state, init)
191
    once_t *state;
192
    void (*init)(void);
193
{
194
    if (!atomic_load(&state->done)) {
195
        if (atomic_flag_test_and_set(&state->begun))
196
            while (!atomic_load(&state->done))
197
                ;
198
        else {
199
            init();
200
            atomic_store(&state->done, 1);
201
        }
202
    }
203
}
204
205
#else   /* no atomics */
206
207
/* Structure for once(), which must be initialized with ONCE_INIT. */
208
struct once_s {
209
    volatile int begun;
210
    volatile int done;
211
};
212
#define ONCE_INIT {0, 0}
213
214
/* Test and set. Alas, not atomic, but tries to minimize the period of
215
   vulnerability. */
216
local int test_and_set OF((int volatile *));
217
local int test_and_set(flag)
218
    int volatile *flag;
219
{
220
    int was;
221
222
    was = *flag;
223
    *flag = 1;
224
    return was;
225
}
226
227
/* Run the provided init() function once. This is not thread-safe. */
228
local void once(state, init)
229
    once_t *state;
230
    void (*init)(void);
231
{
232
    if (!state->done) {
233
        if (test_and_set(&state->begun))
234
            while (!state->done)
235
                ;
236
        else {
237
            init();
238
            state->done = 1;
239
        }
240
    }
241
}
242
243
#endif
244
245
/* State for once(). */
246
local once_t made = ONCE_INIT;
247
248
/*
249
  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
250
  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
251
252
  Polynomials over GF(2) are represented in binary, one bit per coefficient,
253
  with the lowest powers in the most significant bit. Then adding polynomials
254
  is just exclusive-or, and multiplying a polynomial by x is a right shift by
255
  one. If we call the above polynomial p, and represent a byte as the
256
  polynomial q, also with the lowest power in the most significant bit (so the
257
  byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
258
  where a mod b means the remainder after dividing a by b.
259
260
  This calculation is done using the shift-register method of multiplying and
261
  taking the remainder. The register is initialized to zero, and for each
262
  incoming bit, x^32 is added mod p to the register if the bit is a one (where
263
  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
264
  (which is shifting right by one and adding x^32 mod p if the bit shifted out
265
  is a one). We start with the highest power (least significant bit) of q and
266
  repeat for all eight bits of q.
267
268
  The table is simply the CRC of all possible eight bit values. This is all the
269
  information needed to generate CRCs on data a byte at a time for all
270
  combinations of CRC register values and incoming bytes.
271
 */
272
273
local void make_crc_table()
274
{
275
    unsigned i, j, n;
276
    z_crc_t p;
277
278
    /* initialize the CRC of bytes tables */
279
    for (i = 0; i < 256; i++) {
280
        p = i;
281
        for (j = 0; j < 8; j++)
282
            p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
283
        crc_table[i] = p;
284
#ifdef W
285
        crc_big_table[i] = byte_swap(p);
286
#endif
287
    }
288
289
    /* initialize the x^2^n mod p(x) table */
290
    p = (z_crc_t)1 << 30;         /* x^1 */
291
    x2n_table[0] = p;
292
    for (n = 1; n < 32; n++)
293
        x2n_table[n] = p = multmodp(p, p);
294
295
#ifdef W
296
    /* initialize the braiding tables -- needs x2n_table[] */
297
    braid(crc_braid_table, crc_braid_big_table, N, W);
298
#endif
299
300
#ifdef MAKECRCH
301
    {
302
        /*
303
          The crc32.h header file contains tables for both 32-bit and 64-bit
304
          z_word_t's, and so requires a 64-bit type be available. In that case,
305
          z_word_t must be defined to be 64-bits. This code then also generates
306
          and writes out the tables for the case that z_word_t is 32 bits.
307
         */
308
#if !defined(W) || W != 8
309
#  error Need a 64-bit integer type in order to generate crc32.h.
310
#endif
311
        FILE *out;
312
        int k, n;
313
        z_crc_t ltl[8][256];
314
        z_word_t big[8][256];
315
316
        out = fopen("crc32.h", "w");
317
        if (out == NULL) return;
318
319
        /* write out little-endian CRC table to crc32.h */
320
        fprintf(out,
321
            "/* crc32.h -- tables for rapid CRC calculation\n"
322
            " * Generated automatically by crc32.c\n */\n"
323
            "\n"
324
            "local const z_crc_t FAR crc_table[] = {\n"
325
            "    ");
326
        write_table(out, crc_table, 256);
327
        fprintf(out,
328
            "};\n");
329
330
        /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
331
        fprintf(out,
332
            "\n"
333
            "#ifdef W\n"
334
            "\n"
335
            "#if W == 8\n"
336
            "\n"
337
            "local const z_word_t FAR crc_big_table[] = {\n"
338
            "    ");
339
        write_table64(out, crc_big_table, 256);
340
        fprintf(out,
341
            "};\n");
342
343
        /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
344
        fprintf(out,
345
            "\n"
346
            "#else /* W == 4 */\n"
347
            "\n"
348
            "local const z_word_t FAR crc_big_table[] = {\n"
349
            "    ");
350
        write_table32hi(out, crc_big_table, 256);
351
        fprintf(out,
352
            "};\n"
353
            "\n"
354
            "#endif\n");
355
356
        /* write out braid tables for each value of N */
357
        for (n = 1; n <= 6; n++) {
358
            fprintf(out,
359
            "\n"
360
            "#if N == %d\n", n);
361
362
            /* compute braid tables for this N and 64-bit word_t */
363
            braid(ltl, big, n, 8);
364
365
            /* write out braid tables for 64-bit z_word_t to crc32.h */
366
            fprintf(out,
367
            "\n"
368
            "#if W == 8\n"
369
            "\n"
370
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
371
            for (k = 0; k < 8; k++) {
372
                fprintf(out, "   {");
373
                write_table(out, ltl[k], 256);
374
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
375
            }
376
            fprintf(out,
377
            "};\n"
378
            "\n"
379
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
380
            for (k = 0; k < 8; k++) {
381
                fprintf(out, "   {");
382
                write_table64(out, big[k], 256);
383
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
384
            }
385
            fprintf(out,
386
            "};\n");
387
388
            /* compute braid tables for this N and 32-bit word_t */
389
            braid(ltl, big, n, 4);
390
391
            /* write out braid tables for 32-bit z_word_t to crc32.h */
392
            fprintf(out,
393
            "\n"
394
            "#else /* W == 4 */\n"
395
            "\n"
396
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
397
            for (k = 0; k < 4; k++) {
398
                fprintf(out, "   {");
399
                write_table(out, ltl[k], 256);
400
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
401
            }
402
            fprintf(out,
403
            "};\n"
404
            "\n"
405
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
406
            for (k = 0; k < 4; k++) {
407
                fprintf(out, "   {");
408
                write_table32hi(out, big[k], 256);
409
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
410
            }
411
            fprintf(out,
412
            "};\n"
413
            "\n"
414
            "#endif\n"
415
            "\n"
416
            "#endif\n");
417
        }
418
        fprintf(out,
419
            "\n"
420
            "#endif\n");
421
422
        /* write out zeros operator table to crc32.h */
423
        fprintf(out,
424
            "\n"
425
            "local const z_crc_t FAR x2n_table[] = {\n"
426
            "    ");
427
        write_table(out, x2n_table, 32);
428
        fprintf(out,
429
            "};\n");
430
        fclose(out);
431
    }
432
#endif /* MAKECRCH */
433
}
434
435
#ifdef MAKECRCH
436
437
/*
438
   Write the 32-bit values in table[0..k-1] to out, five per line in
439
   hexadecimal separated by commas.
440
 */
441
local void write_table(out, table, k)
442
    FILE *out;
443
    const z_crc_t FAR *table;
444
    int k;
445
{
446
    int n;
447
448
    for (n = 0; n < k; n++)
449
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
450
                (unsigned long)(table[n]),
451
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
452
}
453
454
/*
455
   Write the high 32-bits of each value in table[0..k-1] to out, five per line
456
   in hexadecimal separated by commas.
457
 */
458
local void write_table32hi(out, table, k)
459
FILE *out;
460
const z_word_t FAR *table;
461
int k;
462
{
463
    int n;
464
465
    for (n = 0; n < k; n++)
466
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
467
                (unsigned long)(table[n] >> 32),
468
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
469
}
470
471
/*
472
  Write the 64-bit values in table[0..k-1] to out, three per line in
473
  hexadecimal separated by commas. This assumes that if there is a 64-bit
474
  type, then there is also a long long integer type, and it is at least 64
475
  bits. If not, then the type cast and format string can be adjusted
476
  accordingly.
477
 */
478
local void write_table64(out, table, k)
479
    FILE *out;
480
    const z_word_t FAR *table;
481
    int k;
482
{
483
    int n;
484
485
    for (n = 0; n < k; n++)
486
        fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
487
                (unsigned long long)(table[n]),
488
                n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
489
}
490
491
/* Actually do the deed. */
492
int main()
493
{
494
    make_crc_table();
495
    return 0;
496
}
497
498
#endif /* MAKECRCH */
499
500
#ifdef W
501
/*
502
  Generate the little and big-endian braid tables for the given n and z_word_t
503
  size w. Each array must have room for w blocks of 256 elements.
504
 */
505
local void braid(ltl, big, n, w)
506
    z_crc_t ltl[][256];
507
    z_word_t big[][256];
508
    int n;
509
    int w;
510
{
511
    int k;
512
    z_crc_t i, p, q;
513
    for (k = 0; k < w; k++) {
514
        p = x2nmodp((n * w + 3 - k) << 3, 0);
515
        ltl[k][0] = 0;
516
        big[w - 1 - k][0] = 0;
517
        for (i = 1; i < 256; i++) {
518
            ltl[k][i] = q = multmodp(i << 24, p);
519
            big[w - 1 - k][i] = byte_swap(q);
520
        }
521
    }
522
}
523
#endif
524
525
#else /* !DYNAMIC_CRC_TABLE */
526
/* ========================================================================
527
 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
528
 * of x for combining CRC-32s, all made by make_crc_table().
529
 */
530
#include "crc32.h"
531
#endif /* DYNAMIC_CRC_TABLE */
532
533
/* ========================================================================
534
 * Routines used for CRC calculation. Some are also required for the table
535
 * generation above.
536
 */
537
538
/*
539
  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
540
  reflected. For speed, this requires that a not be zero.
541
 */
542
local z_crc_t multmodp(a, b)
543
    z_crc_t a;
544
    z_crc_t b;
545
0
{
546
0
    z_crc_t m, p;
547
548
0
    m = (z_crc_t)1 << 31;
549
0
    p = 0;
550
0
    for (;;) {
551
0
        if (a & m) {
552
0
            p ^= b;
553
0
            if ((a & (m - 1)) == 0)
554
0
                break;
555
0
        }
556
0
        m >>= 1;
557
0
        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
558
0
    }
559
0
    return p;
560
0
}
561
562
/*
563
  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
564
  initialized.
565
 */
566
local z_crc_t x2nmodp(n, k)
567
    z_off64_t n;
568
    unsigned k;
569
0
{
570
0
    z_crc_t p;
571
572
0
    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
573
0
    while (n) {
574
0
        if (n & 1)
575
0
            p = multmodp(x2n_table[k & 31], p);
576
0
        n >>= 1;
577
0
        k++;
578
0
    }
579
0
    return p;
580
0
}
581
582
/* =========================================================================
583
 * This function can be used by asm versions of crc32(), and to force the
584
 * generation of the CRC tables in a threaded application.
585
 */
586
const z_crc_t FAR * ZEXPORT get_crc_table()
587
0
{
588
#ifdef DYNAMIC_CRC_TABLE
589
    once(&made, make_crc_table);
590
#endif /* DYNAMIC_CRC_TABLE */
591
0
    return (const z_crc_t FAR *)crc_table;
592
0
}
593
594
/* =========================================================================
595
 * Use ARM machine instructions if available. This will compute the CRC about
596
 * ten times faster than the braided calculation. This code does not check for
597
 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
598
 * only be defined if the compilation specifies an ARM processor architecture
599
 * that has the instructions. For example, compiling with -march=armv8.1-a or
600
 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
601
 * instructions.
602
 */
603
#ifdef ARMCRC32
604
605
/*
606
   Constants empirically determined to maximize speed. These values are from
607
   measurements on a Cortex-A57. Your mileage may vary.
608
 */
609
#define Z_BATCH 3990                /* number of words in a batch */
610
#define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
611
#define Z_BATCH_MIN 800             /* fewest words in a final batch */
612
613
unsigned long ZEXPORT crc32_z(crc, buf, len)
614
    unsigned long crc;
615
    const unsigned char FAR *buf;
616
    z_size_t len;
617
{
618
    z_crc_t val;
619
    z_word_t crc1, crc2;
620
    const z_word_t *word;
621
    z_word_t val0, val1, val2;
622
    z_size_t last, last2, i;
623
    z_size_t num;
624
625
    /* Return initial CRC, if requested. */
626
    if (buf == Z_NULL) return 0;
627
628
#ifdef DYNAMIC_CRC_TABLE
629
    once(&made, make_crc_table);
630
#endif /* DYNAMIC_CRC_TABLE */
631
632
    /* Pre-condition the CRC */
633
    crc ^= 0xffffffff;
634
635
    /* Compute the CRC up to a word boundary. */
636
    while (len && ((z_size_t)buf & 7) != 0) {
637
        len--;
638
        val = *buf++;
639
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
640
    }
641
642
    /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
643
    word = (z_word_t const *)buf;
644
    num = len >> 3;
645
    len &= 7;
646
647
    /* Do three interleaved CRCs to realize the throughput of one crc32x
648
       instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
649
       CRCs are combined into a single CRC after each set of batches. */
650
    while (num >= 3 * Z_BATCH) {
651
        crc1 = 0;
652
        crc2 = 0;
653
        for (i = 0; i < Z_BATCH; i++) {
654
            val0 = word[i];
655
            val1 = word[i + Z_BATCH];
656
            val2 = word[i + 2 * Z_BATCH];
657
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
658
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
659
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
660
        }
661
        word += 3 * Z_BATCH;
662
        num -= 3 * Z_BATCH;
663
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
664
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
665
    }
666
667
    /* Do one last smaller batch with the remaining words, if there are enough
668
       to pay for the combination of CRCs. */
669
    last = num / 3;
670
    if (last >= Z_BATCH_MIN) {
671
        last2 = last << 1;
672
        crc1 = 0;
673
        crc2 = 0;
674
        for (i = 0; i < last; i++) {
675
            val0 = word[i];
676
            val1 = word[i + last];
677
            val2 = word[i + last2];
678
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
679
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
680
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
681
        }
682
        word += 3 * last;
683
        num -= 3 * last;
684
        val = x2nmodp(last, 6);
685
        crc = multmodp(val, crc) ^ crc1;
686
        crc = multmodp(val, crc) ^ crc2;
687
    }
688
689
    /* Compute the CRC on any remaining words. */
690
    for (i = 0; i < num; i++) {
691
        val0 = word[i];
692
        __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
693
    }
694
    word += num;
695
696
    /* Complete the CRC on any remaining bytes. */
697
    buf = (const unsigned char FAR *)word;
698
    while (len) {
699
        len--;
700
        val = *buf++;
701
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
702
    }
703
704
    /* Return the CRC, post-conditioned. */
705
    return crc ^ 0xffffffff;
706
}
707
708
#else
709
710
#ifdef W
711
712
/*
713
  Return the CRC of the W bytes in the word_t data, taking the
714
  least-significant byte of the word as the first byte of data, without any pre
715
  or post conditioning. This is used to combine the CRCs of each braid.
716
 */
717
local z_crc_t crc_word(data)
718
    z_word_t data;
719
0
{
720
0
    int k;
721
0
    for (k = 0; k < W; k++)
722
0
        data = (data >> 8) ^ crc_table[data & 0xff];
723
0
    return (z_crc_t)data;
724
0
}
725
726
local z_word_t crc_word_big(data)
727
    z_word_t data;
728
0
{
729
0
    int k;
730
0
    for (k = 0; k < W; k++)
731
0
        data = (data << 8) ^
732
0
            crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
733
0
    return data;
734
0
}
735
736
#endif
737
738
/* ========================================================================= */
739
unsigned long ZEXPORT crc32_z(crc, buf, len)
740
    unsigned long crc;
741
    const unsigned char FAR *buf;
742
    z_size_t len;
743
0
{
744
    /* Return initial CRC, if requested. */
745
0
    if (buf == Z_NULL) return 0;
746
747
#ifdef DYNAMIC_CRC_TABLE
748
    once(&made, make_crc_table);
749
#endif /* DYNAMIC_CRC_TABLE */
750
751
    /* Pre-condition the CRC */
752
0
    crc ^= 0xffffffff;
753
754
0
#ifdef W
755
756
    /* If provided enough bytes, do a braided CRC calculation. */
757
0
    if (len >= N * W + W - 1) {
758
0
        z_size_t blks;
759
0
        z_word_t const *words;
760
0
        unsigned endian;
761
0
        int k;
762
763
        /* Compute the CRC up to a z_word_t boundary. */
764
0
        while (len && ((z_size_t)buf & (W - 1)) != 0) {
765
0
            len--;
766
0
            crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
767
0
        }
768
769
        /* Compute the CRC on as many N z_word_t blocks as are available. */
770
0
        blks = len / (N * W);
771
0
        len -= blks * N * W;
772
0
        words = (z_word_t const *)buf;
773
774
        /* Do endian check at execution time instead of compile time, since ARM
775
           processors can change the endianess at execution time. If the
776
           compiler knows what the endianess will be, it can optimize out the
777
           check and the unused branch. */
778
0
        endian = 1;
779
0
        if (*(unsigned char *)&endian) {
780
            /* Little endian. */
781
782
0
            z_crc_t crc0;
783
0
            z_word_t word0;
784
0
#if N > 1
785
0
            z_crc_t crc1;
786
0
            z_word_t word1;
787
0
#if N > 2
788
0
            z_crc_t crc2;
789
0
            z_word_t word2;
790
0
#if N > 3
791
0
            z_crc_t crc3;
792
0
            z_word_t word3;
793
0
#if N > 4
794
0
            z_crc_t crc4;
795
0
            z_word_t word4;
796
#if N > 5
797
            z_crc_t crc5;
798
            z_word_t word5;
799
#endif
800
0
#endif
801
0
#endif
802
0
#endif
803
0
#endif
804
805
            /* Initialize the CRC for each braid. */
806
0
            crc0 = crc;
807
0
#if N > 1
808
0
            crc1 = 0;
809
0
#if N > 2
810
0
            crc2 = 0;
811
0
#if N > 3
812
0
            crc3 = 0;
813
0
#if N > 4
814
0
            crc4 = 0;
815
#if N > 5
816
            crc5 = 0;
817
#endif
818
0
#endif
819
0
#endif
820
0
#endif
821
0
#endif
822
823
            /*
824
              Process the first blks-1 blocks, computing the CRCs on each braid
825
              independently.
826
             */
827
0
            while (--blks) {
828
                /* Load the word for each braid into registers. */
829
0
                word0 = crc0 ^ words[0];
830
0
#if N > 1
831
0
                word1 = crc1 ^ words[1];
832
0
#if N > 2
833
0
                word2 = crc2 ^ words[2];
834
0
#if N > 3
835
0
                word3 = crc3 ^ words[3];
836
0
#if N > 4
837
0
                word4 = crc4 ^ words[4];
838
#if N > 5
839
                word5 = crc5 ^ words[5];
840
#endif
841
0
#endif
842
0
#endif
843
0
#endif
844
0
#endif
845
0
                words += N;
846
847
                /* Compute and update the CRC for each word. The loop should
848
                   get unrolled. */
849
0
                crc0 = crc_braid_table[0][word0 & 0xff];
850
0
#if N > 1
851
0
                crc1 = crc_braid_table[0][word1 & 0xff];
852
0
#if N > 2
853
0
                crc2 = crc_braid_table[0][word2 & 0xff];
854
0
#if N > 3
855
0
                crc3 = crc_braid_table[0][word3 & 0xff];
856
0
#if N > 4
857
0
                crc4 = crc_braid_table[0][word4 & 0xff];
858
#if N > 5
859
                crc5 = crc_braid_table[0][word5 & 0xff];
860
#endif
861
0
#endif
862
0
#endif
863
0
#endif
864
0
#endif
865
0
                for (k = 1; k < W; k++) {
866
0
                    crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
867
0
#if N > 1
868
0
                    crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
869
0
#if N > 2
870
0
                    crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
871
0
#if N > 3
872
0
                    crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
873
0
#if N > 4
874
0
                    crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
875
#if N > 5
876
                    crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
877
#endif
878
0
#endif
879
0
#endif
880
0
#endif
881
0
#endif
882
0
                }
883
0
            }
884
885
            /*
886
              Process the last block, combining the CRCs of the N braids at the
887
              same time.
888
             */
889
0
            crc = crc_word(crc0 ^ words[0]);
890
0
#if N > 1
891
0
            crc = crc_word(crc1 ^ words[1] ^ crc);
892
0
#if N > 2
893
0
            crc = crc_word(crc2 ^ words[2] ^ crc);
894
0
#if N > 3
895
0
            crc = crc_word(crc3 ^ words[3] ^ crc);
896
0
#if N > 4
897
0
            crc = crc_word(crc4 ^ words[4] ^ crc);
898
#if N > 5
899
            crc = crc_word(crc5 ^ words[5] ^ crc);
900
#endif
901
0
#endif
902
0
#endif
903
0
#endif
904
0
#endif
905
0
            words += N;
906
0
        }
907
0
        else {
908
            /* Big endian. */
909
910
0
            z_word_t crc0, word0, comb;
911
0
#if N > 1
912
0
            z_word_t crc1, word1;
913
0
#if N > 2
914
0
            z_word_t crc2, word2;
915
0
#if N > 3
916
0
            z_word_t crc3, word3;
917
0
#if N > 4
918
0
            z_word_t crc4, word4;
919
#if N > 5
920
            z_word_t crc5, word5;
921
#endif
922
0
#endif
923
0
#endif
924
0
#endif
925
0
#endif
926
927
            /* Initialize the CRC for each braid. */
928
0
            crc0 = byte_swap(crc);
929
0
#if N > 1
930
0
            crc1 = 0;
931
0
#if N > 2
932
0
            crc2 = 0;
933
0
#if N > 3
934
0
            crc3 = 0;
935
0
#if N > 4
936
0
            crc4 = 0;
937
#if N > 5
938
            crc5 = 0;
939
#endif
940
0
#endif
941
0
#endif
942
0
#endif
943
0
#endif
944
945
            /*
946
              Process the first blks-1 blocks, computing the CRCs on each braid
947
              independently.
948
             */
949
0
            while (--blks) {
950
                /* Load the word for each braid into registers. */
951
0
                word0 = crc0 ^ words[0];
952
0
#if N > 1
953
0
                word1 = crc1 ^ words[1];
954
0
#if N > 2
955
0
                word2 = crc2 ^ words[2];
956
0
#if N > 3
957
0
                word3 = crc3 ^ words[3];
958
0
#if N > 4
959
0
                word4 = crc4 ^ words[4];
960
#if N > 5
961
                word5 = crc5 ^ words[5];
962
#endif
963
0
#endif
964
0
#endif
965
0
#endif
966
0
#endif
967
0
                words += N;
968
969
                /* Compute and update the CRC for each word. The loop should
970
                   get unrolled. */
971
0
                crc0 = crc_braid_big_table[0][word0 & 0xff];
972
0
#if N > 1
973
0
                crc1 = crc_braid_big_table[0][word1 & 0xff];
974
0
#if N > 2
975
0
                crc2 = crc_braid_big_table[0][word2 & 0xff];
976
0
#if N > 3
977
0
                crc3 = crc_braid_big_table[0][word3 & 0xff];
978
0
#if N > 4
979
0
                crc4 = crc_braid_big_table[0][word4 & 0xff];
980
#if N > 5
981
                crc5 = crc_braid_big_table[0][word5 & 0xff];
982
#endif
983
0
#endif
984
0
#endif
985
0
#endif
986
0
#endif
987
0
                for (k = 1; k < W; k++) {
988
0
                    crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
989
0
#if N > 1
990
0
                    crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
991
0
#if N > 2
992
0
                    crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
993
0
#if N > 3
994
0
                    crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
995
0
#if N > 4
996
0
                    crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
997
#if N > 5
998
                    crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
999
#endif
1000
0
#endif
1001
0
#endif
1002
0
#endif
1003
0
#endif
1004
0
                }
1005
0
            }
1006
1007
            /*
1008
              Process the last block, combining the CRCs of the N braids at the
1009
              same time.
1010
             */
1011
0
            comb = crc_word_big(crc0 ^ words[0]);
1012
0
#if N > 1
1013
0
            comb = crc_word_big(crc1 ^ words[1] ^ comb);
1014
0
#if N > 2
1015
0
            comb = crc_word_big(crc2 ^ words[2] ^ comb);
1016
0
#if N > 3
1017
0
            comb = crc_word_big(crc3 ^ words[3] ^ comb);
1018
0
#if N > 4
1019
0
            comb = crc_word_big(crc4 ^ words[4] ^ comb);
1020
#if N > 5
1021
            comb = crc_word_big(crc5 ^ words[5] ^ comb);
1022
#endif
1023
0
#endif
1024
0
#endif
1025
0
#endif
1026
0
#endif
1027
0
            words += N;
1028
0
            crc = byte_swap(comb);
1029
0
        }
1030
1031
        /*
1032
          Update the pointer to the remaining bytes to process.
1033
         */
1034
0
        buf = (unsigned char const *)words;
1035
0
    }
1036
1037
0
#endif /* W */
1038
1039
    /* Complete the computation of the CRC on any remaining bytes. */
1040
0
    while (len >= 8) {
1041
0
        len -= 8;
1042
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1043
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1044
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1045
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1046
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1047
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1048
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1049
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1050
0
    }
1051
0
    while (len) {
1052
0
        len--;
1053
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1054
0
    }
1055
1056
    /* Return the CRC, post-conditioned. */
1057
0
    return crc ^ 0xffffffff;
1058
0
}
1059
1060
#endif
1061
1062
/* ========================================================================= */
1063
unsigned long ZEXPORT crc32(crc, buf, len)
1064
    unsigned long crc;
1065
    const unsigned char FAR *buf;
1066
    uInt len;
1067
0
{
1068
0
    return crc32_z(crc, buf, len);
1069
0
}
1070
1071
/* ========================================================================= */
1072
uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1073
    uLong crc1;
1074
    uLong crc2;
1075
    z_off64_t len2;
1076
0
{
1077
#ifdef DYNAMIC_CRC_TABLE
1078
    once(&made, make_crc_table);
1079
#endif /* DYNAMIC_CRC_TABLE */
1080
0
    return multmodp(x2nmodp(len2, 3), crc1) ^ crc2;
1081
0
}
1082
1083
/* ========================================================================= */
1084
uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1085
    uLong crc1;
1086
    uLong crc2;
1087
    z_off_t len2;
1088
0
{
1089
0
    return crc32_combine64(crc1, crc2, len2);
1090
0
}
1091
1092
/* ========================================================================= */
1093
uLong ZEXPORT crc32_combine_gen64(len2)
1094
    z_off64_t len2;
1095
0
{
1096
#ifdef DYNAMIC_CRC_TABLE
1097
    once(&made, make_crc_table);
1098
#endif /* DYNAMIC_CRC_TABLE */
1099
0
    return x2nmodp(len2, 3);
1100
0
}
1101
1102
/* ========================================================================= */
1103
uLong ZEXPORT crc32_combine_gen(len2)
1104
    z_off_t len2;
1105
0
{
1106
0
    return crc32_combine_gen64(len2);
1107
0
}
1108
1109
/* ========================================================================= */
1110
uLong crc32_combine_op(crc1, crc2, op)
1111
    uLong crc1;
1112
    uLong crc2;
1113
    uLong op;
1114
0
{
1115
0
    return multmodp(op, crc1) ^ crc2;
1116
0
}