Coverage Report

Created: 2024-01-20 12:44

/src/zlib/crc32.c
Line
Count
Source (jump to first uncovered line)
1
/* crc32.c -- compute the CRC-32 of a data stream
2
 * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016, 2018 Mark Adler
3
 * For conditions of distribution and use, see copyright notice in zlib.h
4
 *
5
 * This interleaved implementation of a CRC makes use of pipelined multiple
6
 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7
 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8
 */
9
10
/* @(#) $Id$ */
11
12
/*
13
  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14
  protection on the static variables used to control the first-use generation
15
  of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16
  first call get_crc_table() to initialize the tables before allowing more than
17
  one thread to use crc32().
18
19
  MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20
  produced, so that this one source file can be compiled to an executable.
21
 */
22
23
#ifdef MAKECRCH
24
#  include <stdio.h>
25
#  ifndef DYNAMIC_CRC_TABLE
26
#    define DYNAMIC_CRC_TABLE
27
#  endif /* !DYNAMIC_CRC_TABLE */
28
#endif /* MAKECRCH */
29
30
#include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31
32
 /*
33
  A CRC of a message is computed on N braids of words in the message, where
34
  each word consists of W bytes (4 or 8). If N is 3, for example, then three
35
  running sparse CRCs are calculated respectively on each braid, at these
36
  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37
  This is done starting at a word boundary, and continues until as many blocks
38
  of N * W bytes as are available have been processed. The results are combined
39
  into a single CRC at the end. For this code, N must be in the range 1..6 and
40
  W must be 4 or 8. The upper limit on N can be increased if desired by adding
41
  more #if blocks, extending the patterns apparent in the code. In addition,
42
  crc32.h would need to be regenerated, if the maximum N value is increased.
43
44
  N and W are chosen empirically by benchmarking the execution time on a given
45
  processor. The choices for N and W below were based on testing on Intel Kaby
46
  Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47
  Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48
  with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49
  They were all tested with either gcc or clang, all using the -O3 optimization
50
  level. Your mileage may vary.
51
 */
52
53
/* Define N */
54
#ifdef Z_TESTN
55
#  define N Z_TESTN
56
#else
57
0
#  define N 5
58
#endif
59
#if N < 1 || N > 6
60
#  error N must be in 1..6
61
#endif
62
63
/*
64
  z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65
  z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66
  that bytes are eight bits.
67
 */
68
69
/*
70
  Define W and the associated z_word_t type. If W is not defined, then a
71
  braided calculation is not used, and the associated tables and code are not
72
  compiled.
73
 */
74
#ifdef Z_TESTW
75
#  if Z_TESTW-1 != -1
76
#    define W Z_TESTW
77
#  endif
78
#else
79
#  ifdef MAKECRCH
80
#    define W 8         /* required for MAKECRCH */
81
#  else
82
#    if defined(__x86_64__) || defined(__aarch64__)
83
0
#      define W 8
84
#    else
85
#      define W 4
86
#    endif
87
#  endif
88
#endif
89
#ifdef W
90
#  if W == 8 && defined(Z_U8)
91
     typedef Z_U8 z_word_t;
92
#  elif defined(Z_U4)
93
#    undef W
94
#    define W 4
95
     typedef Z_U4 z_word_t;
96
#  else
97
#    undef W
98
#  endif
99
#endif
100
101
/* Local functions. */
102
local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
103
local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
104
#ifdef W
105
   local z_word_t byte_swap OF((z_word_t word));
106
   local z_crc_t crc_word OF((z_word_t data));
107
   local z_word_t crc_word_big OF((z_word_t data));
108
#endif /* W */
109
110
/* CRC polynomial. */
111
0
#define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
112
113
#ifdef DYNAMIC_CRC_TABLE
114
115
local z_crc_t FAR crc_table[256];
116
local z_crc_t FAR x2n_table[32];
117
local void make_crc_table OF((void));
118
#ifdef W
119
   local z_word_t FAR crc_big_table[256];
120
   local z_crc_t FAR crc_braid_table[W][256];
121
   local z_word_t FAR crc_braid_big_table[W][256];
122
   local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
123
#endif
124
#ifdef MAKECRCH
125
   local void write_table OF((FILE *, const z_crc_t FAR *, int));
126
   local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
127
   local void write_table64 OF((FILE *, const z_word_t FAR *, int));
128
#endif /* MAKECRCH */
129
130
/*
131
  Define a once() function depending on the availability of atomics. If this is
132
  compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
133
  multiple threads, and if atomics are not available, then get_crc_table() must
134
  be called to initialize the tables and must return before any threads are
135
  allowed to compute or combine CRCs.
136
 */
137
138
/* Definition of once functionality. */
139
typedef struct once_s once_t;
140
local void once OF((once_t *, void (*)(void)));
141
142
/* Check for the availability of atomics. */
143
#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
144
    !defined(__STDC_NO_ATOMICS__)
145
146
#include <stdatomic.h>
147
148
/* Structure for once(), which must be initialized with ONCE_INIT. */
149
struct once_s {
150
    atomic_flag begun;
151
    atomic_int done;
152
};
153
#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
154
155
/*
156
  Run the provided init() function exactly once, even if multiple threads
157
  invoke once() at the same time. The state must be a once_t initialized with
158
  ONCE_INIT.
159
 */
160
local void once(state, init)
161
    once_t *state;
162
    void (*init)(void);
163
{
164
    if (!atomic_load(&state->done)) {
165
        if (atomic_flag_test_and_set(&state->begun))
166
            while (!atomic_load(&state->done))
167
                ;
168
        else {
169
            init();
170
            atomic_store(&state->done, 1);
171
        }
172
    }
173
}
174
175
#else   /* no atomics */
176
177
/* Structure for once(), which must be initialized with ONCE_INIT. */
178
struct once_s {
179
    volatile int begun;
180
    volatile int done;
181
};
182
#define ONCE_INIT {0, 0}
183
184
/* Test and set. Alas, not atomic, but tries to minimize the period of
185
   vulnerability. */
186
local int test_and_set OF((int volatile *));
187
local int test_and_set(flag)
188
    int volatile *flag;
189
{
190
    int was;
191
192
    was = *flag;
193
    *flag = 1;
194
    return was;
195
}
196
197
/* Run the provided init() function once. This is not thread-safe. */
198
local void once(state, init)
199
    once_t *state;
200
    void (*init)(void);
201
{
202
    if (!state->done) {
203
        if (test_and_set(&state->begun))
204
            while (!state->done)
205
                ;
206
        else {
207
            init();
208
            state->done = 1;
209
        }
210
    }
211
}
212
213
#endif
214
215
/* State for once(). */
216
local once_t made = ONCE_INIT;
217
218
/*
219
  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
220
  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
221
222
  Polynomials over GF(2) are represented in binary, one bit per coefficient,
223
  with the lowest powers in the most significant bit. Then adding polynomials
224
  is just exclusive-or, and multiplying a polynomial by x is a right shift by
225
  one. If we call the above polynomial p, and represent a byte as the
226
  polynomial q, also with the lowest power in the most significant bit (so the
227
  byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
228
  where a mod b means the remainder after dividing a by b.
229
230
  This calculation is done using the shift-register method of multiplying and
231
  taking the remainder. The register is initialized to zero, and for each
232
  incoming bit, x^32 is added mod p to the register if the bit is a one (where
233
  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
234
  (which is shifting right by one and adding x^32 mod p if the bit shifted out
235
  is a one). We start with the highest power (least significant bit) of q and
236
  repeat for all eight bits of q.
237
238
  The table is simply the CRC of all possible eight bit values. This is all the
239
  information needed to generate CRCs on data a byte at a time for all
240
  combinations of CRC register values and incoming bytes.
241
 */
242
243
local void make_crc_table()
244
{
245
    unsigned i, j, n;
246
    z_crc_t p;
247
248
    /* initialize the CRC of bytes tables */
249
    for (i = 0; i < 256; i++) {
250
        p = i;
251
        for (j = 0; j < 8; j++)
252
            p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
253
        crc_table[i] = p;
254
#ifdef W
255
        crc_big_table[i] = byte_swap(p);
256
#endif
257
    }
258
259
    /* initialize the x^2^n mod p(x) table */
260
    p = (z_crc_t)1 << 30;         /* x^1 */
261
    x2n_table[0] = p;
262
    for (n = 1; n < 32; n++)
263
        x2n_table[n] = p = multmodp(p, p);
264
265
#ifdef W
266
    /* initialize the braiding tables -- needs x2n_table[] */
267
    braid(crc_braid_table, crc_braid_big_table, N, W);
268
#endif
269
270
#ifdef MAKECRCH
271
    {
272
        /*
273
          The crc32.h header file contains tables for both 32-bit and 64-bit
274
          z_word_t's, and so requires a 64-bit type be available. In that case,
275
          z_word_t must be defined to be 64-bits. This code then also generates
276
          and writes out the tables for the case that z_word_t is 32 bits.
277
         */
278
#if !defined(W) || W != 8
279
#  error Need a 64-bit integer type in order to generate crc32.h.
280
#endif
281
        FILE *out;
282
        int k, n;
283
        z_crc_t ltl[8][256];
284
        z_word_t big[8][256];
285
286
        out = fopen("crc32.h", "w");
287
        if (out == NULL) return;
288
289
        /* write out little-endian CRC table to crc32.h */
290
        fprintf(out,
291
            "/* crc32.h -- tables for rapid CRC calculation\n"
292
            " * Generated automatically by crc32.c\n */\n"
293
            "\n"
294
            "local const z_crc_t FAR crc_table[] = {\n"
295
            "    ");
296
        write_table(out, crc_table, 256);
297
        fprintf(out,
298
            "};\n");
299
300
        /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
301
        fprintf(out,
302
            "\n"
303
            "#ifdef W\n"
304
            "\n"
305
            "#if W == 8\n"
306
            "\n"
307
            "local const z_word_t FAR crc_big_table[] = {\n"
308
            "    ");
309
        write_table64(out, crc_big_table, 256);
310
        fprintf(out,
311
            "};\n");
312
313
        /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
314
        fprintf(out,
315
            "\n"
316
            "#else /* W == 4 */\n"
317
            "\n"
318
            "local const z_word_t FAR crc_big_table[] = {\n"
319
            "    ");
320
        write_table32hi(out, crc_big_table, 256);
321
        fprintf(out,
322
            "};\n"
323
            "\n"
324
            "#endif\n");
325
326
        /* write out braid tables for each value of N */
327
        for (n = 1; n <= 6; n++) {
328
            fprintf(out,
329
            "\n"
330
            "#if N == %d\n", n);
331
332
            /* compute braid tables for this N and 64-bit word_t */
333
            braid(ltl, big, n, 8);
334
335
            /* write out braid tables for 64-bit z_word_t to crc32.h */
336
            fprintf(out,
337
            "\n"
338
            "#if W == 8\n"
339
            "\n"
340
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
341
            for (k = 0; k < 8; k++) {
342
                fprintf(out, "   {");
343
                write_table(out, ltl[k], 256);
344
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
345
            }
346
            fprintf(out,
347
            "};\n"
348
            "\n"
349
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
350
            for (k = 0; k < 8; k++) {
351
                fprintf(out, "   {");
352
                write_table64(out, big[k], 256);
353
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
354
            }
355
            fprintf(out,
356
            "};\n");
357
358
            /* compute braid tables for this N and 32-bit word_t */
359
            braid(ltl, big, n, 4);
360
361
            /* write out braid tables for 32-bit z_word_t to crc32.h */
362
            fprintf(out,
363
            "\n"
364
            "#else /* W == 4 */\n"
365
            "\n"
366
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
367
            for (k = 0; k < 4; k++) {
368
                fprintf(out, "   {");
369
                write_table(out, ltl[k], 256);
370
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
371
            }
372
            fprintf(out,
373
            "};\n"
374
            "\n"
375
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
376
            for (k = 0; k < 4; k++) {
377
                fprintf(out, "   {");
378
                write_table32hi(out, big[k], 256);
379
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
380
            }
381
            fprintf(out,
382
            "};\n"
383
            "\n"
384
            "#endif\n"
385
            "\n"
386
            "#endif\n");
387
        }
388
        fprintf(out,
389
            "\n"
390
            "#endif\n");
391
392
        /* write out zeros operator table to crc32.h */
393
        fprintf(out,
394
            "\n"
395
            "local const z_crc_t FAR x2n_table[] = {\n"
396
            "    ");
397
        write_table(out, x2n_table, 32);
398
        fprintf(out,
399
            "};\n");
400
        fclose(out);
401
    }
402
#endif /* MAKECRCH */
403
}
404
405
#ifdef MAKECRCH
406
407
/*
408
   Write the 32-bit values in table[0..k-1] to out, five per line in
409
   hexadecimal separated by commas.
410
 */
411
local void write_table(out, table, k)
412
    FILE *out;
413
    const z_crc_t FAR *table;
414
    int k;
415
{
416
    int n;
417
418
    for (n = 0; n < k; n++)
419
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
420
                (unsigned long)(table[n]),
421
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
422
}
423
424
/*
425
   Write the high 32-bits of each value in table[0..k-1] to out, five per line
426
   in hexadecimal separated by commas.
427
 */
428
local void write_table32hi(out, table, k)
429
FILE *out;
430
const z_word_t FAR *table;
431
int k;
432
{
433
    int n;
434
435
    for (n = 0; n < k; n++)
436
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
437
                (unsigned long)(table[n] >> 32),
438
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
439
}
440
441
/*
442
  Write the 64-bit values in table[0..k-1] to out, three per line in
443
  hexadecimal separated by commas. This assumes that if there is a 64-bit
444
  type, then there is also a long long integer type, and it is at least 64
445
  bits. If not, then the type cast and format string can be adjusted
446
  accordingly.
447
 */
448
local void write_table64(out, table, k)
449
    FILE *out;
450
    const z_word_t FAR *table;
451
    int k;
452
{
453
    int n;
454
455
    for (n = 0; n < k; n++)
456
        fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
457
                (unsigned long long)(table[n]),
458
                n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
459
}
460
461
/* Actually do the deed. */
462
int main()
463
{
464
    make_crc_table();
465
    return 0;
466
}
467
468
#endif /* MAKECRCH */
469
470
#ifdef W
471
/*
472
  Generate the little and big-endian braid tables for the given n and z_word_t
473
  size w. Each array must have room for w blocks of 256 elements.
474
 */
475
local void braid(ltl, big, n, w)
476
    z_crc_t ltl[][256];
477
    z_word_t big[][256];
478
    int n;
479
    int w;
480
{
481
    int k;
482
    z_crc_t i, p, q;
483
    for (k = 0; k < w; k++) {
484
        p = x2nmodp((n * w + 3 - k) << 3, 0);
485
        ltl[k][0] = 0;
486
        big[w - 1 - k][0] = 0;
487
        for (i = 1; i < 256; i++) {
488
            ltl[k][i] = q = multmodp(i << 24, p);
489
            big[w - 1 - k][i] = byte_swap(q);
490
        }
491
    }
492
}
493
#endif
494
495
#else /* !DYNAMIC_CRC_TABLE */
496
/* ========================================================================
497
 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
498
 * of x for combining CRC-32s, all made by make_crc_table().
499
 */
500
#include "crc32.h"
501
#endif /* DYNAMIC_CRC_TABLE */
502
503
/* ========================================================================
504
 * Routines used for CRC calculation. Some are also required for the table
505
 * generation above.
506
 */
507
508
/*
509
  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
510
  reflected. For speed, this requires that a not be zero.
511
 */
512
local z_crc_t multmodp(a, b)
513
    z_crc_t a;
514
    z_crc_t b;
515
0
{
516
0
    z_crc_t m, p;
517
518
0
    m = (z_crc_t)1 << 31;
519
0
    p = 0;
520
0
    for (;;) {
521
0
        if (a & m) {
522
0
            p ^= b;
523
0
            if ((a & (m - 1)) == 0)
524
0
                break;
525
0
        }
526
0
        m >>= 1;
527
0
        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
528
0
    }
529
0
    return p;
530
0
}
531
532
/*
533
  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
534
  initialized.
535
 */
536
local z_crc_t x2nmodp(n, k)
537
    z_off64_t n;
538
    unsigned k;
539
0
{
540
0
    z_crc_t p;
541
542
0
    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
543
0
    while (n) {
544
0
        if (n & 1)
545
0
            p = multmodp(x2n_table[k & 31], p);
546
0
        n >>= 1;
547
0
        k++;
548
0
    }
549
0
    return p;
550
0
}
551
552
#ifdef W
553
554
/*
555
  Swap the bytes in a z_word_t to convert between little and big endian. Any
556
  self-respecting compiler will optimize this to a single machine byte-swap
557
  instruction, if one is available. This assumes that word_t is either 32 bits
558
  or 64 bits.
559
 */
560
local z_word_t byte_swap(word)
561
    z_word_t word;
562
0
{
563
0
#if W == 8
564
0
    return
565
0
        (word & 0xff00000000000000) >> 56 |
566
0
        (word & 0xff000000000000) >> 40 |
567
0
        (word & 0xff0000000000) >> 24 |
568
0
        (word & 0xff00000000) >> 8 |
569
0
        (word & 0xff000000) << 8 |
570
0
        (word & 0xff0000) << 24 |
571
0
        (word & 0xff00) << 40 |
572
0
        (word & 0xff) << 56;
573
#else   /* W == 4 */
574
    return
575
        (word & 0xff000000) >> 24 |
576
        (word & 0xff0000) >> 8 |
577
        (word & 0xff00) << 8 |
578
        (word & 0xff) << 24;
579
#endif
580
0
}
581
582
/*
583
  Return the CRC of the W bytes in the word_t data, taking the
584
  least-significant byte of the word as the first byte of data, without any pre
585
  or post conditioning. This is used to combine the CRCs of each braid.
586
 */
587
local z_crc_t crc_word(data)
588
    z_word_t data;
589
0
{
590
0
    int k;
591
0
    for (k = 0; k < W; k++)
592
0
        data = (data >> 8) ^ crc_table[data & 0xff];
593
0
    return (z_crc_t)data;
594
0
}
595
596
local z_word_t crc_word_big(data)
597
    z_word_t data;
598
0
{
599
0
    int k;
600
0
    for (k = 0; k < W; k++)
601
0
        data = (data << 8) ^
602
0
            crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
603
0
    return data;
604
0
}
605
606
#endif /* W */
607
608
/* =========================================================================
609
 * This function can be used by asm versions of crc32(), and to force the
610
 * generation of the CRC tables in a threaded application.
611
 */
612
const z_crc_t FAR * ZEXPORT get_crc_table()
613
0
{
614
#ifdef DYNAMIC_CRC_TABLE
615
    once(&made, make_crc_table);
616
#endif /* DYNAMIC_CRC_TABLE */
617
0
    return (const z_crc_t FAR *)crc_table;
618
0
}
619
620
/* =========================================================================
621
 * Use ARM machine instructions if available. This will compute the CRC about
622
 * ten times faster than the braided calculation. This code does not check for
623
 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
624
 * only be defined if the compilation specifies an ARM processor architecture
625
 * that has the instructions. For example, compiling with -march=armv8.1-a or
626
 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
627
 * instructions.
628
 */
629
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
630
631
/*
632
   Constants empirically determined to maximize speed. These values are from
633
   measurements on a Cortex-A57. Your mileage may vary.
634
 */
635
#define Z_BATCH 3990                /* number of words in a batch */
636
#define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
637
#define Z_BATCH_MIN 800             /* fewest words in a final batch */
638
639
unsigned long ZEXPORT crc32_z(crc, buf, len)
640
    unsigned long crc;
641
    const unsigned char FAR *buf;
642
    z_size_t len;
643
{
644
    z_crc_t val;
645
    z_word_t crc1, crc2;
646
    const z_word_t *word;
647
    z_word_t val0, val1, val2;
648
    z_size_t last, last2, i;
649
    z_size_t num;
650
651
    /* Return initial CRC, if requested. */
652
    if (buf == Z_NULL) return 0;
653
654
#ifdef DYNAMIC_CRC_TABLE
655
    once(&made, make_crc_table);
656
#endif /* DYNAMIC_CRC_TABLE */
657
658
    /* Pre-condition the CRC */
659
    crc ^= 0xffffffff;
660
661
    /* Compute the CRC up to a word boundary. */
662
    while (len && ((z_size_t)buf & 7) != 0) {
663
        len--;
664
        val = *buf++;
665
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
666
    }
667
668
    /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
669
    word = (z_word_t const *)buf;
670
    num = len >> 3;
671
    len &= 7;
672
673
    /* Do three interleaved CRCs to realize the throughput of one crc32x
674
       instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
675
       CRCs are combined into a single CRC after each set of batches. */
676
    while (num >= 3 * Z_BATCH) {
677
        crc1 = 0;
678
        crc2 = 0;
679
        for (i = 0; i < Z_BATCH; i++) {
680
            val0 = word[i];
681
            val1 = word[i + Z_BATCH];
682
            val2 = word[i + 2 * Z_BATCH];
683
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
684
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
685
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
686
        }
687
        word += 3 * Z_BATCH;
688
        num -= 3 * Z_BATCH;
689
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
690
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
691
    }
692
693
    /* Do one last smaller batch with the remaining words, if there are enough
694
       to pay for the combination of CRCs. */
695
    last = num / 3;
696
    if (last >= Z_BATCH_MIN) {
697
        last2 = last << 1;
698
        crc1 = 0;
699
        crc2 = 0;
700
        for (i = 0; i < last; i++) {
701
            val0 = word[i];
702
            val1 = word[i + last];
703
            val2 = word[i + last2];
704
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
705
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
706
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
707
        }
708
        word += 3 * last;
709
        num -= 3 * last;
710
        val = x2nmodp(last, 6);
711
        crc = multmodp(val, crc) ^ crc1;
712
        crc = multmodp(val, crc) ^ crc2;
713
    }
714
715
    /* Compute the CRC on any remaining words. */
716
    for (i = 0; i < num; i++) {
717
        val0 = word[i];
718
        __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
719
    }
720
    word += num;
721
722
    /* Complete the CRC on any remaining bytes. */
723
    buf = (const unsigned char FAR *)word;
724
    while (len) {
725
        len--;
726
        val = *buf++;
727
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
728
    }
729
730
    /* Return the CRC, post-conditioned. */
731
    return crc ^ 0xffffffff;
732
}
733
734
#else
735
736
/* ========================================================================= */
737
unsigned long ZEXPORT crc32_z(crc, buf, len)
738
    unsigned long crc;
739
    const unsigned char FAR *buf;
740
    z_size_t len;
741
0
{
742
    /* Return initial CRC, if requested. */
743
0
    if (buf == Z_NULL) return 0;
744
745
#ifdef DYNAMIC_CRC_TABLE
746
    once(&made, make_crc_table);
747
#endif /* DYNAMIC_CRC_TABLE */
748
749
    /* Pre-condition the CRC */
750
0
    crc ^= 0xffffffff;
751
752
0
#ifdef W
753
754
    /* If provided enough bytes, do a braided CRC calculation. */
755
0
    if (len >= N * W + W - 1) {
756
0
        z_size_t blks;
757
0
        z_word_t const *words;
758
0
        unsigned endian;
759
0
        int k;
760
761
        /* Compute the CRC up to a z_word_t boundary. */
762
0
        while (len && ((z_size_t)buf & (W - 1)) != 0) {
763
0
            len--;
764
0
            crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
765
0
        }
766
767
        /* Compute the CRC on as many N z_word_t blocks as are available. */
768
0
        blks = len / (N * W);
769
0
        len -= blks * N * W;
770
0
        words = (z_word_t const *)buf;
771
772
        /* Do endian check at execution time instead of compile time, since ARM
773
           processors can change the endianess at execution time. If the
774
           compiler knows what the endianess will be, it can optimize out the
775
           check and the unused branch. */
776
0
        endian = 1;
777
0
        if (*(unsigned char *)&endian) {
778
            /* Little endian. */
779
780
0
            z_crc_t crc0;
781
0
            z_word_t word0;
782
0
#if N > 1
783
0
            z_crc_t crc1;
784
0
            z_word_t word1;
785
0
#if N > 2
786
0
            z_crc_t crc2;
787
0
            z_word_t word2;
788
0
#if N > 3
789
0
            z_crc_t crc3;
790
0
            z_word_t word3;
791
0
#if N > 4
792
0
            z_crc_t crc4;
793
0
            z_word_t word4;
794
#if N > 5
795
            z_crc_t crc5;
796
            z_word_t word5;
797
#endif
798
0
#endif
799
0
#endif
800
0
#endif
801
0
#endif
802
803
            /* Initialize the CRC for each braid. */
804
0
            crc0 = crc;
805
0
#if N > 1
806
0
            crc1 = 0;
807
0
#if N > 2
808
0
            crc2 = 0;
809
0
#if N > 3
810
0
            crc3 = 0;
811
0
#if N > 4
812
0
            crc4 = 0;
813
#if N > 5
814
            crc5 = 0;
815
#endif
816
0
#endif
817
0
#endif
818
0
#endif
819
0
#endif
820
821
            /*
822
              Process the first blks-1 blocks, computing the CRCs on each braid
823
              independently.
824
             */
825
0
            while (--blks) {
826
                /* Load the word for each braid into registers. */
827
0
                word0 = crc0 ^ words[0];
828
0
#if N > 1
829
0
                word1 = crc1 ^ words[1];
830
0
#if N > 2
831
0
                word2 = crc2 ^ words[2];
832
0
#if N > 3
833
0
                word3 = crc3 ^ words[3];
834
0
#if N > 4
835
0
                word4 = crc4 ^ words[4];
836
#if N > 5
837
                word5 = crc5 ^ words[5];
838
#endif
839
0
#endif
840
0
#endif
841
0
#endif
842
0
#endif
843
0
                words += N;
844
845
                /* Compute and update the CRC for each word. The loop should
846
                   get unrolled. */
847
0
                crc0 = crc_braid_table[0][word0 & 0xff];
848
0
#if N > 1
849
0
                crc1 = crc_braid_table[0][word1 & 0xff];
850
0
#if N > 2
851
0
                crc2 = crc_braid_table[0][word2 & 0xff];
852
0
#if N > 3
853
0
                crc3 = crc_braid_table[0][word3 & 0xff];
854
0
#if N > 4
855
0
                crc4 = crc_braid_table[0][word4 & 0xff];
856
#if N > 5
857
                crc5 = crc_braid_table[0][word5 & 0xff];
858
#endif
859
0
#endif
860
0
#endif
861
0
#endif
862
0
#endif
863
0
                for (k = 1; k < W; k++) {
864
0
                    crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
865
0
#if N > 1
866
0
                    crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
867
0
#if N > 2
868
0
                    crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
869
0
#if N > 3
870
0
                    crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
871
0
#if N > 4
872
0
                    crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
873
#if N > 5
874
                    crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
875
#endif
876
0
#endif
877
0
#endif
878
0
#endif
879
0
#endif
880
0
                }
881
0
            }
882
883
            /*
884
              Process the last block, combining the CRCs of the N braids at the
885
              same time.
886
             */
887
0
            crc = crc_word(crc0 ^ words[0]);
888
0
#if N > 1
889
0
            crc = crc_word(crc1 ^ words[1] ^ crc);
890
0
#if N > 2
891
0
            crc = crc_word(crc2 ^ words[2] ^ crc);
892
0
#if N > 3
893
0
            crc = crc_word(crc3 ^ words[3] ^ crc);
894
0
#if N > 4
895
0
            crc = crc_word(crc4 ^ words[4] ^ crc);
896
#if N > 5
897
            crc = crc_word(crc5 ^ words[5] ^ crc);
898
#endif
899
0
#endif
900
0
#endif
901
0
#endif
902
0
#endif
903
0
            words += N;
904
0
        }
905
0
        else {
906
            /* Big endian. */
907
908
0
            z_word_t crc0, word0, comb;
909
0
#if N > 1
910
0
            z_word_t crc1, word1;
911
0
#if N > 2
912
0
            z_word_t crc2, word2;
913
0
#if N > 3
914
0
            z_word_t crc3, word3;
915
0
#if N > 4
916
0
            z_word_t crc4, word4;
917
#if N > 5
918
            z_word_t crc5, word5;
919
#endif
920
0
#endif
921
0
#endif
922
0
#endif
923
0
#endif
924
925
            /* Initialize the CRC for each braid. */
926
0
            crc0 = byte_swap(crc);
927
0
#if N > 1
928
0
            crc1 = 0;
929
0
#if N > 2
930
0
            crc2 = 0;
931
0
#if N > 3
932
0
            crc3 = 0;
933
0
#if N > 4
934
0
            crc4 = 0;
935
#if N > 5
936
            crc5 = 0;
937
#endif
938
0
#endif
939
0
#endif
940
0
#endif
941
0
#endif
942
943
            /*
944
              Process the first blks-1 blocks, computing the CRCs on each braid
945
              independently.
946
             */
947
0
            while (--blks) {
948
                /* Load the word for each braid into registers. */
949
0
                word0 = crc0 ^ words[0];
950
0
#if N > 1
951
0
                word1 = crc1 ^ words[1];
952
0
#if N > 2
953
0
                word2 = crc2 ^ words[2];
954
0
#if N > 3
955
0
                word3 = crc3 ^ words[3];
956
0
#if N > 4
957
0
                word4 = crc4 ^ words[4];
958
#if N > 5
959
                word5 = crc5 ^ words[5];
960
#endif
961
0
#endif
962
0
#endif
963
0
#endif
964
0
#endif
965
0
                words += N;
966
967
                /* Compute and update the CRC for each word. The loop should
968
                   get unrolled. */
969
0
                crc0 = crc_braid_big_table[0][word0 & 0xff];
970
0
#if N > 1
971
0
                crc1 = crc_braid_big_table[0][word1 & 0xff];
972
0
#if N > 2
973
0
                crc2 = crc_braid_big_table[0][word2 & 0xff];
974
0
#if N > 3
975
0
                crc3 = crc_braid_big_table[0][word3 & 0xff];
976
0
#if N > 4
977
0
                crc4 = crc_braid_big_table[0][word4 & 0xff];
978
#if N > 5
979
                crc5 = crc_braid_big_table[0][word5 & 0xff];
980
#endif
981
0
#endif
982
0
#endif
983
0
#endif
984
0
#endif
985
0
                for (k = 1; k < W; k++) {
986
0
                    crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
987
0
#if N > 1
988
0
                    crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
989
0
#if N > 2
990
0
                    crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
991
0
#if N > 3
992
0
                    crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
993
0
#if N > 4
994
0
                    crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
995
#if N > 5
996
                    crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
997
#endif
998
0
#endif
999
0
#endif
1000
0
#endif
1001
0
#endif
1002
0
                }
1003
0
            }
1004
1005
            /*
1006
              Process the last block, combining the CRCs of the N braids at the
1007
              same time.
1008
             */
1009
0
            comb = crc_word_big(crc0 ^ words[0]);
1010
0
#if N > 1
1011
0
            comb = crc_word_big(crc1 ^ words[1] ^ comb);
1012
0
#if N > 2
1013
0
            comb = crc_word_big(crc2 ^ words[2] ^ comb);
1014
0
#if N > 3
1015
0
            comb = crc_word_big(crc3 ^ words[3] ^ comb);
1016
0
#if N > 4
1017
0
            comb = crc_word_big(crc4 ^ words[4] ^ comb);
1018
#if N > 5
1019
            comb = crc_word_big(crc5 ^ words[5] ^ comb);
1020
#endif
1021
0
#endif
1022
0
#endif
1023
0
#endif
1024
0
#endif
1025
0
            words += N;
1026
0
            crc = byte_swap(comb);
1027
0
        }
1028
1029
        /*
1030
          Update the pointer to the remaining bytes to process.
1031
         */
1032
0
        buf = (unsigned char const *)words;
1033
0
    }
1034
1035
0
#endif /* W */
1036
1037
    /* Complete the computation of the CRC on any remaining bytes. */
1038
0
    while (len >= 8) {
1039
0
        len -= 8;
1040
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1041
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1042
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1043
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1044
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1045
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1046
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1047
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1048
0
    }
1049
0
    while (len) {
1050
0
        len--;
1051
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1052
0
    }
1053
1054
    /* Return the CRC, post-conditioned. */
1055
0
    return crc ^ 0xffffffff;
1056
0
}
1057
1058
#endif
1059
1060
/* ========================================================================= */
1061
unsigned long ZEXPORT crc32(crc, buf, len)
1062
    unsigned long crc;
1063
    const unsigned char FAR *buf;
1064
    uInt len;
1065
0
{
1066
0
    return crc32_z(crc, buf, len);
1067
0
}
1068
1069
/* ========================================================================= */
1070
uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1071
    uLong crc1;
1072
    uLong crc2;
1073
    z_off64_t len2;
1074
0
{
1075
#ifdef DYNAMIC_CRC_TABLE
1076
    once(&made, make_crc_table);
1077
#endif /* DYNAMIC_CRC_TABLE */
1078
0
    return multmodp(x2nmodp(len2, 3), crc1) ^ crc2;
1079
0
}
1080
1081
/* ========================================================================= */
1082
uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1083
    uLong crc1;
1084
    uLong crc2;
1085
    z_off_t len2;
1086
0
{
1087
0
    return crc32_combine64(crc1, crc2, len2);
1088
0
}
1089
1090
/* ========================================================================= */
1091
uLong ZEXPORT crc32_combine_gen64(len2)
1092
    z_off64_t len2;
1093
0
{
1094
#ifdef DYNAMIC_CRC_TABLE
1095
    once(&made, make_crc_table);
1096
#endif /* DYNAMIC_CRC_TABLE */
1097
0
    return x2nmodp(len2, 3);
1098
0
}
1099
1100
/* ========================================================================= */
1101
uLong ZEXPORT crc32_combine_gen(len2)
1102
    z_off_t len2;
1103
0
{
1104
0
    return crc32_combine_gen64(len2);
1105
0
}
1106
1107
/* ========================================================================= */
1108
uLong crc32_combine_op(crc1, crc2, op)
1109
    uLong crc1;
1110
    uLong crc2;
1111
    uLong op;
1112
0
{
1113
0
    return multmodp(op, crc1) ^ crc2;
1114
0
}