Coverage Report

Created: 2023-06-07 06:03

/src/zlib/crc32.c
Line
Count
Source (jump to first uncovered line)
1
/* crc32.c -- compute the CRC-32 of a data stream
2
 * Copyright (C) 1995-2022 Mark Adler
3
 * For conditions of distribution and use, see copyright notice in zlib.h
4
 *
5
 * This interleaved implementation of a CRC makes use of pipelined multiple
6
 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7
 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8
 */
9
10
/* @(#) $Id$ */
11
12
/*
13
  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14
  protection on the static variables used to control the first-use generation
15
  of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16
  first call get_crc_table() to initialize the tables before allowing more than
17
  one thread to use crc32().
18
19
  MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20
  produced, so that this one source file can be compiled to an executable.
21
 */
22
23
#ifdef MAKECRCH
24
#  include <stdio.h>
25
#  ifndef DYNAMIC_CRC_TABLE
26
#    define DYNAMIC_CRC_TABLE
27
#  endif /* !DYNAMIC_CRC_TABLE */
28
#endif /* MAKECRCH */
29
30
#include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31
32
 /*
33
  A CRC of a message is computed on N braids of words in the message, where
34
  each word consists of W bytes (4 or 8). If N is 3, for example, then three
35
  running sparse CRCs are calculated respectively on each braid, at these
36
  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37
  This is done starting at a word boundary, and continues until as many blocks
38
  of N * W bytes as are available have been processed. The results are combined
39
  into a single CRC at the end. For this code, N must be in the range 1..6 and
40
  W must be 4 or 8. The upper limit on N can be increased if desired by adding
41
  more #if blocks, extending the patterns apparent in the code. In addition,
42
  crc32.h would need to be regenerated, if the maximum N value is increased.
43
44
  N and W are chosen empirically by benchmarking the execution time on a given
45
  processor. The choices for N and W below were based on testing on Intel Kaby
46
  Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47
  Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48
  with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49
  They were all tested with either gcc or clang, all using the -O3 optimization
50
  level. Your mileage may vary.
51
 */
52
53
/* Define N */
54
#ifdef Z_TESTN
55
#  define N Z_TESTN
56
#else
57
650k
#  define N 5
58
#endif
59
#if N < 1 || N > 6
60
#  error N must be in 1..6
61
#endif
62
63
/*
64
  z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65
  z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66
  that bytes are eight bits.
67
 */
68
69
/*
70
  Define W and the associated z_word_t type. If W is not defined, then a
71
  braided calculation is not used, and the associated tables and code are not
72
  compiled.
73
 */
74
#ifdef Z_TESTW
75
#  if Z_TESTW-1 != -1
76
#    define W Z_TESTW
77
#  endif
78
#else
79
#  ifdef MAKECRCH
80
#    define W 8         /* required for MAKECRCH */
81
#  else
82
#    if defined(__x86_64__) || defined(__aarch64__)
83
4.92M
#      define W 8
84
#    else
85
#      define W 4
86
#    endif
87
#  endif
88
#endif
89
#ifdef W
90
#  if W == 8 && defined(Z_U8)
91
     typedef Z_U8 z_word_t;
92
#  elif defined(Z_U4)
93
#    undef W
94
#    define W 4
95
     typedef Z_U4 z_word_t;
96
#  else
97
#    undef W
98
#  endif
99
#endif
100
101
/* If available, use the ARM processor CRC32 instruction. */
102
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103
#  define ARMCRC32
104
#endif
105
106
/* Local functions. */
107
local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
108
local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
109
110
#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111
    local z_word_t byte_swap OF((z_word_t word));
112
#endif
113
114
#if defined(W) && !defined(ARMCRC32)
115
    local z_crc_t crc_word OF((z_word_t data));
116
    local z_word_t crc_word_big OF((z_word_t data));
117
#endif
118
119
#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
120
/*
121
  Swap the bytes in a z_word_t to convert between little and big endian. Any
122
  self-respecting compiler will optimize this to a single machine byte-swap
123
  instruction, if one is available. This assumes that word_t is either 32 bits
124
  or 64 bits.
125
 */
126
local z_word_t byte_swap(word)
127
    z_word_t word;
128
0
{
129
0
#  if W == 8
130
0
    return
131
0
        (word & 0xff00000000000000) >> 56 |
132
0
        (word & 0xff000000000000) >> 40 |
133
0
        (word & 0xff0000000000) >> 24 |
134
0
        (word & 0xff00000000) >> 8 |
135
0
        (word & 0xff000000) << 8 |
136
0
        (word & 0xff0000) << 24 |
137
0
        (word & 0xff00) << 40 |
138
0
        (word & 0xff) << 56;
139
#  else   /* W == 4 */
140
    return
141
        (word & 0xff000000) >> 24 |
142
        (word & 0xff0000) >> 8 |
143
        (word & 0xff00) << 8 |
144
        (word & 0xff) << 24;
145
#  endif
146
0
}
147
#endif
148
149
/* CRC polynomial. */
150
0
#define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
151
152
#ifdef DYNAMIC_CRC_TABLE
153
154
local z_crc_t FAR crc_table[256];
155
local z_crc_t FAR x2n_table[32];
156
local void make_crc_table OF((void));
157
#ifdef W
158
   local z_word_t FAR crc_big_table[256];
159
   local z_crc_t FAR crc_braid_table[W][256];
160
   local z_word_t FAR crc_braid_big_table[W][256];
161
   local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
162
#endif
163
#ifdef MAKECRCH
164
   local void write_table OF((FILE *, const z_crc_t FAR *, int));
165
   local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
166
   local void write_table64 OF((FILE *, const z_word_t FAR *, int));
167
#endif /* MAKECRCH */
168
169
/*
170
  Define a once() function depending on the availability of atomics. If this is
171
  compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
172
  multiple threads, and if atomics are not available, then get_crc_table() must
173
  be called to initialize the tables and must return before any threads are
174
  allowed to compute or combine CRCs.
175
 */
176
177
/* Definition of once functionality. */
178
typedef struct once_s once_t;
179
local void once OF((once_t *, void (*)(void)));
180
181
/* Check for the availability of atomics. */
182
#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
183
    !defined(__STDC_NO_ATOMICS__)
184
185
#include <stdatomic.h>
186
187
/* Structure for once(), which must be initialized with ONCE_INIT. */
188
struct once_s {
189
    atomic_flag begun;
190
    atomic_int done;
191
};
192
#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
193
194
/*
195
  Run the provided init() function exactly once, even if multiple threads
196
  invoke once() at the same time. The state must be a once_t initialized with
197
  ONCE_INIT.
198
 */
199
local void once(state, init)
200
    once_t *state;
201
    void (*init)(void);
202
{
203
    if (!atomic_load(&state->done)) {
204
        if (atomic_flag_test_and_set(&state->begun))
205
            while (!atomic_load(&state->done))
206
                ;
207
        else {
208
            init();
209
            atomic_store(&state->done, 1);
210
        }
211
    }
212
}
213
214
#else   /* no atomics */
215
216
/* Structure for once(), which must be initialized with ONCE_INIT. */
217
struct once_s {
218
    volatile int begun;
219
    volatile int done;
220
};
221
#define ONCE_INIT {0, 0}
222
223
/* Test and set. Alas, not atomic, but tries to minimize the period of
224
   vulnerability. */
225
local int test_and_set OF((int volatile *));
226
local int test_and_set(flag)
227
    int volatile *flag;
228
{
229
    int was;
230
231
    was = *flag;
232
    *flag = 1;
233
    return was;
234
}
235
236
/* Run the provided init() function once. This is not thread-safe. */
237
local void once(state, init)
238
    once_t *state;
239
    void (*init)(void);
240
{
241
    if (!state->done) {
242
        if (test_and_set(&state->begun))
243
            while (!state->done)
244
                ;
245
        else {
246
            init();
247
            state->done = 1;
248
        }
249
    }
250
}
251
252
#endif
253
254
/* State for once(). */
255
local once_t made = ONCE_INIT;
256
257
/*
258
  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
259
  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
260
261
  Polynomials over GF(2) are represented in binary, one bit per coefficient,
262
  with the lowest powers in the most significant bit. Then adding polynomials
263
  is just exclusive-or, and multiplying a polynomial by x is a right shift by
264
  one. If we call the above polynomial p, and represent a byte as the
265
  polynomial q, also with the lowest power in the most significant bit (so the
266
  byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
267
  where a mod b means the remainder after dividing a by b.
268
269
  This calculation is done using the shift-register method of multiplying and
270
  taking the remainder. The register is initialized to zero, and for each
271
  incoming bit, x^32 is added mod p to the register if the bit is a one (where
272
  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
273
  (which is shifting right by one and adding x^32 mod p if the bit shifted out
274
  is a one). We start with the highest power (least significant bit) of q and
275
  repeat for all eight bits of q.
276
277
  The table is simply the CRC of all possible eight bit values. This is all the
278
  information needed to generate CRCs on data a byte at a time for all
279
  combinations of CRC register values and incoming bytes.
280
 */
281
282
local void make_crc_table()
283
{
284
    unsigned i, j, n;
285
    z_crc_t p;
286
287
    /* initialize the CRC of bytes tables */
288
    for (i = 0; i < 256; i++) {
289
        p = i;
290
        for (j = 0; j < 8; j++)
291
            p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
292
        crc_table[i] = p;
293
#ifdef W
294
        crc_big_table[i] = byte_swap(p);
295
#endif
296
    }
297
298
    /* initialize the x^2^n mod p(x) table */
299
    p = (z_crc_t)1 << 30;         /* x^1 */
300
    x2n_table[0] = p;
301
    for (n = 1; n < 32; n++)
302
        x2n_table[n] = p = multmodp(p, p);
303
304
#ifdef W
305
    /* initialize the braiding tables -- needs x2n_table[] */
306
    braid(crc_braid_table, crc_braid_big_table, N, W);
307
#endif
308
309
#ifdef MAKECRCH
310
    {
311
        /*
312
          The crc32.h header file contains tables for both 32-bit and 64-bit
313
          z_word_t's, and so requires a 64-bit type be available. In that case,
314
          z_word_t must be defined to be 64-bits. This code then also generates
315
          and writes out the tables for the case that z_word_t is 32 bits.
316
         */
317
#if !defined(W) || W != 8
318
#  error Need a 64-bit integer type in order to generate crc32.h.
319
#endif
320
        FILE *out;
321
        int k, n;
322
        z_crc_t ltl[8][256];
323
        z_word_t big[8][256];
324
325
        out = fopen("crc32.h", "w");
326
        if (out == NULL) return;
327
328
        /* write out little-endian CRC table to crc32.h */
329
        fprintf(out,
330
            "/* crc32.h -- tables for rapid CRC calculation\n"
331
            " * Generated automatically by crc32.c\n */\n"
332
            "\n"
333
            "local const z_crc_t FAR crc_table[] = {\n"
334
            "    ");
335
        write_table(out, crc_table, 256);
336
        fprintf(out,
337
            "};\n");
338
339
        /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
340
        fprintf(out,
341
            "\n"
342
            "#ifdef W\n"
343
            "\n"
344
            "#if W == 8\n"
345
            "\n"
346
            "local const z_word_t FAR crc_big_table[] = {\n"
347
            "    ");
348
        write_table64(out, crc_big_table, 256);
349
        fprintf(out,
350
            "};\n");
351
352
        /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
353
        fprintf(out,
354
            "\n"
355
            "#else /* W == 4 */\n"
356
            "\n"
357
            "local const z_word_t FAR crc_big_table[] = {\n"
358
            "    ");
359
        write_table32hi(out, crc_big_table, 256);
360
        fprintf(out,
361
            "};\n"
362
            "\n"
363
            "#endif\n");
364
365
        /* write out braid tables for each value of N */
366
        for (n = 1; n <= 6; n++) {
367
            fprintf(out,
368
            "\n"
369
            "#if N == %d\n", n);
370
371
            /* compute braid tables for this N and 64-bit word_t */
372
            braid(ltl, big, n, 8);
373
374
            /* write out braid tables for 64-bit z_word_t to crc32.h */
375
            fprintf(out,
376
            "\n"
377
            "#if W == 8\n"
378
            "\n"
379
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
380
            for (k = 0; k < 8; k++) {
381
                fprintf(out, "   {");
382
                write_table(out, ltl[k], 256);
383
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
384
            }
385
            fprintf(out,
386
            "};\n"
387
            "\n"
388
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
389
            for (k = 0; k < 8; k++) {
390
                fprintf(out, "   {");
391
                write_table64(out, big[k], 256);
392
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
393
            }
394
            fprintf(out,
395
            "};\n");
396
397
            /* compute braid tables for this N and 32-bit word_t */
398
            braid(ltl, big, n, 4);
399
400
            /* write out braid tables for 32-bit z_word_t to crc32.h */
401
            fprintf(out,
402
            "\n"
403
            "#else /* W == 4 */\n"
404
            "\n"
405
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
406
            for (k = 0; k < 4; k++) {
407
                fprintf(out, "   {");
408
                write_table(out, ltl[k], 256);
409
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
410
            }
411
            fprintf(out,
412
            "};\n"
413
            "\n"
414
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
415
            for (k = 0; k < 4; k++) {
416
                fprintf(out, "   {");
417
                write_table32hi(out, big[k], 256);
418
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
419
            }
420
            fprintf(out,
421
            "};\n"
422
            "\n"
423
            "#endif\n"
424
            "\n"
425
            "#endif\n");
426
        }
427
        fprintf(out,
428
            "\n"
429
            "#endif\n");
430
431
        /* write out zeros operator table to crc32.h */
432
        fprintf(out,
433
            "\n"
434
            "local const z_crc_t FAR x2n_table[] = {\n"
435
            "    ");
436
        write_table(out, x2n_table, 32);
437
        fprintf(out,
438
            "};\n");
439
        fclose(out);
440
    }
441
#endif /* MAKECRCH */
442
}
443
444
#ifdef MAKECRCH
445
446
/*
447
   Write the 32-bit values in table[0..k-1] to out, five per line in
448
   hexadecimal separated by commas.
449
 */
450
local void write_table(out, table, k)
451
    FILE *out;
452
    const z_crc_t FAR *table;
453
    int k;
454
{
455
    int n;
456
457
    for (n = 0; n < k; n++)
458
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
459
                (unsigned long)(table[n]),
460
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
461
}
462
463
/*
464
   Write the high 32-bits of each value in table[0..k-1] to out, five per line
465
   in hexadecimal separated by commas.
466
 */
467
local void write_table32hi(out, table, k)
468
FILE *out;
469
const z_word_t FAR *table;
470
int k;
471
{
472
    int n;
473
474
    for (n = 0; n < k; n++)
475
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
476
                (unsigned long)(table[n] >> 32),
477
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
478
}
479
480
/*
481
  Write the 64-bit values in table[0..k-1] to out, three per line in
482
  hexadecimal separated by commas. This assumes that if there is a 64-bit
483
  type, then there is also a long long integer type, and it is at least 64
484
  bits. If not, then the type cast and format string can be adjusted
485
  accordingly.
486
 */
487
local void write_table64(out, table, k)
488
    FILE *out;
489
    const z_word_t FAR *table;
490
    int k;
491
{
492
    int n;
493
494
    for (n = 0; n < k; n++)
495
        fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
496
                (unsigned long long)(table[n]),
497
                n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
498
}
499
500
/* Actually do the deed. */
501
int main()
502
{
503
    make_crc_table();
504
    return 0;
505
}
506
507
#endif /* MAKECRCH */
508
509
#ifdef W
510
/*
511
  Generate the little and big-endian braid tables for the given n and z_word_t
512
  size w. Each array must have room for w blocks of 256 elements.
513
 */
514
local void braid(ltl, big, n, w)
515
    z_crc_t ltl[][256];
516
    z_word_t big[][256];
517
    int n;
518
    int w;
519
{
520
    int k;
521
    z_crc_t i, p, q;
522
    for (k = 0; k < w; k++) {
523
        p = x2nmodp((n * w + 3 - k) << 3, 0);
524
        ltl[k][0] = 0;
525
        big[w - 1 - k][0] = 0;
526
        for (i = 1; i < 256; i++) {
527
            ltl[k][i] = q = multmodp(i << 24, p);
528
            big[w - 1 - k][i] = byte_swap(q);
529
        }
530
    }
531
}
532
#endif
533
534
#else /* !DYNAMIC_CRC_TABLE */
535
/* ========================================================================
536
 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
537
 * of x for combining CRC-32s, all made by make_crc_table().
538
 */
539
#include "crc32.h"
540
#endif /* DYNAMIC_CRC_TABLE */
541
542
/* ========================================================================
543
 * Routines used for CRC calculation. Some are also required for the table
544
 * generation above.
545
 */
546
547
/*
548
  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
549
  reflected. For speed, this requires that a not be zero.
550
 */
551
local z_crc_t multmodp(a, b)
552
    z_crc_t a;
553
    z_crc_t b;
554
0
{
555
0
    z_crc_t m, p;
556
557
0
    m = (z_crc_t)1 << 31;
558
0
    p = 0;
559
0
    for (;;) {
560
0
        if (a & m) {
561
0
            p ^= b;
562
0
            if ((a & (m - 1)) == 0)
563
0
                break;
564
0
        }
565
0
        m >>= 1;
566
0
        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
567
0
    }
568
0
    return p;
569
0
}
570
571
/*
572
  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
573
  initialized.
574
 */
575
local z_crc_t x2nmodp(n, k)
576
    z_off64_t n;
577
    unsigned k;
578
0
{
579
0
    z_crc_t p;
580
581
0
    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
582
0
    while (n) {
583
0
        if (n & 1)
584
0
            p = multmodp(x2n_table[k & 31], p);
585
0
        n >>= 1;
586
0
        k++;
587
0
    }
588
0
    return p;
589
0
}
590
591
/* =========================================================================
592
 * This function can be used by asm versions of crc32(), and to force the
593
 * generation of the CRC tables in a threaded application.
594
 */
595
const z_crc_t FAR * ZEXPORT get_crc_table()
596
0
{
597
#ifdef DYNAMIC_CRC_TABLE
598
    once(&made, make_crc_table);
599
#endif /* DYNAMIC_CRC_TABLE */
600
0
    return (const z_crc_t FAR *)crc_table;
601
0
}
602
603
/* =========================================================================
604
 * Use ARM machine instructions if available. This will compute the CRC about
605
 * ten times faster than the braided calculation. This code does not check for
606
 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
607
 * only be defined if the compilation specifies an ARM processor architecture
608
 * that has the instructions. For example, compiling with -march=armv8.1-a or
609
 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
610
 * instructions.
611
 */
612
#ifdef ARMCRC32
613
614
/*
615
   Constants empirically determined to maximize speed. These values are from
616
   measurements on a Cortex-A57. Your mileage may vary.
617
 */
618
#define Z_BATCH 3990                /* number of words in a batch */
619
#define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
620
#define Z_BATCH_MIN 800             /* fewest words in a final batch */
621
622
unsigned long ZEXPORT crc32_z(crc, buf, len)
623
    unsigned long crc;
624
    const unsigned char FAR *buf;
625
    z_size_t len;
626
{
627
    z_crc_t val;
628
    z_word_t crc1, crc2;
629
    const z_word_t *word;
630
    z_word_t val0, val1, val2;
631
    z_size_t last, last2, i;
632
    z_size_t num;
633
634
    /* Return initial CRC, if requested. */
635
    if (buf == Z_NULL) return 0;
636
637
#ifdef DYNAMIC_CRC_TABLE
638
    once(&made, make_crc_table);
639
#endif /* DYNAMIC_CRC_TABLE */
640
641
    /* Pre-condition the CRC */
642
    crc = (~crc) & 0xffffffff;
643
644
    /* Compute the CRC up to a word boundary. */
645
    while (len && ((z_size_t)buf & 7) != 0) {
646
        len--;
647
        val = *buf++;
648
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
649
    }
650
651
    /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
652
    word = (z_word_t const *)buf;
653
    num = len >> 3;
654
    len &= 7;
655
656
    /* Do three interleaved CRCs to realize the throughput of one crc32x
657
       instruction per cycle. Each CRC is calculated on Z_BATCH words. The
658
       three CRCs are combined into a single CRC after each set of batches. */
659
    while (num >= 3 * Z_BATCH) {
660
        crc1 = 0;
661
        crc2 = 0;
662
        for (i = 0; i < Z_BATCH; i++) {
663
            val0 = word[i];
664
            val1 = word[i + Z_BATCH];
665
            val2 = word[i + 2 * Z_BATCH];
666
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
667
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
668
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
669
        }
670
        word += 3 * Z_BATCH;
671
        num -= 3 * Z_BATCH;
672
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
673
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
674
    }
675
676
    /* Do one last smaller batch with the remaining words, if there are enough
677
       to pay for the combination of CRCs. */
678
    last = num / 3;
679
    if (last >= Z_BATCH_MIN) {
680
        last2 = last << 1;
681
        crc1 = 0;
682
        crc2 = 0;
683
        for (i = 0; i < last; i++) {
684
            val0 = word[i];
685
            val1 = word[i + last];
686
            val2 = word[i + last2];
687
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
688
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
689
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
690
        }
691
        word += 3 * last;
692
        num -= 3 * last;
693
        val = x2nmodp(last, 6);
694
        crc = multmodp(val, crc) ^ crc1;
695
        crc = multmodp(val, crc) ^ crc2;
696
    }
697
698
    /* Compute the CRC on any remaining words. */
699
    for (i = 0; i < num; i++) {
700
        val0 = word[i];
701
        __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
702
    }
703
    word += num;
704
705
    /* Complete the CRC on any remaining bytes. */
706
    buf = (const unsigned char FAR *)word;
707
    while (len) {
708
        len--;
709
        val = *buf++;
710
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
711
    }
712
713
    /* Return the CRC, post-conditioned. */
714
    return crc ^ 0xffffffff;
715
}
716
717
#else
718
719
#ifdef W
720
721
/*
722
  Return the CRC of the W bytes in the word_t data, taking the
723
  least-significant byte of the word as the first byte of data, without any pre
724
  or post conditioning. This is used to combine the CRCs of each braid.
725
 */
726
local z_crc_t crc_word(data)
727
    z_word_t data;
728
32.9k
{
729
32.9k
    int k;
730
296k
    for (k = 0; k < W; k++)
731
263k
        data = (data >> 8) ^ crc_table[data & 0xff];
732
32.9k
    return (z_crc_t)data;
733
32.9k
}
734
735
local z_word_t crc_word_big(data)
736
    z_word_t data;
737
0
{
738
0
    int k;
739
0
    for (k = 0; k < W; k++)
740
0
        data = (data << 8) ^
741
0
            crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
742
0
    return data;
743
0
}
744
745
#endif
746
747
/* ========================================================================= */
748
unsigned long ZEXPORT crc32_z(crc, buf, len)
749
    unsigned long crc;
750
    const unsigned char FAR *buf;
751
    z_size_t len;
752
114k
{
753
    /* Return initial CRC, if requested. */
754
114k
    if (buf == Z_NULL) return 0;
755
756
#ifdef DYNAMIC_CRC_TABLE
757
    once(&made, make_crc_table);
758
#endif /* DYNAMIC_CRC_TABLE */
759
760
    /* Pre-condition the CRC */
761
74.5k
    crc = (~crc) & 0xffffffff;
762
763
74.5k
#ifdef W
764
765
    /* If provided enough bytes, do a braided CRC calculation. */
766
74.5k
    if (len >= N * W + W - 1) {
767
6.58k
        z_size_t blks;
768
6.58k
        z_word_t const *words;
769
6.58k
        unsigned endian;
770
6.58k
        int k;
771
772
        /* Compute the CRC up to a z_word_t boundary. */
773
16.2k
        while (len && ((z_size_t)buf & (W - 1)) != 0) {
774
9.64k
            len--;
775
9.64k
            crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
776
9.64k
        }
777
778
        /* Compute the CRC on as many N z_word_t blocks as are available. */
779
6.58k
        blks = len / (N * W);
780
6.58k
        len -= blks * N * W;
781
6.58k
        words = (z_word_t const *)buf;
782
783
        /* Do endian check at execution time instead of compile time, since ARM
784
           processors can change the endianess at execution time. If the
785
           compiler knows what the endianess will be, it can optimize out the
786
           check and the unused branch. */
787
6.58k
        endian = 1;
788
6.58k
        if (*(unsigned char *)&endian) {
789
            /* Little endian. */
790
791
6.58k
            z_crc_t crc0;
792
6.58k
            z_word_t word0;
793
6.58k
#if N > 1
794
6.58k
            z_crc_t crc1;
795
6.58k
            z_word_t word1;
796
6.58k
#if N > 2
797
6.58k
            z_crc_t crc2;
798
6.58k
            z_word_t word2;
799
6.58k
#if N > 3
800
6.58k
            z_crc_t crc3;
801
6.58k
            z_word_t word3;
802
6.58k
#if N > 4
803
6.58k
            z_crc_t crc4;
804
6.58k
            z_word_t word4;
805
#if N > 5
806
            z_crc_t crc5;
807
            z_word_t word5;
808
#endif
809
6.58k
#endif
810
6.58k
#endif
811
6.58k
#endif
812
6.58k
#endif
813
814
            /* Initialize the CRC for each braid. */
815
6.58k
            crc0 = crc;
816
6.58k
#if N > 1
817
6.58k
            crc1 = 0;
818
6.58k
#if N > 2
819
6.58k
            crc2 = 0;
820
6.58k
#if N > 3
821
6.58k
            crc3 = 0;
822
6.58k
#if N > 4
823
6.58k
            crc4 = 0;
824
#if N > 5
825
            crc5 = 0;
826
#endif
827
6.58k
#endif
828
6.58k
#endif
829
6.58k
#endif
830
6.58k
#endif
831
832
            /*
833
              Process the first blks-1 blocks, computing the CRCs on each braid
834
              independently.
835
             */
836
562k
            while (--blks) {
837
                /* Load the word for each braid into registers. */
838
555k
                word0 = crc0 ^ words[0];
839
555k
#if N > 1
840
555k
                word1 = crc1 ^ words[1];
841
555k
#if N > 2
842
555k
                word2 = crc2 ^ words[2];
843
555k
#if N > 3
844
555k
                word3 = crc3 ^ words[3];
845
555k
#if N > 4
846
555k
                word4 = crc4 ^ words[4];
847
#if N > 5
848
                word5 = crc5 ^ words[5];
849
#endif
850
555k
#endif
851
555k
#endif
852
555k
#endif
853
555k
#endif
854
555k
                words += N;
855
856
                /* Compute and update the CRC for each word. The loop should
857
                   get unrolled. */
858
555k
                crc0 = crc_braid_table[0][word0 & 0xff];
859
555k
#if N > 1
860
555k
                crc1 = crc_braid_table[0][word1 & 0xff];
861
555k
#if N > 2
862
555k
                crc2 = crc_braid_table[0][word2 & 0xff];
863
555k
#if N > 3
864
555k
                crc3 = crc_braid_table[0][word3 & 0xff];
865
555k
#if N > 4
866
555k
                crc4 = crc_braid_table[0][word4 & 0xff];
867
#if N > 5
868
                crc5 = crc_braid_table[0][word5 & 0xff];
869
#endif
870
555k
#endif
871
555k
#endif
872
555k
#endif
873
555k
#endif
874
4.44M
                for (k = 1; k < W; k++) {
875
3.89M
                    crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
876
3.89M
#if N > 1
877
3.89M
                    crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
878
3.89M
#if N > 2
879
3.89M
                    crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
880
3.89M
#if N > 3
881
3.89M
                    crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
882
3.89M
#if N > 4
883
3.89M
                    crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
884
#if N > 5
885
                    crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
886
#endif
887
3.89M
#endif
888
3.89M
#endif
889
3.89M
#endif
890
3.89M
#endif
891
3.89M
                }
892
555k
            }
893
894
            /*
895
              Process the last block, combining the CRCs of the N braids at the
896
              same time.
897
             */
898
6.58k
            crc = crc_word(crc0 ^ words[0]);
899
6.58k
#if N > 1
900
6.58k
            crc = crc_word(crc1 ^ words[1] ^ crc);
901
6.58k
#if N > 2
902
6.58k
            crc = crc_word(crc2 ^ words[2] ^ crc);
903
6.58k
#if N > 3
904
6.58k
            crc = crc_word(crc3 ^ words[3] ^ crc);
905
6.58k
#if N > 4
906
6.58k
            crc = crc_word(crc4 ^ words[4] ^ crc);
907
#if N > 5
908
            crc = crc_word(crc5 ^ words[5] ^ crc);
909
#endif
910
6.58k
#endif
911
6.58k
#endif
912
6.58k
#endif
913
6.58k
#endif
914
6.58k
            words += N;
915
6.58k
        }
916
0
        else {
917
            /* Big endian. */
918
919
0
            z_word_t crc0, word0, comb;
920
0
#if N > 1
921
0
            z_word_t crc1, word1;
922
0
#if N > 2
923
0
            z_word_t crc2, word2;
924
0
#if N > 3
925
0
            z_word_t crc3, word3;
926
0
#if N > 4
927
0
            z_word_t crc4, word4;
928
#if N > 5
929
            z_word_t crc5, word5;
930
#endif
931
0
#endif
932
0
#endif
933
0
#endif
934
0
#endif
935
936
            /* Initialize the CRC for each braid. */
937
0
            crc0 = byte_swap(crc);
938
0
#if N > 1
939
0
            crc1 = 0;
940
0
#if N > 2
941
0
            crc2 = 0;
942
0
#if N > 3
943
0
            crc3 = 0;
944
0
#if N > 4
945
0
            crc4 = 0;
946
#if N > 5
947
            crc5 = 0;
948
#endif
949
0
#endif
950
0
#endif
951
0
#endif
952
0
#endif
953
954
            /*
955
              Process the first blks-1 blocks, computing the CRCs on each braid
956
              independently.
957
             */
958
0
            while (--blks) {
959
                /* Load the word for each braid into registers. */
960
0
                word0 = crc0 ^ words[0];
961
0
#if N > 1
962
0
                word1 = crc1 ^ words[1];
963
0
#if N > 2
964
0
                word2 = crc2 ^ words[2];
965
0
#if N > 3
966
0
                word3 = crc3 ^ words[3];
967
0
#if N > 4
968
0
                word4 = crc4 ^ words[4];
969
#if N > 5
970
                word5 = crc5 ^ words[5];
971
#endif
972
0
#endif
973
0
#endif
974
0
#endif
975
0
#endif
976
0
                words += N;
977
978
                /* Compute and update the CRC for each word. The loop should
979
                   get unrolled. */
980
0
                crc0 = crc_braid_big_table[0][word0 & 0xff];
981
0
#if N > 1
982
0
                crc1 = crc_braid_big_table[0][word1 & 0xff];
983
0
#if N > 2
984
0
                crc2 = crc_braid_big_table[0][word2 & 0xff];
985
0
#if N > 3
986
0
                crc3 = crc_braid_big_table[0][word3 & 0xff];
987
0
#if N > 4
988
0
                crc4 = crc_braid_big_table[0][word4 & 0xff];
989
#if N > 5
990
                crc5 = crc_braid_big_table[0][word5 & 0xff];
991
#endif
992
0
#endif
993
0
#endif
994
0
#endif
995
0
#endif
996
0
                for (k = 1; k < W; k++) {
997
0
                    crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
998
0
#if N > 1
999
0
                    crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
1000
0
#if N > 2
1001
0
                    crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
1002
0
#if N > 3
1003
0
                    crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
1004
0
#if N > 4
1005
0
                    crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
1006
#if N > 5
1007
                    crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
1008
#endif
1009
0
#endif
1010
0
#endif
1011
0
#endif
1012
0
#endif
1013
0
                }
1014
0
            }
1015
1016
            /*
1017
              Process the last block, combining the CRCs of the N braids at the
1018
              same time.
1019
             */
1020
0
            comb = crc_word_big(crc0 ^ words[0]);
1021
0
#if N > 1
1022
0
            comb = crc_word_big(crc1 ^ words[1] ^ comb);
1023
0
#if N > 2
1024
0
            comb = crc_word_big(crc2 ^ words[2] ^ comb);
1025
0
#if N > 3
1026
0
            comb = crc_word_big(crc3 ^ words[3] ^ comb);
1027
0
#if N > 4
1028
0
            comb = crc_word_big(crc4 ^ words[4] ^ comb);
1029
#if N > 5
1030
            comb = crc_word_big(crc5 ^ words[5] ^ comb);
1031
#endif
1032
0
#endif
1033
0
#endif
1034
0
#endif
1035
0
#endif
1036
0
            words += N;
1037
0
            crc = byte_swap(comb);
1038
0
        }
1039
1040
        /*
1041
          Update the pointer to the remaining bytes to process.
1042
         */
1043
6.58k
        buf = (unsigned char const *)words;
1044
6.58k
    }
1045
1046
74.5k
#endif /* W */
1047
1048
    /* Complete the computation of the CRC on any remaining bytes. */
1049
123k
    while (len >= 8) {
1050
48.7k
        len -= 8;
1051
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1052
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1053
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1054
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1055
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1056
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1057
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1058
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1059
48.7k
    }
1060
364k
    while (len) {
1061
289k
        len--;
1062
289k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1063
289k
    }
1064
1065
    /* Return the CRC, post-conditioned. */
1066
74.5k
    return crc ^ 0xffffffff;
1067
114k
}
1068
1069
#endif
1070
1071
/* ========================================================================= */
1072
unsigned long ZEXPORT crc32(crc, buf, len)
1073
    unsigned long crc;
1074
    const unsigned char FAR *buf;
1075
    uInt len;
1076
114k
{
1077
114k
    return crc32_z(crc, buf, len);
1078
114k
}
1079
1080
/* ========================================================================= */
1081
uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1082
    uLong crc1;
1083
    uLong crc2;
1084
    z_off64_t len2;
1085
0
{
1086
#ifdef DYNAMIC_CRC_TABLE
1087
    once(&made, make_crc_table);
1088
#endif /* DYNAMIC_CRC_TABLE */
1089
0
    return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1090
0
}
1091
1092
/* ========================================================================= */
1093
uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1094
    uLong crc1;
1095
    uLong crc2;
1096
    z_off_t len2;
1097
0
{
1098
0
    return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1099
0
}
1100
1101
/* ========================================================================= */
1102
uLong ZEXPORT crc32_combine_gen64(len2)
1103
    z_off64_t len2;
1104
0
{
1105
#ifdef DYNAMIC_CRC_TABLE
1106
    once(&made, make_crc_table);
1107
#endif /* DYNAMIC_CRC_TABLE */
1108
0
    return x2nmodp(len2, 3);
1109
0
}
1110
1111
/* ========================================================================= */
1112
uLong ZEXPORT crc32_combine_gen(len2)
1113
    z_off_t len2;
1114
0
{
1115
0
    return crc32_combine_gen64((z_off64_t)len2);
1116
0
}
1117
1118
/* ========================================================================= */
1119
uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
1120
    uLong crc1;
1121
    uLong crc2;
1122
    uLong op;
1123
0
{
1124
0
    return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1125
0
}