Coverage Report

Created: 2023-06-07 06:09

/src/varnish-cache/lib/libvgz/crc32.c
Line
Count
Source (jump to first uncovered line)
1
/* crc32.c -- compute the CRC-32 of a data stream
2
 * Copyright (C) 1995-2022 Mark Adler
3
 * For conditions of distribution and use, see copyright notice in zlib.h
4
 *
5
 * This interleaved implementation of a CRC makes use of pipelined multiple
6
 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7
 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8
 */
9
10
/* @(#) $Id$ */
11
12
/*
13
  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14
  protection on the static variables used to control the first-use generation
15
  of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16
  first call get_crc_table() to initialize the tables before allowing more than
17
  one thread to use crc32().
18
19
  MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20
  produced, so that this one source file can be compiled to an executable.
21
 */
22
23
#ifdef MAKECRCH
24
#  include <stdio.h>
25
#  ifndef DYNAMIC_CRC_TABLE
26
#    define DYNAMIC_CRC_TABLE
27
#  endif /* !DYNAMIC_CRC_TABLE */
28
#endif /* MAKECRCH */
29
30
#include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31
32
 /*
33
  A CRC of a message is computed on N braids of words in the message, where
34
  each word consists of W bytes (4 or 8). If N is 3, for example, then three
35
  running sparse CRCs are calculated respectively on each braid, at these
36
  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37
  This is done starting at a word boundary, and continues until as many blocks
38
  of N * W bytes as are available have been processed. The results are combined
39
  into a single CRC at the end. For this code, N must be in the range 1..6 and
40
  W must be 4 or 8. The upper limit on N can be increased if desired by adding
41
  more #if blocks, extending the patterns apparent in the code. In addition,
42
  crc32.h would need to be regenerated, if the maximum N value is increased.
43
44
  N and W are chosen empirically by benchmarking the execution time on a given
45
  processor. The choices for N and W below were based on testing on Intel Kaby
46
  Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47
  Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48
  with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49
  They were all tested with either gcc or clang, all using the -O3 optimization
50
  level. Your mileage may vary.
51
 */
52
53
/* Define N */
54
#ifdef Z_TESTN
55
#  define N Z_TESTN
56
#else
57
3.39M
#  define N 5
58
#endif
59
#if N < 1 || N > 6
60
#  error N must be in 1..6
61
#endif
62
63
/*
64
  z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65
  z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66
  that bytes are eight bits.
67
 */
68
69
/*
70
  Define W and the associated z_word_t type. If W is not defined, then a
71
  braided calculation is not used, and the associated tables and code are not
72
  compiled.
73
 */
74
#ifdef Z_TESTW
75
#  if Z_TESTW-1 != -1
76
#    define W Z_TESTW
77
#  endif
78
#else
79
#  ifdef MAKECRCH
80
#    define W 8         /* required for MAKECRCH */
81
#  else
82
#    if defined(__x86_64__) || defined(__aarch64__)
83
13.2M
#      define W 8
84
#    else
85
#      define W 4
86
#    endif
87
#  endif
88
#endif
89
#ifdef W
90
#  if W == 8 && defined(Z_U8)
91
     typedef Z_U8 z_word_t;
92
#  elif defined(Z_U4)
93
#    undef W
94
#    define W 4
95
     typedef Z_U4 z_word_t;
96
#  else
97
#    undef W
98
#  endif
99
#endif
100
101
/* If available, use the ARM processor CRC32 instruction. */
102
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103
#  define ARMCRC32
104
#endif
105
106
/* Local functions. */
107
local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
108
local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
109
110
#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111
   local z_word_t byte_swap OF((z_word_t word));
112
#endif
113
114
#if defined(W) && !defined(ARMCRC32)
115
    local z_crc_t crc_word OF((z_word_t data));
116
    local z_word_t crc_word_big OF((z_word_t data));
117
#endif
118
119
#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
120
/*
121
  Swap the bytes in a z_word_t to convert between little and big endian. Any
122
  self-respecting compiler will optimize this to a single machine byte-swap
123
  instruction, if one is available. This assumes that word_t is either 32 bits
124
  or 64 bits.
125
 */
126
local z_word_t byte_swap OF((z_word_t word));
127
128
local z_word_t byte_swap(word)
129
    z_word_t word;
130
0
{
131
0
#  if W == 8
132
0
    return
133
0
        (word & 0xff00000000000000) >> 56 |
134
0
        (word & 0xff000000000000) >> 40 |
135
0
        (word & 0xff0000000000) >> 24 |
136
0
        (word & 0xff00000000) >> 8 |
137
0
        (word & 0xff000000) << 8 |
138
0
        (word & 0xff0000) << 24 |
139
0
        (word & 0xff00) << 40 |
140
0
        (word & 0xff) << 56;
141
#  else   /* W == 4 */
142
    return
143
        (word & 0xff000000) >> 24 |
144
        (word & 0xff0000) >> 8 |
145
        (word & 0xff00) << 8 |
146
        (word & 0xff) << 24;
147
#  endif
148
0
}
149
#endif
150
151
/* CRC polynomial. */
152
7.79k
#define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
153
154
#ifdef DYNAMIC_CRC_TABLE
155
156
local z_crc_t FAR crc_table[256];
157
local z_crc_t FAR x2n_table[32];
158
local void make_crc_table OF((void));
159
#ifdef W
160
   local z_word_t FAR crc_big_table[256];
161
   local z_crc_t FAR crc_braid_table[W][256];
162
   local z_word_t FAR crc_braid_big_table[W][256];
163
   local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
164
#endif
165
#ifdef MAKECRCH
166
   local void write_table OF((FILE *, const z_crc_t FAR *, int));
167
   local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
168
   local void write_table64 OF((FILE *, const z_word_t FAR *, int));
169
#endif /* MAKECRCH */
170
171
/*
172
  Define a once() function depending on the availability of atomics. If this is
173
  compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
174
  multiple threads, and if atomics are not available, then get_crc_table() must
175
  be called to initialize the tables and must return before any threads are
176
  allowed to compute or combine CRCs.
177
 */
178
179
/* Definition of once functionality. */
180
typedef struct once_s once_t;
181
local void once OF((once_t *, void (*)(void)));
182
183
/* Check for the availability of atomics. */
184
#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
185
    !defined(__STDC_NO_ATOMICS__)
186
187
#include <stdatomic.h>
188
189
/* Structure for once(), which must be initialized with ONCE_INIT. */
190
struct once_s {
191
    atomic_flag begun;
192
    atomic_int done;
193
};
194
#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
195
196
/*
197
  Run the provided init() function exactly once, even if multiple threads
198
  invoke once() at the same time. The state must be a once_t initialized with
199
  ONCE_INIT.
200
 */
201
local void once(state, init)
202
    once_t *state;
203
    void (*init)(void);
204
{
205
    if (!atomic_load(&state->done)) {
206
        if (atomic_flag_test_and_set(&state->begun))
207
            while (!atomic_load(&state->done))
208
                ;
209
        else {
210
            init();
211
            atomic_store(&state->done, 1);
212
        }
213
    }
214
}
215
216
#else   /* no atomics */
217
218
/* Structure for once(), which must be initialized with ONCE_INIT. */
219
struct once_s {
220
    volatile int begun;
221
    volatile int done;
222
};
223
#define ONCE_INIT {0, 0}
224
225
/* Test and set. Alas, not atomic, but tries to minimize the period of
226
   vulnerability. */
227
local int test_and_set OF((int volatile *));
228
local int test_and_set(flag)
229
    int volatile *flag;
230
{
231
    int was;
232
233
    was = *flag;
234
    *flag = 1;
235
    return was;
236
}
237
238
/* Run the provided init() function once. This is not thread-safe. */
239
local void once(state, init)
240
    once_t *state;
241
    void (*init)(void);
242
{
243
    if (!state->done) {
244
        if (test_and_set(&state->begun))
245
            while (!state->done)
246
                ;
247
        else {
248
            init();
249
            state->done = 1;
250
        }
251
    }
252
}
253
254
#endif
255
256
/* State for once(). */
257
local once_t made = ONCE_INIT;
258
259
/*
260
  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
261
  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
262
263
  Polynomials over GF(2) are represented in binary, one bit per coefficient,
264
  with the lowest powers in the most significant bit. Then adding polynomials
265
  is just exclusive-or, and multiplying a polynomial by x is a right shift by
266
  one. If we call the above polynomial p, and represent a byte as the
267
  polynomial q, also with the lowest power in the most significant bit (so the
268
  byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
269
  where a mod b means the remainder after dividing a by b.
270
271
  This calculation is done using the shift-register method of multiplying and
272
  taking the remainder. The register is initialized to zero, and for each
273
  incoming bit, x^32 is added mod p to the register if the bit is a one (where
274
  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
275
  (which is shifting right by one and adding x^32 mod p if the bit shifted out
276
  is a one). We start with the highest power (least significant bit) of q and
277
  repeat for all eight bits of q.
278
279
  The table is simply the CRC of all possible eight bit values. This is all the
280
  information needed to generate CRCs on data a byte at a time for all
281
  combinations of CRC register values and incoming bytes.
282
 */
283
284
local void make_crc_table()
285
{
286
    unsigned i, j, n;
287
    z_crc_t p;
288
289
    /* initialize the CRC of bytes tables */
290
    for (i = 0; i < 256; i++) {
291
        p = i;
292
        for (j = 0; j < 8; j++)
293
            p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
294
        crc_table[i] = p;
295
#ifdef W
296
        crc_big_table[i] = byte_swap(p);
297
#endif
298
    }
299
300
    /* initialize the x^2^n mod p(x) table */
301
    p = (z_crc_t)1 << 30;         /* x^1 */
302
    x2n_table[0] = p;
303
    for (n = 1; n < 32; n++)
304
        x2n_table[n] = p = multmodp(p, p);
305
306
#ifdef W
307
    /* initialize the braiding tables -- needs x2n_table[] */
308
    braid(crc_braid_table, crc_braid_big_table, N, W);
309
#endif
310
311
#ifdef MAKECRCH
312
    {
313
        /*
314
          The crc32.h header file contains tables for both 32-bit and 64-bit
315
          z_word_t's, and so requires a 64-bit type be available. In that case,
316
          z_word_t must be defined to be 64-bits. This code then also generates
317
          and writes out the tables for the case that z_word_t is 32 bits.
318
         */
319
#if !defined(W) || W != 8
320
#  error Need a 64-bit integer type in order to generate crc32.h.
321
#endif
322
        FILE *out;
323
        int k, n;
324
        z_crc_t ltl[8][256];
325
        z_word_t big[8][256];
326
327
        out = fopen("crc32.h", "w");
328
        if (out == NULL) return;
329
330
        /* write out little-endian CRC table to crc32.h */
331
        fprintf(out,
332
            "/* crc32.h -- tables for rapid CRC calculation\n"
333
            " * Generated automatically by crc32.c\n */\n"
334
            "\n"
335
            "local const z_crc_t FAR crc_table[] = {\n"
336
            "    ");
337
        write_table(out, crc_table, 256);
338
        fprintf(out,
339
            "};\n");
340
341
        /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
342
        fprintf(out,
343
            "\n"
344
            "#ifdef W\n"
345
            "\n"
346
            "#if W == 8\n"
347
            "\n"
348
            "local const z_word_t FAR crc_big_table[] = {\n"
349
            "    ");
350
        write_table64(out, crc_big_table, 256);
351
        fprintf(out,
352
            "};\n");
353
354
        /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
355
        fprintf(out,
356
            "\n"
357
            "#else /* W == 4 */\n"
358
            "\n"
359
            "local const z_word_t FAR crc_big_table[] = {\n"
360
            "    ");
361
        write_table32hi(out, crc_big_table, 256);
362
        fprintf(out,
363
            "};\n"
364
            "\n"
365
            "#endif\n");
366
367
        /* write out braid tables for each value of N */
368
        for (n = 1; n <= 6; n++) {
369
            fprintf(out,
370
            "\n"
371
            "#if N == %d\n", n);
372
373
            /* compute braid tables for this N and 64-bit word_t */
374
            braid(ltl, big, n, 8);
375
376
            /* write out braid tables for 64-bit z_word_t to crc32.h */
377
            fprintf(out,
378
            "\n"
379
            "#if W == 8\n"
380
            "\n"
381
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
382
            for (k = 0; k < 8; k++) {
383
                fprintf(out, "   {");
384
                write_table(out, ltl[k], 256);
385
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
386
            }
387
            fprintf(out,
388
            "};\n"
389
            "\n"
390
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
391
            for (k = 0; k < 8; k++) {
392
                fprintf(out, "   {");
393
                write_table64(out, big[k], 256);
394
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
395
            }
396
            fprintf(out,
397
            "};\n");
398
399
            /* compute braid tables for this N and 32-bit word_t */
400
            braid(ltl, big, n, 4);
401
402
            /* write out braid tables for 32-bit z_word_t to crc32.h */
403
            fprintf(out,
404
            "\n"
405
            "#else /* W == 4 */\n"
406
            "\n"
407
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
408
            for (k = 0; k < 4; k++) {
409
                fprintf(out, "   {");
410
                write_table(out, ltl[k], 256);
411
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
412
            }
413
            fprintf(out,
414
            "};\n"
415
            "\n"
416
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
417
            for (k = 0; k < 4; k++) {
418
                fprintf(out, "   {");
419
                write_table32hi(out, big[k], 256);
420
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
421
            }
422
            fprintf(out,
423
            "};\n"
424
            "\n"
425
            "#endif\n"
426
            "\n"
427
            "#endif\n");
428
        }
429
        fprintf(out,
430
            "\n"
431
            "#endif\n");
432
433
        /* write out zeros operator table to crc32.h */
434
        fprintf(out,
435
            "\n"
436
            "local const z_crc_t FAR x2n_table[] = {\n"
437
            "    ");
438
        write_table(out, x2n_table, 32);
439
        fprintf(out,
440
            "};\n");
441
        fclose(out);
442
    }
443
#endif /* MAKECRCH */
444
}
445
446
#ifdef MAKECRCH
447
448
/*
449
   Write the 32-bit values in table[0..k-1] to out, five per line in
450
   hexadecimal separated by commas.
451
 */
452
local void write_table(out, table, k)
453
    FILE *out;
454
    const z_crc_t FAR *table;
455
    int k;
456
{
457
    int n;
458
459
    for (n = 0; n < k; n++)
460
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
461
                (unsigned long)(table[n]),
462
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
463
}
464
465
/*
466
   Write the high 32-bits of each value in table[0..k-1] to out, five per line
467
   in hexadecimal separated by commas.
468
 */
469
local void write_table32hi(out, table, k)
470
FILE *out;
471
const z_word_t FAR *table;
472
int k;
473
{
474
    int n;
475
476
    for (n = 0; n < k; n++)
477
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
478
                (unsigned long)(table[n] >> 32),
479
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
480
}
481
482
/*
483
  Write the 64-bit values in table[0..k-1] to out, three per line in
484
  hexadecimal separated by commas. This assumes that if there is a 64-bit
485
  type, then there is also a long long integer type, and it is at least 64
486
  bits. If not, then the type cast and format string can be adjusted
487
  accordingly.
488
 */
489
local void write_table64(out, table, k)
490
    FILE *out;
491
    const z_word_t FAR *table;
492
    int k;
493
{
494
    int n;
495
496
    for (n = 0; n < k; n++)
497
        fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
498
                (unsigned long long)(table[n]),
499
                n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
500
}
501
502
/* Actually do the deed. */
503
int main()
504
{
505
    make_crc_table();
506
    return 0;
507
}
508
509
#endif /* MAKECRCH */
510
511
#ifdef W
512
/*
513
  Generate the little and big-endian braid tables for the given n and z_word_t
514
  size w. Each array must have room for w blocks of 256 elements.
515
 */
516
local void braid(ltl, big, n, w)
517
    z_crc_t ltl[][256];
518
    z_word_t big[][256];
519
    int n;
520
    int w;
521
{
522
    int k;
523
    z_crc_t i, p, q;
524
    for (k = 0; k < w; k++) {
525
        p = x2nmodp((n * w + 3 - k) << 3, 0);
526
        ltl[k][0] = 0;
527
        big[w - 1 - k][0] = 0;
528
        for (i = 1; i < 256; i++) {
529
            ltl[k][i] = q = multmodp(i << 24, p);
530
            big[w - 1 - k][i] = byte_swap(q);
531
        }
532
    }
533
}
534
#endif
535
536
#else /* !DYNAMIC_CRC_TABLE */
537
/* ========================================================================
538
 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
539
 * of x for combining CRC-32s, all made by make_crc_table().
540
 */
541
#include "crc32.h"
542
#endif /* DYNAMIC_CRC_TABLE */
543
544
/* ========================================================================
545
 * Routines used for CRC calculation. Some are also required for the table
546
 * generation above.
547
 */
548
549
/*
550
  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
551
  reflected. For speed, this requires that a not be zero.
552
 */
553
local z_crc_t multmodp(a, b)
554
    z_crc_t a;
555
    z_crc_t b;
556
803
{
557
803
    z_crc_t m, p;
558
559
803
    m = (z_crc_t)1 << 31;
560
803
    p = 0;
561
19.1k
    for (;;) {
562
19.1k
        if (a & m) {
563
8.65k
            p ^= b;
564
8.65k
            if ((a & (m - 1)) == 0)
565
803
                break;
566
8.65k
        }
567
18.3k
        m >>= 1;
568
18.3k
        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
569
18.3k
    }
570
803
    return p;
571
803
}
572
573
/*
574
  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
575
  initialized.
576
 */
577
local z_crc_t x2nmodp(n, k)
578
    z_off64_t n;
579
    unsigned k;
580
159
{
581
159
    z_crc_t p;
582
583
159
    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
584
957
    while (n) {
585
798
        if (n & 1)
586
644
            p = multmodp(x2n_table[k & 31], p);
587
798
        n >>= 1;
588
798
        k++;
589
798
    }
590
159
    return p;
591
159
}
592
593
/* =========================================================================
594
 * This function can be used by asm versions of crc32(), and to force the
595
 * generation of the CRC tables in a threaded application.
596
 */
597
const z_crc_t FAR * ZEXPORT get_crc_table()
598
0
{
599
#ifdef DYNAMIC_CRC_TABLE
600
    once(&made, make_crc_table);
601
#endif /* DYNAMIC_CRC_TABLE */
602
0
    return (const z_crc_t FAR *)crc_table;
603
0
}
604
605
/* =========================================================================
606
 * Use ARM machine instructions if available. This will compute the CRC about
607
 * ten times faster than the braided calculation. This code does not check for
608
 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
609
 * only be defined if the compilation specifies an ARM processor architecture
610
 * that has the instructions. For example, compiling with -march=armv8.1-a or
611
 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
612
 * instructions.
613
 */
614
#ifdef ARMCRC32
615
616
/*
617
   Constants empirically determined to maximize speed. These values are from
618
   measurements on a Cortex-A57. Your mileage may vary.
619
 */
620
#define Z_BATCH 3990                /* number of words in a batch */
621
#define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
622
#define Z_BATCH_MIN 800             /* fewest words in a final batch */
623
624
unsigned long ZEXPORT crc32_z(crc, buf, len)
625
    unsigned long crc;
626
    const unsigned char FAR *buf;
627
    z_size_t len;
628
{
629
    z_crc_t val;
630
    z_word_t crc1, crc2;
631
    const z_word_t *word;
632
    z_word_t val0, val1, val2;
633
    z_size_t last, last2, i;
634
    z_size_t num;
635
636
    /* Return initial CRC, if requested. */
637
    if (buf == Z_NULL) return 0;
638
639
#ifdef DYNAMIC_CRC_TABLE
640
    once(&made, make_crc_table);
641
#endif /* DYNAMIC_CRC_TABLE */
642
643
    /* Pre-condition the CRC */
644
    crc = (~crc) & 0xffffffff;
645
646
    /* Compute the CRC up to a word boundary. */
647
    while (len && ((z_size_t)buf & 7) != 0) {
648
        len--;
649
        val = *buf++;
650
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
651
    }
652
653
    /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
654
    word = (z_word_t const *)(intptr_t)buf;
655
    num = len >> 3;
656
    len &= 7;
657
658
    /* Do three interleaved CRCs to realize the throughput of one crc32x
659
       instruction per cycle. Each CRC is calculated on Z_BATCH words. The
660
       three CRCs are combined into a single CRC after each set of batches. */
661
    while (num >= 3 * Z_BATCH) {
662
        crc1 = 0;
663
        crc2 = 0;
664
        for (i = 0; i < Z_BATCH; i++) {
665
            val0 = word[i];
666
            val1 = word[i + Z_BATCH];
667
            val2 = word[i + 2 * Z_BATCH];
668
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
669
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
670
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
671
        }
672
        word += 3 * Z_BATCH;
673
        num -= 3 * Z_BATCH;
674
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
675
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
676
    }
677
678
    /* Do one last smaller batch with the remaining words, if there are enough
679
       to pay for the combination of CRCs. */
680
    last = num / 3;
681
    if (last >= Z_BATCH_MIN) {
682
        last2 = last << 1;
683
        crc1 = 0;
684
        crc2 = 0;
685
        for (i = 0; i < last; i++) {
686
            val0 = word[i];
687
            val1 = word[i + last];
688
            val2 = word[i + last2];
689
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
690
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
691
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
692
        }
693
        word += 3 * last;
694
        num -= 3 * last;
695
        val = x2nmodp(last, 6);
696
        crc = multmodp(val, crc) ^ crc1;
697
        crc = multmodp(val, crc) ^ crc2;
698
    }
699
700
    /* Compute the CRC on any remaining words. */
701
    for (i = 0; i < num; i++) {
702
        val0 = word[i];
703
        __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
704
    }
705
    word += num;
706
707
    /* Complete the CRC on any remaining bytes. */
708
    buf = (const unsigned char FAR *)word;
709
    while (len) {
710
        len--;
711
        val = *buf++;
712
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
713
    }
714
715
    /* Return the CRC, post-conditioned. */
716
    return crc ^ 0xffffffff;
717
}
718
719
#else
720
721
#ifdef W
722
723
/*
724
  Return the CRC of the W bytes in the word_t data, taking the
725
  least-significant byte of the word as the first byte of data, without any pre
726
  or post conditioning. This is used to combine the CRCs of each braid.
727
 */
728
local z_crc_t crc_word OF((z_word_t data));
729
730
local z_crc_t crc_word(data)
731
    z_word_t data;
732
32.6k
{
733
32.6k
    int k;
734
293k
    for (k = 0; k < W; k++)
735
261k
        data = (data >> 8) ^ crc_table[data & 0xff];
736
32.6k
    return (z_crc_t)data;
737
32.6k
}
738
739
local z_word_t crc_word_big OF((z_word_t data));
740
741
local z_word_t crc_word_big(data)
742
    z_word_t data;
743
0
{
744
0
    int k;
745
0
    for (k = 0; k < W; k++)
746
0
        data = (data << 8) ^
747
0
            crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
748
0
    return data;
749
0
}
750
751
#endif
752
753
/* ========================================================================= */
754
unsigned long ZEXPORT crc32_z(crc, buf, len)
755
    unsigned long crc;
756
    const unsigned char FAR *buf;
757
    z_size_t len;
758
4.65M
{
759
    /* Return initial CRC, if requested. */
760
4.65M
    if (buf == Z_NULL) return 0;
761
762
#ifdef DYNAMIC_CRC_TABLE
763
    once(&made, make_crc_table);
764
#endif /* DYNAMIC_CRC_TABLE */
765
766
    /* Pre-condition the CRC */
767
2.35M
    crc = (~crc) & 0xffffffff;
768
769
2.35M
#ifdef W
770
771
    /* If provided enough bytes, do a braided CRC calculation. */
772
2.35M
    if (len >= N * W + W - 1) {
773
6.53k
        z_size_t blks;
774
6.53k
        z_word_t const *words;
775
6.53k
        unsigned endian;
776
6.53k
        int k;
777
778
        /* Compute the CRC up to a z_word_t boundary. */
779
28.1k
        while (len && ((z_size_t)buf & (W - 1)) != 0) {
780
21.5k
            len--;
781
21.5k
            crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
782
21.5k
        }
783
784
        /* Compute the CRC on as many N z_word_t blocks as are available. */
785
6.53k
        blks = len / (N * W);
786
6.53k
        len -= blks * N * W;
787
6.53k
        words = (z_word_t const *)(intptr_t)buf;
788
789
        /* Do endian check at execution time instead of compile time, since ARM
790
           processors can change the endianess at execution time. If the
791
           compiler knows what the endianess will be, it can optimize out the
792
           check and the unused branch. */
793
6.53k
        endian = 1;
794
6.53k
        if (*(unsigned char *)&endian) {
795
            /* Little endian. */
796
797
6.53k
            z_crc_t crc0;
798
6.53k
            z_word_t word0;
799
6.53k
#if N > 1
800
6.53k
            z_crc_t crc1;
801
6.53k
            z_word_t word1;
802
6.53k
#if N > 2
803
6.53k
            z_crc_t crc2;
804
6.53k
            z_word_t word2;
805
6.53k
#if N > 3
806
6.53k
            z_crc_t crc3;
807
6.53k
            z_word_t word3;
808
6.53k
#if N > 4
809
6.53k
            z_crc_t crc4;
810
6.53k
            z_word_t word4;
811
#if N > 5
812
            z_crc_t crc5;
813
            z_word_t word5;
814
#endif
815
6.53k
#endif
816
6.53k
#endif
817
6.53k
#endif
818
6.53k
#endif
819
820
            /* Initialize the CRC for each braid. */
821
6.53k
            crc0 = crc;
822
6.53k
#if N > 1
823
6.53k
            crc1 = 0;
824
6.53k
#if N > 2
825
6.53k
            crc2 = 0;
826
6.53k
#if N > 3
827
6.53k
            crc3 = 0;
828
6.53k
#if N > 4
829
6.53k
            crc4 = 0;
830
#if N > 5
831
            crc5 = 0;
832
#endif
833
6.53k
#endif
834
6.53k
#endif
835
6.53k
#endif
836
6.53k
#endif
837
838
            /*
839
              Process the first blks-1 blocks, computing the CRCs on each braid
840
              independently.
841
             */
842
1.03M
            while (--blks) {
843
                /* Load the word for each braid into registers. */
844
1.02M
                word0 = crc0 ^ words[0];
845
1.02M
#if N > 1
846
1.02M
                word1 = crc1 ^ words[1];
847
1.02M
#if N > 2
848
1.02M
                word2 = crc2 ^ words[2];
849
1.02M
#if N > 3
850
1.02M
                word3 = crc3 ^ words[3];
851
1.02M
#if N > 4
852
1.02M
                word4 = crc4 ^ words[4];
853
#if N > 5
854
                word5 = crc5 ^ words[5];
855
#endif
856
1.02M
#endif
857
1.02M
#endif
858
1.02M
#endif
859
1.02M
#endif
860
1.02M
                words += N;
861
862
                /* Compute and update the CRC for each word. The loop should
863
                   get unrolled. */
864
1.02M
                crc0 = crc_braid_table[0][word0 & 0xff];
865
1.02M
#if N > 1
866
1.02M
                crc1 = crc_braid_table[0][word1 & 0xff];
867
1.02M
#if N > 2
868
1.02M
                crc2 = crc_braid_table[0][word2 & 0xff];
869
1.02M
#if N > 3
870
1.02M
                crc3 = crc_braid_table[0][word3 & 0xff];
871
1.02M
#if N > 4
872
1.02M
                crc4 = crc_braid_table[0][word4 & 0xff];
873
#if N > 5
874
                crc5 = crc_braid_table[0][word5 & 0xff];
875
#endif
876
1.02M
#endif
877
1.02M
#endif
878
1.02M
#endif
879
1.02M
#endif
880
8.21M
                for (k = 1; k < W; k++) {
881
7.18M
                    crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
882
7.18M
#if N > 1
883
7.18M
                    crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
884
7.18M
#if N > 2
885
7.18M
                    crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
886
7.18M
#if N > 3
887
7.18M
                    crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
888
7.18M
#if N > 4
889
7.18M
                    crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
890
#if N > 5
891
                    crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
892
#endif
893
7.18M
#endif
894
7.18M
#endif
895
7.18M
#endif
896
7.18M
#endif
897
7.18M
                }
898
1.02M
            }
899
900
            /*
901
              Process the last block, combining the CRCs of the N braids at the
902
              same time.
903
             */
904
6.53k
            crc = crc_word(crc0 ^ words[0]);
905
6.53k
#if N > 1
906
6.53k
            crc = crc_word(crc1 ^ words[1] ^ crc);
907
6.53k
#if N > 2
908
6.53k
            crc = crc_word(crc2 ^ words[2] ^ crc);
909
6.53k
#if N > 3
910
6.53k
            crc = crc_word(crc3 ^ words[3] ^ crc);
911
6.53k
#if N > 4
912
6.53k
            crc = crc_word(crc4 ^ words[4] ^ crc);
913
#if N > 5
914
            crc = crc_word(crc5 ^ words[5] ^ crc);
915
#endif
916
6.53k
#endif
917
6.53k
#endif
918
6.53k
#endif
919
6.53k
#endif
920
6.53k
            words += N;
921
6.53k
        }
922
0
        else {
923
            /* Big endian. */
924
925
0
            z_word_t crc0, word0, comb;
926
0
#if N > 1
927
0
            z_word_t crc1, word1;
928
0
#if N > 2
929
0
            z_word_t crc2, word2;
930
0
#if N > 3
931
0
            z_word_t crc3, word3;
932
0
#if N > 4
933
0
            z_word_t crc4, word4;
934
#if N > 5
935
            z_word_t crc5, word5;
936
#endif
937
0
#endif
938
0
#endif
939
0
#endif
940
0
#endif
941
942
            /* Initialize the CRC for each braid. */
943
0
            crc0 = byte_swap(crc);
944
0
#if N > 1
945
0
            crc1 = 0;
946
0
#if N > 2
947
0
            crc2 = 0;
948
0
#if N > 3
949
0
            crc3 = 0;
950
0
#if N > 4
951
0
            crc4 = 0;
952
#if N > 5
953
            crc5 = 0;
954
#endif
955
0
#endif
956
0
#endif
957
0
#endif
958
0
#endif
959
960
            /*
961
              Process the first blks-1 blocks, computing the CRCs on each braid
962
              independently.
963
             */
964
0
            while (--blks) {
965
                /* Load the word for each braid into registers. */
966
0
                word0 = crc0 ^ words[0];
967
0
#if N > 1
968
0
                word1 = crc1 ^ words[1];
969
0
#if N > 2
970
0
                word2 = crc2 ^ words[2];
971
0
#if N > 3
972
0
                word3 = crc3 ^ words[3];
973
0
#if N > 4
974
0
                word4 = crc4 ^ words[4];
975
#if N > 5
976
                word5 = crc5 ^ words[5];
977
#endif
978
0
#endif
979
0
#endif
980
0
#endif
981
0
#endif
982
0
                words += N;
983
984
                /* Compute and update the CRC for each word. The loop should
985
                   get unrolled. */
986
0
                crc0 = crc_braid_big_table[0][word0 & 0xff];
987
0
#if N > 1
988
0
                crc1 = crc_braid_big_table[0][word1 & 0xff];
989
0
#if N > 2
990
0
                crc2 = crc_braid_big_table[0][word2 & 0xff];
991
0
#if N > 3
992
0
                crc3 = crc_braid_big_table[0][word3 & 0xff];
993
0
#if N > 4
994
0
                crc4 = crc_braid_big_table[0][word4 & 0xff];
995
#if N > 5
996
                crc5 = crc_braid_big_table[0][word5 & 0xff];
997
#endif
998
0
#endif
999
0
#endif
1000
0
#endif
1001
0
#endif
1002
0
                for (k = 1; k < W; k++) {
1003
0
                    crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
1004
0
#if N > 1
1005
0
                    crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
1006
0
#if N > 2
1007
0
                    crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
1008
0
#if N > 3
1009
0
                    crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
1010
0
#if N > 4
1011
0
                    crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
1012
#if N > 5
1013
                    crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
1014
#endif
1015
0
#endif
1016
0
#endif
1017
0
#endif
1018
0
#endif
1019
0
                }
1020
0
            }
1021
1022
            /*
1023
              Process the last block, combining the CRCs of the N braids at the
1024
              same time.
1025
             */
1026
0
            comb = crc_word_big(crc0 ^ words[0]);
1027
0
#if N > 1
1028
0
            comb = crc_word_big(crc1 ^ words[1] ^ comb);
1029
0
#if N > 2
1030
0
            comb = crc_word_big(crc2 ^ words[2] ^ comb);
1031
0
#if N > 3
1032
0
            comb = crc_word_big(crc3 ^ words[3] ^ comb);
1033
0
#if N > 4
1034
0
            comb = crc_word_big(crc4 ^ words[4] ^ comb);
1035
#if N > 5
1036
            comb = crc_word_big(crc5 ^ words[5] ^ comb);
1037
#endif
1038
0
#endif
1039
0
#endif
1040
0
#endif
1041
0
#endif
1042
0
            words += N;
1043
0
            crc = byte_swap(comb);
1044
0
        }
1045
1046
        /*
1047
          Update the pointer to the remaining bytes to process.
1048
         */
1049
6.53k
        buf = (unsigned char const *)words;
1050
6.53k
    }
1051
1052
2.35M
#endif /* W */
1053
1054
    /* Complete the computation of the CRC on any remaining bytes. */
1055
2.39M
    while (len >= 8) {
1056
48.7k
        len -= 8;
1057
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1058
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1059
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1060
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1061
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1062
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1063
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1064
48.7k
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1065
48.7k
    }
1066
9.39M
    while (len) {
1067
7.04M
        len--;
1068
7.04M
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1069
7.04M
    }
1070
1071
    /* Return the CRC, post-conditioned. */
1072
2.35M
    return crc ^ 0xffffffff;
1073
4.65M
}
1074
1075
#endif
1076
1077
/* ========================================================================= */
1078
unsigned long ZEXPORT crc32(crc, buf, len)
1079
    unsigned long crc;
1080
    const unsigned char FAR *buf;
1081
    uInt len;
1082
4.65M
{
1083
4.65M
    return crc32_z(crc, buf, len);
1084
4.65M
}
1085
1086
/* ========================================================================= */
1087
uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1088
    uLong crc1;
1089
    uLong crc2;
1090
    z_off64_t len2;
1091
159
{
1092
#ifdef DYNAMIC_CRC_TABLE
1093
    once(&made, make_crc_table);
1094
#endif /* DYNAMIC_CRC_TABLE */
1095
159
    return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1096
159
}
1097
1098
/* ========================================================================= */
1099
uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1100
    uLong crc1;
1101
    uLong crc2;
1102
    z_off_t len2;
1103
159
{
1104
159
    return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1105
159
}
1106
1107
/* ========================================================================= */
1108
uLong ZEXPORT crc32_combine_gen64(len2)
1109
    z_off64_t len2;
1110
0
{
1111
#ifdef DYNAMIC_CRC_TABLE
1112
    once(&made, make_crc_table);
1113
#endif /* DYNAMIC_CRC_TABLE */
1114
0
    return x2nmodp(len2, 3);
1115
0
}
1116
1117
/* ========================================================================= */
1118
uLong ZEXPORT crc32_combine_gen(len2)
1119
    z_off_t len2;
1120
0
{
1121
0
    return crc32_combine_gen64((z_off64_t)len2);
1122
0
}
1123
1124
/* ========================================================================= */
1125
uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
1126
    uLong crc1;
1127
    uLong crc2;
1128
    uLong op;
1129
0
{
1130
0
    return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1131
0
}