Coverage Report

Created: 2023-06-07 06:31

/src/libgit2/deps/zlib/crc32.c
Line
Count
Source (jump to first uncovered line)
1
/* crc32.c -- compute the CRC-32 of a data stream
2
 * Copyright (C) 1995-2022 Mark Adler
3
 * For conditions of distribution and use, see copyright notice in zlib.h
4
 *
5
 * This interleaved implementation of a CRC makes use of pipelined multiple
6
 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7
 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8
 */
9
10
/* @(#) $Id$ */
11
12
/*
13
  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14
  protection on the static variables used to control the first-use generation
15
  of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16
  first call get_crc_table() to initialize the tables before allowing more than
17
  one thread to use crc32().
18
19
  MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20
  produced, so that this one source file can be compiled to an executable.
21
 */
22
23
#ifdef MAKECRCH
24
#  include <stdio.h>
25
#  ifndef DYNAMIC_CRC_TABLE
26
#    define DYNAMIC_CRC_TABLE
27
#  endif /* !DYNAMIC_CRC_TABLE */
28
#endif /* MAKECRCH */
29
30
#include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31
32
 /*
33
  A CRC of a message is computed on N braids of words in the message, where
34
  each word consists of W bytes (4 or 8). If N is 3, for example, then three
35
  running sparse CRCs are calculated respectively on each braid, at these
36
  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37
  This is done starting at a word boundary, and continues until as many blocks
38
  of N * W bytes as are available have been processed. The results are combined
39
  into a single CRC at the end. For this code, N must be in the range 1..6 and
40
  W must be 4 or 8. The upper limit on N can be increased if desired by adding
41
  more #if blocks, extending the patterns apparent in the code. In addition,
42
  crc32.h would need to be regenerated, if the maximum N value is increased.
43
44
  N and W are chosen empirically by benchmarking the execution time on a given
45
  processor. The choices for N and W below were based on testing on Intel Kaby
46
  Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47
  Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48
  with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49
  They were all tested with either gcc or clang, all using the -O3 optimization
50
  level. Your mileage may vary.
51
 */
52
53
/* Define N */
54
#ifdef Z_TESTN
55
#  define N Z_TESTN
56
#else
57
0
#  define N 5
58
#endif
59
#if N < 1 || N > 6
60
#  error N must be in 1..6
61
#endif
62
63
/*
64
  z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65
  z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66
  that bytes are eight bits.
67
 */
68
69
/*
70
  Define W and the associated z_word_t type. If W is not defined, then a
71
  braided calculation is not used, and the associated tables and code are not
72
  compiled.
73
 */
74
#ifdef Z_TESTW
75
#  if Z_TESTW-1 != -1
76
#    define W Z_TESTW
77
#  endif
78
#else
79
#  ifdef MAKECRCH
80
#    define W 8         /* required for MAKECRCH */
81
#  else
82
#    if defined(__x86_64__) || defined(__aarch64__)
83
0
#      define W 8
84
#    else
85
#      define W 4
86
#    endif
87
#  endif
88
#endif
89
#ifdef W
90
#  if W == 8 && defined(Z_U8)
91
     typedef Z_U8 z_word_t;
92
#  elif defined(Z_U4)
93
#    undef W
94
#    define W 4
95
     typedef Z_U4 z_word_t;
96
#  else
97
#    undef W
98
#  endif
99
#endif
100
101
/* Local functions. */
102
local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
103
local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
104
105
/* If available, use the ARM processor CRC32 instruction. */
106
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
107
#  define ARMCRC32
108
#endif
109
110
#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111
/*
112
  Swap the bytes in a z_word_t to convert between little and big endian. Any
113
  self-respecting compiler will optimize this to a single machine byte-swap
114
  instruction, if one is available. This assumes that word_t is either 32 bits
115
  or 64 bits.
116
 */
117
118
local z_word_t byte_swap(z_word_t word);
119
120
local z_word_t byte_swap(word)
121
    z_word_t word;
122
0
{
123
0
#  if W == 8
124
0
    return
125
0
        (word & 0xff00000000000000) >> 56 |
126
0
        (word & 0xff000000000000) >> 40 |
127
0
        (word & 0xff0000000000) >> 24 |
128
0
        (word & 0xff00000000) >> 8 |
129
0
        (word & 0xff000000) << 8 |
130
0
        (word & 0xff0000) << 24 |
131
0
        (word & 0xff00) << 40 |
132
0
        (word & 0xff) << 56;
133
#  else   /* W == 4 */
134
    return
135
        (word & 0xff000000) >> 24 |
136
        (word & 0xff0000) >> 8 |
137
        (word & 0xff00) << 8 |
138
        (word & 0xff) << 24;
139
#  endif
140
0
}
141
#endif
142
143
/* CRC polynomial. */
144
0
#define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
145
146
#ifdef DYNAMIC_CRC_TABLE
147
148
local z_crc_t FAR crc_table[256];
149
local z_crc_t FAR x2n_table[32];
150
local void make_crc_table OF((void));
151
#ifdef W
152
   local z_word_t FAR crc_big_table[256];
153
   local z_crc_t FAR crc_braid_table[W][256];
154
   local z_word_t FAR crc_braid_big_table[W][256];
155
   local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
156
#endif
157
#ifdef MAKECRCH
158
   local void write_table OF((FILE *, const z_crc_t FAR *, int));
159
   local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
160
   local void write_table64 OF((FILE *, const z_word_t FAR *, int));
161
#endif /* MAKECRCH */
162
163
/*
164
  Define a once() function depending on the availability of atomics. If this is
165
  compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
166
  multiple threads, and if atomics are not available, then get_crc_table() must
167
  be called to initialize the tables and must return before any threads are
168
  allowed to compute or combine CRCs.
169
 */
170
171
/* Definition of once functionality. */
172
typedef struct once_s once_t;
173
local void once OF((once_t *, void (*)(void)));
174
175
/* Check for the availability of atomics. */
176
#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
177
    !defined(__STDC_NO_ATOMICS__)
178
179
#include <stdatomic.h>
180
181
/* Structure for once(), which must be initialized with ONCE_INIT. */
182
struct once_s {
183
    atomic_flag begun;
184
    atomic_int done;
185
};
186
#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
187
188
/*
189
  Run the provided init() function exactly once, even if multiple threads
190
  invoke once() at the same time. The state must be a once_t initialized with
191
  ONCE_INIT.
192
 */
193
local void once(state, init)
194
    once_t *state;
195
    void (*init)(void);
196
{
197
    if (!atomic_load(&state->done)) {
198
        if (atomic_flag_test_and_set(&state->begun))
199
            while (!atomic_load(&state->done))
200
                ;
201
        else {
202
            init();
203
            atomic_store(&state->done, 1);
204
        }
205
    }
206
}
207
208
#else   /* no atomics */
209
210
/* Structure for once(), which must be initialized with ONCE_INIT. */
211
struct once_s {
212
    volatile int begun;
213
    volatile int done;
214
};
215
#define ONCE_INIT {0, 0}
216
217
/* Test and set. Alas, not atomic, but tries to minimize the period of
218
   vulnerability. */
219
local int test_and_set OF((int volatile *));
220
local int test_and_set(flag)
221
    int volatile *flag;
222
{
223
    int was;
224
225
    was = *flag;
226
    *flag = 1;
227
    return was;
228
}
229
230
/* Run the provided init() function once. This is not thread-safe. */
231
local void once(state, init)
232
    once_t *state;
233
    void (*init)(void);
234
{
235
    if (!state->done) {
236
        if (test_and_set(&state->begun))
237
            while (!state->done)
238
                ;
239
        else {
240
            init();
241
            state->done = 1;
242
        }
243
    }
244
}
245
246
#endif
247
248
/* State for once(). */
249
local once_t made = ONCE_INIT;
250
251
/*
252
  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
253
  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
254
255
  Polynomials over GF(2) are represented in binary, one bit per coefficient,
256
  with the lowest powers in the most significant bit. Then adding polynomials
257
  is just exclusive-or, and multiplying a polynomial by x is a right shift by
258
  one. If we call the above polynomial p, and represent a byte as the
259
  polynomial q, also with the lowest power in the most significant bit (so the
260
  byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
261
  where a mod b means the remainder after dividing a by b.
262
263
  This calculation is done using the shift-register method of multiplying and
264
  taking the remainder. The register is initialized to zero, and for each
265
  incoming bit, x^32 is added mod p to the register if the bit is a one (where
266
  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
267
  (which is shifting right by one and adding x^32 mod p if the bit shifted out
268
  is a one). We start with the highest power (least significant bit) of q and
269
  repeat for all eight bits of q.
270
271
  The table is simply the CRC of all possible eight bit values. This is all the
272
  information needed to generate CRCs on data a byte at a time for all
273
  combinations of CRC register values and incoming bytes.
274
 */
275
276
local void make_crc_table()
277
{
278
    unsigned i, j, n;
279
    z_crc_t p;
280
281
    /* initialize the CRC of bytes tables */
282
    for (i = 0; i < 256; i++) {
283
        p = i;
284
        for (j = 0; j < 8; j++)
285
            p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
286
        crc_table[i] = p;
287
#ifdef W
288
        crc_big_table[i] = byte_swap(p);
289
#endif
290
    }
291
292
    /* initialize the x^2^n mod p(x) table */
293
    p = (z_crc_t)1 << 30;         /* x^1 */
294
    x2n_table[0] = p;
295
    for (n = 1; n < 32; n++)
296
        x2n_table[n] = p = multmodp(p, p);
297
298
#ifdef W
299
    /* initialize the braiding tables -- needs x2n_table[] */
300
    braid(crc_braid_table, crc_braid_big_table, N, W);
301
#endif
302
303
#ifdef MAKECRCH
304
    {
305
        /*
306
          The crc32.h header file contains tables for both 32-bit and 64-bit
307
          z_word_t's, and so requires a 64-bit type be available. In that case,
308
          z_word_t must be defined to be 64-bits. This code then also generates
309
          and writes out the tables for the case that z_word_t is 32 bits.
310
         */
311
#if !defined(W) || W != 8
312
#  error Need a 64-bit integer type in order to generate crc32.h.
313
#endif
314
        FILE *out;
315
        int k, n;
316
        z_crc_t ltl[8][256];
317
        z_word_t big[8][256];
318
319
        out = fopen("crc32.h", "w");
320
        if (out == NULL) return;
321
322
        /* write out little-endian CRC table to crc32.h */
323
        fprintf(out,
324
            "/* crc32.h -- tables for rapid CRC calculation\n"
325
            " * Generated automatically by crc32.c\n */\n"
326
            "\n"
327
            "local const z_crc_t FAR crc_table[] = {\n"
328
            "    ");
329
        write_table(out, crc_table, 256);
330
        fprintf(out,
331
            "};\n");
332
333
        /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
334
        fprintf(out,
335
            "\n"
336
            "#ifdef W\n"
337
            "\n"
338
            "#if W == 8\n"
339
            "\n"
340
            "local const z_word_t FAR crc_big_table[] = {\n"
341
            "    ");
342
        write_table64(out, crc_big_table, 256);
343
        fprintf(out,
344
            "};\n");
345
346
        /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
347
        fprintf(out,
348
            "\n"
349
            "#else /* W == 4 */\n"
350
            "\n"
351
            "local const z_word_t FAR crc_big_table[] = {\n"
352
            "    ");
353
        write_table32hi(out, crc_big_table, 256);
354
        fprintf(out,
355
            "};\n"
356
            "\n"
357
            "#endif\n");
358
359
        /* write out braid tables for each value of N */
360
        for (n = 1; n <= 6; n++) {
361
            fprintf(out,
362
            "\n"
363
            "#if N == %d\n", n);
364
365
            /* compute braid tables for this N and 64-bit word_t */
366
            braid(ltl, big, n, 8);
367
368
            /* write out braid tables for 64-bit z_word_t to crc32.h */
369
            fprintf(out,
370
            "\n"
371
            "#if W == 8\n"
372
            "\n"
373
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
374
            for (k = 0; k < 8; k++) {
375
                fprintf(out, "   {");
376
                write_table(out, ltl[k], 256);
377
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
378
            }
379
            fprintf(out,
380
            "};\n"
381
            "\n"
382
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
383
            for (k = 0; k < 8; k++) {
384
                fprintf(out, "   {");
385
                write_table64(out, big[k], 256);
386
                fprintf(out, "}%s", k < 7 ? ",\n" : "");
387
            }
388
            fprintf(out,
389
            "};\n");
390
391
            /* compute braid tables for this N and 32-bit word_t */
392
            braid(ltl, big, n, 4);
393
394
            /* write out braid tables for 32-bit z_word_t to crc32.h */
395
            fprintf(out,
396
            "\n"
397
            "#else /* W == 4 */\n"
398
            "\n"
399
            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
400
            for (k = 0; k < 4; k++) {
401
                fprintf(out, "   {");
402
                write_table(out, ltl[k], 256);
403
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
404
            }
405
            fprintf(out,
406
            "};\n"
407
            "\n"
408
            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
409
            for (k = 0; k < 4; k++) {
410
                fprintf(out, "   {");
411
                write_table32hi(out, big[k], 256);
412
                fprintf(out, "}%s", k < 3 ? ",\n" : "");
413
            }
414
            fprintf(out,
415
            "};\n"
416
            "\n"
417
            "#endif\n"
418
            "\n"
419
            "#endif\n");
420
        }
421
        fprintf(out,
422
            "\n"
423
            "#endif\n");
424
425
        /* write out zeros operator table to crc32.h */
426
        fprintf(out,
427
            "\n"
428
            "local const z_crc_t FAR x2n_table[] = {\n"
429
            "    ");
430
        write_table(out, x2n_table, 32);
431
        fprintf(out,
432
            "};\n");
433
        fclose(out);
434
    }
435
#endif /* MAKECRCH */
436
}
437
438
#ifdef MAKECRCH
439
440
/*
441
   Write the 32-bit values in table[0..k-1] to out, five per line in
442
   hexadecimal separated by commas.
443
 */
444
local void write_table(out, table, k)
445
    FILE *out;
446
    const z_crc_t FAR *table;
447
    int k;
448
{
449
    int n;
450
451
    for (n = 0; n < k; n++)
452
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
453
                (unsigned long)(table[n]),
454
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
455
}
456
457
/*
458
   Write the high 32-bits of each value in table[0..k-1] to out, five per line
459
   in hexadecimal separated by commas.
460
 */
461
local void write_table32hi(out, table, k)
462
FILE *out;
463
const z_word_t FAR *table;
464
int k;
465
{
466
    int n;
467
468
    for (n = 0; n < k; n++)
469
        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
470
                (unsigned long)(table[n] >> 32),
471
                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
472
}
473
474
/*
475
  Write the 64-bit values in table[0..k-1] to out, three per line in
476
  hexadecimal separated by commas. This assumes that if there is a 64-bit
477
  type, then there is also a long long integer type, and it is at least 64
478
  bits. If not, then the type cast and format string can be adjusted
479
  accordingly.
480
 */
481
local void write_table64(out, table, k)
482
    FILE *out;
483
    const z_word_t FAR *table;
484
    int k;
485
{
486
    int n;
487
488
    for (n = 0; n < k; n++)
489
        fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
490
                (unsigned long long)(table[n]),
491
                n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
492
}
493
494
/* Actually do the deed. */
495
int main()
496
{
497
    make_crc_table();
498
    return 0;
499
}
500
501
#endif /* MAKECRCH */
502
503
#ifdef W
504
/*
505
  Generate the little and big-endian braid tables for the given n and z_word_t
506
  size w. Each array must have room for w blocks of 256 elements.
507
 */
508
local void braid(ltl, big, n, w)
509
    z_crc_t ltl[][256];
510
    z_word_t big[][256];
511
    int n;
512
    int w;
513
{
514
    int k;
515
    z_crc_t i, p, q;
516
    for (k = 0; k < w; k++) {
517
        p = x2nmodp((n * w + 3 - k) << 3, 0);
518
        ltl[k][0] = 0;
519
        big[w - 1 - k][0] = 0;
520
        for (i = 1; i < 256; i++) {
521
            ltl[k][i] = q = multmodp(i << 24, p);
522
            big[w - 1 - k][i] = byte_swap(q);
523
        }
524
    }
525
}
526
#endif
527
528
#else /* !DYNAMIC_CRC_TABLE */
529
/* ========================================================================
530
 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
531
 * of x for combining CRC-32s, all made by make_crc_table().
532
 */
533
#include "crc32.h"
534
#endif /* DYNAMIC_CRC_TABLE */
535
536
/* ========================================================================
537
 * Routines used for CRC calculation. Some are also required for the table
538
 * generation above.
539
 */
540
541
/*
542
  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
543
  reflected. For speed, this requires that a not be zero.
544
 */
545
local z_crc_t multmodp(a, b)
546
    z_crc_t a;
547
    z_crc_t b;
548
0
{
549
0
    z_crc_t m, p;
550
551
0
    m = (z_crc_t)1 << 31;
552
0
    p = 0;
553
0
    for (;;) {
554
0
        if (a & m) {
555
0
            p ^= b;
556
0
            if ((a & (m - 1)) == 0)
557
0
                break;
558
0
        }
559
0
        m >>= 1;
560
0
        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
561
0
    }
562
0
    return p;
563
0
}
564
565
/*
566
  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
567
  initialized.
568
 */
569
local z_crc_t x2nmodp(n, k)
570
    z_off64_t n;
571
    unsigned k;
572
0
{
573
0
    z_crc_t p;
574
575
0
    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
576
0
    while (n) {
577
0
        if (n & 1)
578
0
            p = multmodp(x2n_table[k & 31], p);
579
0
        n >>= 1;
580
0
        k++;
581
0
    }
582
0
    return p;
583
0
}
584
585
/* =========================================================================
586
 * This function can be used by asm versions of crc32(), and to force the
587
 * generation of the CRC tables in a threaded application.
588
 */
589
const z_crc_t FAR * ZEXPORT get_crc_table()
590
0
{
591
#ifdef DYNAMIC_CRC_TABLE
592
    once(&made, make_crc_table);
593
#endif /* DYNAMIC_CRC_TABLE */
594
0
    return (const z_crc_t FAR *)crc_table;
595
0
}
596
597
/* =========================================================================
598
 * Use ARM machine instructions if available. This will compute the CRC about
599
 * ten times faster than the braided calculation. This code does not check for
600
 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
601
 * only be defined if the compilation specifies an ARM processor architecture
602
 * that has the instructions. For example, compiling with -march=armv8.1-a or
603
 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
604
 * instructions.
605
 */
606
#ifdef ARMCRC32
607
608
/*
609
   Constants empirically determined to maximize speed. These values are from
610
   measurements on a Cortex-A57. Your mileage may vary.
611
 */
612
#define Z_BATCH 3990                /* number of words in a batch */
613
#define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
614
#define Z_BATCH_MIN 800             /* fewest words in a final batch */
615
616
unsigned long ZEXPORT crc32_z(crc, buf, len)
617
    unsigned long crc;
618
    const unsigned char FAR *buf;
619
    z_size_t len;
620
{
621
    z_crc_t val;
622
    z_word_t crc1, crc2;
623
    const z_word_t *word;
624
    z_word_t val0, val1, val2;
625
    z_size_t last, last2, i;
626
    z_size_t num;
627
628
    /* Return initial CRC, if requested. */
629
    if (buf == Z_NULL) return 0;
630
631
#ifdef DYNAMIC_CRC_TABLE
632
    once(&made, make_crc_table);
633
#endif /* DYNAMIC_CRC_TABLE */
634
635
    /* Pre-condition the CRC */
636
    crc ^= 0xffffffff;
637
638
    /* Compute the CRC up to a word boundary. */
639
    while (len && ((z_size_t)buf & 7) != 0) {
640
        len--;
641
        val = *buf++;
642
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
643
    }
644
645
    /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
646
    word = (z_word_t const *)buf;
647
    num = len >> 3;
648
    len &= 7;
649
650
    /* Do three interleaved CRCs to realize the throughput of one crc32x
651
       instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
652
       CRCs are combined into a single CRC after each set of batches. */
653
    while (num >= 3 * Z_BATCH) {
654
        crc1 = 0;
655
        crc2 = 0;
656
        for (i = 0; i < Z_BATCH; i++) {
657
            val0 = word[i];
658
            val1 = word[i + Z_BATCH];
659
            val2 = word[i + 2 * Z_BATCH];
660
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
661
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
662
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
663
        }
664
        word += 3 * Z_BATCH;
665
        num -= 3 * Z_BATCH;
666
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
667
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
668
    }
669
670
    /* Do one last smaller batch with the remaining words, if there are enough
671
       to pay for the combination of CRCs. */
672
    last = num / 3;
673
    if (last >= Z_BATCH_MIN) {
674
        last2 = last << 1;
675
        crc1 = 0;
676
        crc2 = 0;
677
        for (i = 0; i < last; i++) {
678
            val0 = word[i];
679
            val1 = word[i + last];
680
            val2 = word[i + last2];
681
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
682
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
683
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
684
        }
685
        word += 3 * last;
686
        num -= 3 * last;
687
        val = x2nmodp(last, 6);
688
        crc = multmodp(val, crc) ^ crc1;
689
        crc = multmodp(val, crc) ^ crc2;
690
    }
691
692
    /* Compute the CRC on any remaining words. */
693
    for (i = 0; i < num; i++) {
694
        val0 = word[i];
695
        __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
696
    }
697
    word += num;
698
699
    /* Complete the CRC on any remaining bytes. */
700
    buf = (const unsigned char FAR *)word;
701
    while (len) {
702
        len--;
703
        val = *buf++;
704
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
705
    }
706
707
    /* Return the CRC, post-conditioned. */
708
    return crc ^ 0xffffffff;
709
}
710
711
#else
712
713
#ifdef W
714
715
local z_crc_t crc_word(z_word_t data);
716
local z_word_t crc_word_big(z_word_t data);
717
718
/*
719
  Return the CRC of the W bytes in the word_t data, taking the
720
  least-significant byte of the word as the first byte of data, without any pre
721
  or post conditioning. This is used to combine the CRCs of each braid.
722
 */
723
local z_crc_t crc_word(data)
724
    z_word_t data;
725
0
{
726
0
    int k;
727
0
    for (k = 0; k < W; k++)
728
0
        data = (data >> 8) ^ crc_table[data & 0xff];
729
0
    return (z_crc_t)data;
730
0
}
731
732
local z_word_t crc_word_big(data)
733
    z_word_t data;
734
0
{
735
0
    int k;
736
0
    for (k = 0; k < W; k++)
737
0
        data = (data << 8) ^
738
0
            crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
739
0
    return data;
740
0
}
741
742
#endif
743
744
/* ========================================================================= */
745
unsigned long ZEXPORT crc32_z(crc, buf, len)
746
    unsigned long crc;
747
    const unsigned char FAR *buf;
748
    z_size_t len;
749
0
{
750
    /* Return initial CRC, if requested. */
751
0
    if (buf == Z_NULL) return 0;
752
753
#ifdef DYNAMIC_CRC_TABLE
754
    once(&made, make_crc_table);
755
#endif /* DYNAMIC_CRC_TABLE */
756
757
    /* Pre-condition the CRC */
758
0
    crc ^= 0xffffffff;
759
760
0
#ifdef W
761
762
    /* If provided enough bytes, do a braided CRC calculation. */
763
0
    if (len >= N * W + W - 1) {
764
0
        z_size_t blks;
765
0
        z_word_t const *words;
766
0
        unsigned endian;
767
0
        int k;
768
769
        /* Compute the CRC up to a z_word_t boundary. */
770
0
        while (len && ((z_size_t)buf & (W - 1)) != 0) {
771
0
            len--;
772
0
            crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
773
0
        }
774
775
        /* Compute the CRC on as many N z_word_t blocks as are available. */
776
0
        blks = len / (N * W);
777
0
        len -= blks * N * W;
778
0
        words = (z_word_t const *)buf;
779
780
        /* Do endian check at execution time instead of compile time, since ARM
781
           processors can change the endianess at execution time. If the
782
           compiler knows what the endianess will be, it can optimize out the
783
           check and the unused branch. */
784
0
        endian = 1;
785
0
        if (*(unsigned char *)&endian) {
786
            /* Little endian. */
787
788
0
            z_crc_t crc0;
789
0
            z_word_t word0;
790
0
#if N > 1
791
0
            z_crc_t crc1;
792
0
            z_word_t word1;
793
0
#if N > 2
794
0
            z_crc_t crc2;
795
0
            z_word_t word2;
796
0
#if N > 3
797
0
            z_crc_t crc3;
798
0
            z_word_t word3;
799
0
#if N > 4
800
0
            z_crc_t crc4;
801
0
            z_word_t word4;
802
#if N > 5
803
            z_crc_t crc5;
804
            z_word_t word5;
805
#endif
806
0
#endif
807
0
#endif
808
0
#endif
809
0
#endif
810
811
            /* Initialize the CRC for each braid. */
812
0
            crc0 = crc;
813
0
#if N > 1
814
0
            crc1 = 0;
815
0
#if N > 2
816
0
            crc2 = 0;
817
0
#if N > 3
818
0
            crc3 = 0;
819
0
#if N > 4
820
0
            crc4 = 0;
821
#if N > 5
822
            crc5 = 0;
823
#endif
824
0
#endif
825
0
#endif
826
0
#endif
827
0
#endif
828
829
            /*
830
              Process the first blks-1 blocks, computing the CRCs on each braid
831
              independently.
832
             */
833
0
            while (--blks) {
834
                /* Load the word for each braid into registers. */
835
0
                word0 = crc0 ^ words[0];
836
0
#if N > 1
837
0
                word1 = crc1 ^ words[1];
838
0
#if N > 2
839
0
                word2 = crc2 ^ words[2];
840
0
#if N > 3
841
0
                word3 = crc3 ^ words[3];
842
0
#if N > 4
843
0
                word4 = crc4 ^ words[4];
844
#if N > 5
845
                word5 = crc5 ^ words[5];
846
#endif
847
0
#endif
848
0
#endif
849
0
#endif
850
0
#endif
851
0
                words += N;
852
853
                /* Compute and update the CRC for each word. The loop should
854
                   get unrolled. */
855
0
                crc0 = crc_braid_table[0][word0 & 0xff];
856
0
#if N > 1
857
0
                crc1 = crc_braid_table[0][word1 & 0xff];
858
0
#if N > 2
859
0
                crc2 = crc_braid_table[0][word2 & 0xff];
860
0
#if N > 3
861
0
                crc3 = crc_braid_table[0][word3 & 0xff];
862
0
#if N > 4
863
0
                crc4 = crc_braid_table[0][word4 & 0xff];
864
#if N > 5
865
                crc5 = crc_braid_table[0][word5 & 0xff];
866
#endif
867
0
#endif
868
0
#endif
869
0
#endif
870
0
#endif
871
0
                for (k = 1; k < W; k++) {
872
0
                    crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
873
0
#if N > 1
874
0
                    crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
875
0
#if N > 2
876
0
                    crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
877
0
#if N > 3
878
0
                    crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
879
0
#if N > 4
880
0
                    crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
881
#if N > 5
882
                    crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
883
#endif
884
0
#endif
885
0
#endif
886
0
#endif
887
0
#endif
888
0
                }
889
0
            }
890
891
            /*
892
              Process the last block, combining the CRCs of the N braids at the
893
              same time.
894
             */
895
0
            crc = crc_word(crc0 ^ words[0]);
896
0
#if N > 1
897
0
            crc = crc_word(crc1 ^ words[1] ^ crc);
898
0
#if N > 2
899
0
            crc = crc_word(crc2 ^ words[2] ^ crc);
900
0
#if N > 3
901
0
            crc = crc_word(crc3 ^ words[3] ^ crc);
902
0
#if N > 4
903
0
            crc = crc_word(crc4 ^ words[4] ^ crc);
904
#if N > 5
905
            crc = crc_word(crc5 ^ words[5] ^ crc);
906
#endif
907
0
#endif
908
0
#endif
909
0
#endif
910
0
#endif
911
0
            words += N;
912
0
        }
913
0
        else {
914
            /* Big endian. */
915
916
0
            z_word_t crc0, word0, comb;
917
0
#if N > 1
918
0
            z_word_t crc1, word1;
919
0
#if N > 2
920
0
            z_word_t crc2, word2;
921
0
#if N > 3
922
0
            z_word_t crc3, word3;
923
0
#if N > 4
924
0
            z_word_t crc4, word4;
925
#if N > 5
926
            z_word_t crc5, word5;
927
#endif
928
0
#endif
929
0
#endif
930
0
#endif
931
0
#endif
932
933
            /* Initialize the CRC for each braid. */
934
0
            crc0 = byte_swap(crc);
935
0
#if N > 1
936
0
            crc1 = 0;
937
0
#if N > 2
938
0
            crc2 = 0;
939
0
#if N > 3
940
0
            crc3 = 0;
941
0
#if N > 4
942
0
            crc4 = 0;
943
#if N > 5
944
            crc5 = 0;
945
#endif
946
0
#endif
947
0
#endif
948
0
#endif
949
0
#endif
950
951
            /*
952
              Process the first blks-1 blocks, computing the CRCs on each braid
953
              independently.
954
             */
955
0
            while (--blks) {
956
                /* Load the word for each braid into registers. */
957
0
                word0 = crc0 ^ words[0];
958
0
#if N > 1
959
0
                word1 = crc1 ^ words[1];
960
0
#if N > 2
961
0
                word2 = crc2 ^ words[2];
962
0
#if N > 3
963
0
                word3 = crc3 ^ words[3];
964
0
#if N > 4
965
0
                word4 = crc4 ^ words[4];
966
#if N > 5
967
                word5 = crc5 ^ words[5];
968
#endif
969
0
#endif
970
0
#endif
971
0
#endif
972
0
#endif
973
0
                words += N;
974
975
                /* Compute and update the CRC for each word. The loop should
976
                   get unrolled. */
977
0
                crc0 = crc_braid_big_table[0][word0 & 0xff];
978
0
#if N > 1
979
0
                crc1 = crc_braid_big_table[0][word1 & 0xff];
980
0
#if N > 2
981
0
                crc2 = crc_braid_big_table[0][word2 & 0xff];
982
0
#if N > 3
983
0
                crc3 = crc_braid_big_table[0][word3 & 0xff];
984
0
#if N > 4
985
0
                crc4 = crc_braid_big_table[0][word4 & 0xff];
986
#if N > 5
987
                crc5 = crc_braid_big_table[0][word5 & 0xff];
988
#endif
989
0
#endif
990
0
#endif
991
0
#endif
992
0
#endif
993
0
                for (k = 1; k < W; k++) {
994
0
                    crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
995
0
#if N > 1
996
0
                    crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
997
0
#if N > 2
998
0
                    crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
999
0
#if N > 3
1000
0
                    crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
1001
0
#if N > 4
1002
0
                    crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
1003
#if N > 5
1004
                    crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
1005
#endif
1006
0
#endif
1007
0
#endif
1008
0
#endif
1009
0
#endif
1010
0
                }
1011
0
            }
1012
1013
            /*
1014
              Process the last block, combining the CRCs of the N braids at the
1015
              same time.
1016
             */
1017
0
            comb = crc_word_big(crc0 ^ words[0]);
1018
0
#if N > 1
1019
0
            comb = crc_word_big(crc1 ^ words[1] ^ comb);
1020
0
#if N > 2
1021
0
            comb = crc_word_big(crc2 ^ words[2] ^ comb);
1022
0
#if N > 3
1023
0
            comb = crc_word_big(crc3 ^ words[3] ^ comb);
1024
0
#if N > 4
1025
0
            comb = crc_word_big(crc4 ^ words[4] ^ comb);
1026
#if N > 5
1027
            comb = crc_word_big(crc5 ^ words[5] ^ comb);
1028
#endif
1029
0
#endif
1030
0
#endif
1031
0
#endif
1032
0
#endif
1033
0
            words += N;
1034
0
            crc = byte_swap(comb);
1035
0
        }
1036
1037
        /*
1038
          Update the pointer to the remaining bytes to process.
1039
         */
1040
0
        buf = (unsigned char const *)words;
1041
0
    }
1042
1043
0
#endif /* W */
1044
1045
    /* Complete the computation of the CRC on any remaining bytes. */
1046
0
    while (len >= 8) {
1047
0
        len -= 8;
1048
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1049
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1050
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1051
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1052
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1053
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1054
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1055
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1056
0
    }
1057
0
    while (len) {
1058
0
        len--;
1059
0
        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1060
0
    }
1061
1062
    /* Return the CRC, post-conditioned. */
1063
0
    return crc ^ 0xffffffff;
1064
0
}
1065
1066
#endif
1067
1068
/* ========================================================================= */
1069
unsigned long ZEXPORT crc32(crc, buf, len)
1070
    unsigned long crc;
1071
    const unsigned char FAR *buf;
1072
    uInt len;
1073
0
{
1074
0
    return crc32_z(crc, buf, len);
1075
0
}
1076
1077
/* ========================================================================= */
1078
uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1079
    uLong crc1;
1080
    uLong crc2;
1081
    z_off64_t len2;
1082
0
{
1083
#ifdef DYNAMIC_CRC_TABLE
1084
    once(&made, make_crc_table);
1085
#endif /* DYNAMIC_CRC_TABLE */
1086
0
    return multmodp(x2nmodp(len2, 3), crc1) ^ crc2;
1087
0
}
1088
1089
/* ========================================================================= */
1090
uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1091
    uLong crc1;
1092
    uLong crc2;
1093
    z_off_t len2;
1094
0
{
1095
0
    return crc32_combine64(crc1, crc2, len2);
1096
0
}
1097
1098
/* ========================================================================= */
1099
uLong ZEXPORT crc32_combine_gen64(len2)
1100
    z_off64_t len2;
1101
0
{
1102
#ifdef DYNAMIC_CRC_TABLE
1103
    once(&made, make_crc_table);
1104
#endif /* DYNAMIC_CRC_TABLE */
1105
0
    return x2nmodp(len2, 3);
1106
0
}
1107
1108
/* ========================================================================= */
1109
uLong ZEXPORT crc32_combine_gen(len2)
1110
    z_off_t len2;
1111
0
{
1112
0
    return crc32_combine_gen64(len2);
1113
0
}
1114
1115
/* ========================================================================= */
1116
uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
1117
    uLong crc1;
1118
    uLong crc2;
1119
    uLong op;
1120
0
{
1121
0
    return multmodp(op, crc1) ^ crc2;
1122
0
}