Coverage Report

Created: 2025-10-10 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/hdf5/src/H5checksum.c
Line
Count
Source
1
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2
 * Copyright by The HDF Group.                                               *
3
 * All rights reserved.                                                      *
4
 *                                                                           *
5
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
6
 * terms governing use, modification, and redistribution, is contained in    *
7
 * the LICENSE file, which can be found at the root of the source code       *
8
 * distribution tree, or in https://www.hdfgroup.org/licenses.               *
9
 * If you do not have access to either file, you may request a copy from     *
10
 * help@hdfgroup.org.                                                        *
11
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
12
13
/*-------------------------------------------------------------------------
14
 *
15
 * Created:   H5checksum.c
16
 *
17
 * Purpose:   Internal code for computing fletcher32 checksums
18
 *
19
 *-------------------------------------------------------------------------
20
 */
21
22
/****************/
23
/* Module Setup */
24
/****************/
25
#include "H5module.h" /* This source code file is part of the H5 module */
26
27
/***********/
28
/* Headers */
29
/***********/
30
#include "H5private.h" /* Generic Functions     */
31
32
/****************/
33
/* Local Macros */
34
/****************/
35
36
/* Polynomial quotient */
37
/* (same as the IEEE 802.3 (Ethernet) quotient) */
38
0
#define H5_CRC_QUOTIENT 0x04C11DB7
39
40
/******************/
41
/* Local Typedefs */
42
/******************/
43
44
/********************/
45
/* Package Typedefs */
46
/********************/
47
48
/********************/
49
/* Local Prototypes */
50
/********************/
51
52
/*********************/
53
/* Package Variables */
54
/*********************/
55
56
/*****************************/
57
/* Library Private Variables */
58
/*****************************/
59
60
/*******************/
61
/* Local Variables */
62
/*******************/
63
64
/* Table of CRCs of all 8-bit messages. */
65
static uint32_t H5_crc_table[256];
66
67
/* Flag: has the table been computed? */
68
static bool H5_crc_table_computed = false;
69
70
/*-------------------------------------------------------------------------
71
 * Function:  H5_checksum_fletcher32
72
 *
73
 * Purpose: This routine provides a generic, fast checksum algorithm for
74
 *              use in the library.
75
 *
76
 * Note:        See the Wikipedia page for Fletcher's checksum:
77
 *                  http://en.wikipedia.org/wiki/Fletcher%27s_checksum
78
 *              for more details, etc.
79
 *
80
 * Note #2:     Per the information in RFC 3309:
81
 *                      (http://tools.ietf.org/html/rfc3309)
82
 *              Fletcher's checksum is not reliable for small buffers.
83
 *
84
 * Note #3:     The algorithm below differs from that given in the Wikipedia
85
 *              page by copying the data into 'sum1' in a more portable way
86
 *              and also by initializing 'sum1' and 'sum2' to 0 instead of
87
 *              0xffff (for backward compatibility reasons with earlier
88
 *              HDF5 fletcher32 I/O filter routine, mostly).
89
 *
90
 * Return:  32-bit fletcher checksum of input buffer (can't fail)
91
 *
92
 *-------------------------------------------------------------------------
93
 */
94
uint32_t
95
H5_checksum_fletcher32(const void *_data, size_t _len)
96
0
{
97
0
    const uint8_t *data = (const uint8_t *)_data; /* Pointer to the data to be summed */
98
0
    size_t         len  = _len / 2;               /* Length in 16-bit words */
99
0
    uint32_t       sum1 = 0, sum2 = 0;
100
101
0
    FUNC_ENTER_NOAPI_NOINIT_NOERR
102
103
    /* Sanity check */
104
0
    assert(_data);
105
0
    assert(_len > 0);
106
107
    /* Compute checksum for pairs of bytes */
108
    /* (the magic "360" value is the largest number of sums that can be
109
     *  performed without numeric overflow)
110
     */
111
0
    while (len) {
112
0
        size_t tlen = len > 360 ? 360 : len;
113
0
        len -= tlen;
114
0
        do {
115
0
            sum1 += (uint32_t)(((uint16_t)data[0]) << 8) | ((uint16_t)data[1]);
116
0
            data += 2;
117
0
            sum2 += sum1;
118
0
        } while (--tlen);
119
0
        sum1 = (sum1 & 0xffff) + (sum1 >> 16);
120
0
        sum2 = (sum2 & 0xffff) + (sum2 >> 16);
121
0
    }
122
123
    /* Check for odd # of bytes */
124
0
    if (_len % 2) {
125
0
        sum1 += (uint32_t)(((uint16_t)*data) << 8);
126
0
        sum2 += sum1;
127
0
        sum1 = (sum1 & 0xffff) + (sum1 >> 16);
128
0
        sum2 = (sum2 & 0xffff) + (sum2 >> 16);
129
0
    } /* end if */
130
131
    /* Second reduction step to reduce sums to 16 bits */
132
0
    sum1 = (sum1 & 0xffff) + (sum1 >> 16);
133
0
    sum2 = (sum2 & 0xffff) + (sum2 >> 16);
134
135
0
    FUNC_LEAVE_NOAPI((sum2 << 16) | sum1)
136
0
} /* end H5_checksum_fletcher32() */
137
138
/*-------------------------------------------------------------------------
139
 * Function:  H5__checksum_crc_make_table
140
 *
141
 * Purpose: Compute the CRC table for the CRC checksum algorithm
142
 *
143
 * Return:  none
144
 *
145
 *-------------------------------------------------------------------------
146
 */
147
static void
148
H5__checksum_crc_make_table(void)
149
0
{
150
0
    uint32_t c;    /* Checksum for each byte value */
151
0
    unsigned n, k; /* Local index variables */
152
153
0
    FUNC_ENTER_PACKAGE_NOERR
154
155
    /* Compute the checksum for each possible byte value */
156
0
    for (n = 0; n < 256; n++) {
157
0
        c = (uint32_t)n;
158
0
        for (k = 0; k < 8; k++)
159
0
            if (c & 1)
160
0
                c = H5_CRC_QUOTIENT ^ (c >> 1);
161
0
            else
162
0
                c = c >> 1;
163
0
        H5_crc_table[n] = c;
164
0
    }
165
0
    H5_crc_table_computed = true;
166
167
0
    FUNC_LEAVE_NOAPI_VOID
168
0
} /* end H5__checksum_crc_make_table() */
169
170
/*-------------------------------------------------------------------------
171
 * Function:  H5__checksum_crc_update
172
 *
173
 * Purpose: Update a running CRC with the bytes buf[0..len-1]--the CRC
174
 *              should be initialized to all 1's, and the transmitted value
175
 *              is the 1's complement of the final running CRC (see the
176
 *              H5_checksum_crc() routine below)).
177
 *
178
 * Return:  32-bit CRC checksum of input buffer (can't fail)
179
 *
180
 *-------------------------------------------------------------------------
181
 */
182
static uint32_t
183
H5__checksum_crc_update(uint32_t crc, const uint8_t *buf, size_t len)
184
0
{
185
0
    size_t n; /* Local index variable */
186
187
0
    FUNC_ENTER_PACKAGE_NOERR
188
189
    /* Initialize the CRC table if necessary */
190
0
    if (!H5_crc_table_computed)
191
0
        H5__checksum_crc_make_table();
192
193
    /* Update the CRC with the results from this buffer */
194
0
    for (n = 0; n < len; n++)
195
0
        crc = H5_crc_table[(crc ^ buf[n]) & 0xff] ^ (crc >> 8);
196
197
0
    FUNC_LEAVE_NOAPI(crc)
198
0
} /* end H5__checksum_crc_update() */
199
200
/*-------------------------------------------------------------------------
201
 * Function:  H5_checksum_crc
202
 *
203
 * Purpose: This routine provides a generic checksum algorithm for
204
 *              use in the library.
205
 *
206
 * Note:        This algorithm was based on the implementation described
207
 *              in the document describing the PNG image format:
208
 *                  http://www.w3.org/TR/PNG/#D-CRCAppendix
209
 *
210
 * Return:  32-bit CRC checksum of input buffer (can't fail)
211
 *
212
 *-------------------------------------------------------------------------
213
 */
214
uint32_t
215
H5_checksum_crc(const void *_data, size_t len)
216
0
{
217
0
    FUNC_ENTER_NOAPI_NOINIT_NOERR
218
219
    /* Sanity check */
220
0
    assert(_data);
221
0
    assert(len > 0);
222
223
0
    FUNC_LEAVE_NOAPI(H5__checksum_crc_update((uint32_t)0xffffffffL, (const uint8_t *)_data, len) ^
224
0
                     0xffffffffL)
225
0
} /* end H5_checksum_crc() */
226
227
/*
228
-------------------------------------------------------------------------------
229
H5_lookup3_mix -- mix 3 32-bit values reversibly.
230
231
This is reversible, so any information in (a,b,c) before mix() is
232
still in (a,b,c) after mix().
233
234
If four pairs of (a,b,c) inputs are run through mix(), or through
235
mix() in reverse, there are at least 32 bits of the output that
236
are sometimes the same for one pair and different for another pair.
237
This was tested for:
238
* pairs that differed by one bit, by two bits, in any combination
239
  of top bits of (a,b,c), or in any combination of bottom bits of
240
  (a,b,c).
241
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
242
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
243
  is commonly produced by subtraction) look like a single 1-bit
244
  difference.
245
* the base values were pseudorandom, all zero but one bit set, or
246
  all zero plus a counter that starts at zero.
247
248
Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
249
satisfy this are
250
    4  6  8 16 19  4
251
    9 15  3 18 27 15
252
   14  9  3  7 17  3
253
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
254
for "differ" defined as + with a one-bit base and a two-bit delta.  I
255
used http://burtleburtle.net/bob/hash/avalanche.html to choose
256
the operations, constants, and arrangements of the variables.
257
258
This does not achieve avalanche.  There are input bits of (a,b,c)
259
that fail to affect some output bits of (a,b,c), especially of a.  The
260
most thoroughly mixed value is c, but it doesn't really even achieve
261
avalanche in c.
262
263
This allows some parallelism.  Read-after-writes are good at doubling
264
the number of bits affected, so the goal of mixing pulls in the opposite
265
direction as the goal of parallelism.  I did what I could.  Rotates
266
seem to cost as much as shifts on every machine I could lay my hands
267
on, and rotates are much kinder to the top and bottom bits, so I used
268
rotates.
269
-------------------------------------------------------------------------------
270
*/
271
75.3k
#define H5_lookup3_rot(x, k) (((x) << (k)) ^ ((x) >> (32 - (k))))
272
#define H5_lookup3_mix(a, b, c)                                                                              \
273
10.1k
    do {                                                                                                     \
274
10.1k
        a -= c;                                                                                              \
275
10.1k
        a ^= H5_lookup3_rot(c, 4);                                                                           \
276
10.1k
        c += b;                                                                                              \
277
10.1k
        b -= a;                                                                                              \
278
10.1k
        b ^= H5_lookup3_rot(a, 6);                                                                           \
279
10.1k
        a += c;                                                                                              \
280
10.1k
        c -= b;                                                                                              \
281
10.1k
        c ^= H5_lookup3_rot(b, 8);                                                                           \
282
10.1k
        b += a;                                                                                              \
283
10.1k
        a -= c;                                                                                              \
284
10.1k
        a ^= H5_lookup3_rot(c, 16);                                                                          \
285
10.1k
        c += b;                                                                                              \
286
10.1k
        b -= a;                                                                                              \
287
10.1k
        b ^= H5_lookup3_rot(a, 19);                                                                          \
288
10.1k
        a += c;                                                                                              \
289
10.1k
        c -= b;                                                                                              \
290
10.1k
        c ^= H5_lookup3_rot(b, 4);                                                                           \
291
10.1k
        b += a;                                                                                              \
292
10.1k
    } while (0)
293
294
/*
295
-------------------------------------------------------------------------------
296
H5_lookup3_final -- final mixing of 3 32-bit values (a,b,c) into c
297
298
Pairs of (a,b,c) values differing in only a few bits will usually
299
produce values of c that look totally different.  This was tested for
300
* pairs that differed by one bit, by two bits, in any combination
301
  of top bits of (a,b,c), or in any combination of bottom bits of
302
  (a,b,c).
303
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
304
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
305
  is commonly produced by subtraction) look like a single 1-bit
306
  difference.
307
* the base values were pseudorandom, all zero but one bit set, or
308
  all zero plus a counter that starts at zero.
309
310
These constants passed:
311
 14 11 25 16 4 14 24
312
 12 14 25 16 4 14 24
313
and these came close:
314
  4  8 15 26 3 22 24
315
 10  8 15 26 3 22 24
316
 11  8 15 26 3 22 24
317
-------------------------------------------------------------------------------
318
*/
319
#define H5_lookup3_final(a, b, c)                                                                            \
320
2.08k
    do {                                                                                                     \
321
2.08k
        c ^= b;                                                                                              \
322
2.08k
        c -= H5_lookup3_rot(b, 14);                                                                          \
323
2.08k
        a ^= c;                                                                                              \
324
2.08k
        a -= H5_lookup3_rot(c, 11);                                                                          \
325
2.08k
        b ^= a;                                                                                              \
326
2.08k
        b -= H5_lookup3_rot(a, 25);                                                                          \
327
2.08k
        c ^= b;                                                                                              \
328
2.08k
        c -= H5_lookup3_rot(b, 16);                                                                          \
329
2.08k
        a ^= c;                                                                                              \
330
2.08k
        a -= H5_lookup3_rot(c, 4);                                                                           \
331
2.08k
        b ^= a;                                                                                              \
332
2.08k
        b -= H5_lookup3_rot(a, 14);                                                                          \
333
2.08k
        c ^= b;                                                                                              \
334
2.08k
        c -= H5_lookup3_rot(b, 24);                                                                          \
335
2.08k
    } while (0)
336
337
/*
338
-------------------------------------------------------------------------------
339
H5_checksum_lookup3() -- hash a variable-length key into a 32-bit value
340
  k       : the key (the unaligned variable-length array of bytes)
341
  length  : the length of the key, counting by bytes
342
  initval : can be any 4-byte value
343
Returns a 32-bit value.  Every bit of the key affects every bit of
344
the return value.  Two keys differing by one or two bits will have
345
totally different hash values.
346
347
The best hash table sizes are powers of 2.  There is no need to do
348
mod a prime (mod is so slow!).  If you need less than 32 bits,
349
use a bitmask.  For example, if you need only 10 bits, do
350
  h = (h & hashmask(10));
351
In which case, the hash table should have hashsize(10) elements.
352
353
If you are hashing n strings (uint8_t **)k, do it like this:
354
  for (i=0, h=0; i<n; ++i) h = H5_checksum_lookup( k[i], len[i], h);
355
356
By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
357
code any way you wish, private, educational, or commercial.  It's free.
358
359
Use for hash table lookup, or anything where one collision in 2^^32 is
360
acceptable.  Do NOT use for cryptographic purposes.
361
-------------------------------------------------------------------------------
362
*/
363
364
uint32_t
365
H5_checksum_lookup3(const void *key, size_t length, uint32_t initval)
366
2.08k
{
367
2.08k
    const uint8_t *k = (const uint8_t *)key;
368
2.08k
    uint32_t       a, b, c = 0; /* internal state */
369
370
2.08k
    FUNC_ENTER_NOAPI_NOINIT_NOERR
371
372
    /* Sanity check */
373
2.08k
    assert(key);
374
2.08k
    assert(length > 0);
375
376
    /* Set up the internal state */
377
2.08k
    a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
378
379
    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
380
12.2k
    while (length > 12) {
381
10.1k
        a += k[0];
382
10.1k
        a += ((uint32_t)k[1]) << 8;
383
10.1k
        a += ((uint32_t)k[2]) << 16;
384
10.1k
        a += ((uint32_t)k[3]) << 24;
385
10.1k
        b += k[4];
386
10.1k
        b += ((uint32_t)k[5]) << 8;
387
10.1k
        b += ((uint32_t)k[6]) << 16;
388
10.1k
        b += ((uint32_t)k[7]) << 24;
389
10.1k
        c += k[8];
390
10.1k
        c += ((uint32_t)k[9]) << 8;
391
10.1k
        c += ((uint32_t)k[10]) << 16;
392
10.1k
        c += ((uint32_t)k[11]) << 24;
393
10.1k
        H5_lookup3_mix(a, b, c);
394
10.1k
        length -= 12;
395
10.1k
        k += 12;
396
10.1k
    }
397
398
    /*-------------------------------- last block: affect all 32 bits of (c) */
399
2.08k
    switch (length) /* all the case statements fall through */
400
2.08k
    {
401
16
        case 12:
402
16
            c += ((uint32_t)k[11]) << 24;
403
            /* FALLTHROUGH */
404
16
            H5_ATTR_FALLTHROUGH
405
54
        case 11:
406
54
            c += ((uint32_t)k[10]) << 16;
407
            /* FALLTHROUGH */
408
54
            H5_ATTR_FALLTHROUGH
409
89
        case 10:
410
89
            c += ((uint32_t)k[9]) << 8;
411
            /* FALLTHROUGH */
412
89
            H5_ATTR_FALLTHROUGH
413
157
        case 9:
414
157
            c += k[8];
415
            /* FALLTHROUGH */
416
157
            H5_ATTR_FALLTHROUGH
417
1.77k
        case 8:
418
1.77k
            b += ((uint32_t)k[7]) << 24;
419
            /* FALLTHROUGH */
420
1.77k
            H5_ATTR_FALLTHROUGH
421
1.89k
        case 7:
422
1.89k
            b += ((uint32_t)k[6]) << 16;
423
            /* FALLTHROUGH */
424
1.89k
            H5_ATTR_FALLTHROUGH
425
2.01k
        case 6:
426
2.01k
            b += ((uint32_t)k[5]) << 8;
427
            /* FALLTHROUGH */
428
2.01k
            H5_ATTR_FALLTHROUGH
429
2.01k
        case 5:
430
2.01k
            b += k[4];
431
            /* FALLTHROUGH */
432
2.01k
            H5_ATTR_FALLTHROUGH
433
2.04k
        case 4:
434
2.04k
            a += ((uint32_t)k[3]) << 24;
435
            /* FALLTHROUGH */
436
2.04k
            H5_ATTR_FALLTHROUGH
437
2.07k
        case 3:
438
2.07k
            a += ((uint32_t)k[2]) << 16;
439
            /* FALLTHROUGH */
440
2.07k
            H5_ATTR_FALLTHROUGH
441
2.07k
        case 2:
442
2.07k
            a += ((uint32_t)k[1]) << 8;
443
            /* FALLTHROUGH */
444
2.07k
            H5_ATTR_FALLTHROUGH
445
2.08k
        case 1:
446
2.08k
            a += k[0];
447
2.08k
            break;
448
0
        case 0:
449
0
            goto done;
450
0
        default:
451
0
            assert(0 && "This Should never be executed!");
452
2.08k
    }
453
454
2.08k
    H5_lookup3_final(a, b, c);
455
456
2.08k
done:
457
2.08k
    FUNC_LEAVE_NOAPI(c)
458
2.08k
} /* end H5_checksum_lookup3() */
459
460
/*-------------------------------------------------------------------------
461
 * Function:  H5_checksum_metadata
462
 *
463
 * Purpose: Provide a more abstract routine for checksumming metadata
464
 *              in a file, where the policy of which algorithm to choose
465
 *              is centralized.
466
 *
467
 * Return:  checksum of input buffer (can't fail)
468
 *
469
 *-------------------------------------------------------------------------
470
 */
471
uint32_t
472
H5_checksum_metadata(const void *data, size_t len, uint32_t initval)
473
2.08k
{
474
2.08k
    FUNC_ENTER_NOAPI_NOINIT_NOERR
475
476
    /* Sanity check */
477
2.08k
    assert(data);
478
2.08k
    assert(len > 0);
479
480
    /* Choose the appropriate checksum routine */
481
    /* (use Bob Jenkin's "lookup3" algorithm for all buffer sizes) */
482
2.08k
    FUNC_LEAVE_NOAPI(H5_checksum_lookup3(data, len, initval))
483
2.08k
} /* end H5_checksum_metadata() */
484
485
/*-------------------------------------------------------------------------
486
 * Function:  H5_hash_string
487
 *
488
 * Purpose: Provide a simple & fast routine for hashing strings
489
 *
490
 * Note:        This algorithm is the 'djb2' algorithm described on this page:
491
 *              http://www.cse.yorku.ca/~oz/hash.html
492
 *
493
 * Return:  hash of input string (can't fail)
494
 *
495
 *-------------------------------------------------------------------------
496
 */
497
uint32_t
498
H5_hash_string(const char *str)
499
492k
{
500
492k
    uint32_t hash = 5381;
501
492k
    int      c;
502
503
492k
    FUNC_ENTER_NOAPI_NOINIT_NOERR
504
505
    /* Sanity check */
506
492k
    assert(str);
507
508
7.46M
    while ((c = *str++))
509
6.97M
        hash = ((hash << 5) + hash) + (uint32_t)c; /* hash * 33 + c */
510
511
492k
    FUNC_LEAVE_NOAPI(hash)
512
492k
} /* end H5_hash_string() */