/src/libdeflate/lib/adler32.c

Source (jump to first uncovered line)
/*
 * adler32.c - Adler-32 checksum algorithm
 *
 * Copyright 2016 Eric Biggers
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "lib_common.h"

/* The Adler-32 divisor, or "base", value */
#define DIVISOR 65521

/*
 * MAX_CHUNK_LEN is the most bytes that can be processed without the possibility
 * of s2 overflowing when it is represented as an unsigned 32-bit integer.  This
 * value was computed using the following Python script:
 *
 *  divisor = 65521
 *  count = 0
 *  s1 = divisor - 1
 *  s2 = divisor - 1
 *  while True:
 *    s1 += 0xFF
 *    s2 += s1
 *    if s2 > 0xFFFFFFFF:
 *      break
 *    count += 1
 *  print(count)
 *
 * Note that to get the correct worst-case value, we must assume that every byte
 * has value 0xFF and that s1 and s2 started with the highest possible values
 * modulo the divisor.
 */
#define MAX_CHUNK_LEN 5552

/*
 * Update the Adler-32 values s1 and s2 using n bytes from p, update p to p + n,
 * update n to 0, and reduce s1 and s2 mod DIVISOR.  It is assumed that neither
 * s1 nor s2 can overflow before the reduction at the end, i.e. n plus any bytes
 * already processed after the last reduction must not exceed MAX_CHUNK_LEN.
 *
 * This uses only portable C code.  This is used as a fallback when a vectorized
 * implementation of Adler-32 (e.g. AVX2) is unavailable on the platform.
 *
 * Some of the vectorized implementations also use this to handle the end of the
 * data when the data isn't evenly divisible by the length the vectorized code
 * works on.  To avoid compiler errors about target-specific option mismatches
 * when this is used in that way, this is a macro rather than a function.
 *
 * Although this is unvectorized, this does include an optimization where the
 * main loop processes four bytes at a time using a strategy similar to that
 * used by vectorized implementations.  This provides increased instruction-
 * level parallelism compared to the traditional 's1 += *p++; s2 += s1;'.
 */
#define ADLER32_CHUNK(s1, s2, p, n)         \
do {                 \
  if (n >= 4) {             \
    u32 s1_sum = 0;           \
    u32 byte_0_sum = 0;         \
    u32 byte_1_sum = 0;         \
    u32 byte_2_sum = 0;         \
    u32 byte_3_sum = 0;         \
                  \
    do {             \
      s1_sum += s1;         \
      s1 += p[0] + p[1] + p[2] + p[3];    \
      byte_0_sum += p[0];       \
      byte_1_sum += p[1];       \
      byte_2_sum += p[2];       \
      byte_3_sum += p[3];       \
      p += 4;           \
      n -= 4;           \
    } while (n >= 4);          \
    s2 += (4 * (s1_sum + byte_0_sum)) + (3 * byte_1_sum) +  \
          (2 * byte_2_sum) + byte_3_sum;      \
  }                \
  for (; n; n--, p++) {           \
    s1 += *p;           \
    s2 += s1;           \
  }                \
  s1 %= DIVISOR;             \
  s2 %= DIVISOR;             \
} while (0)

static u32 MAYBE_UNUSED
adler32_generic(u32 adler, const u8 *p, size_t len)
{
  u32 s1 = adler & 0xFFFF;
  u32 s2 = adler >> 16;

  while (len) {
    size_t n = MIN(len, MAX_CHUNK_LEN & ~3);

    len -= n;
    ADLER32_CHUNK(s1, s2, p, n);
  }

  return (s2 << 16) | s1;
}

/* Include architecture-specific implementation(s) if available. */
#undef DEFAULT_IMPL
#undef arch_select_adler32_func
typedef u32 (*adler32_func_t)(u32 adler, const u8 *p, size_t len);
#if defined(ARCH_ARM32) || defined(ARCH_ARM64)
#  include "arm/adler32_impl.h"
#elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
#  include "x86/adler32_impl.h"
#endif

#ifndef DEFAULT_IMPL
#  define DEFAULT_IMPL adler32_generic
#endif

#ifdef arch_select_adler32_func
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len);

static volatile adler32_func_t adler32_impl = dispatch_adler32;

/* Choose the best implementation at runtime. */
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len)
{
  adler32_func_t f = arch_select_adler32_func();

  if (f == NULL)
    f = DEFAULT_IMPL;

  adler32_impl = f;
  return f(adler, p, len);
}
#else
/* The best implementation is statically known, so call it directly. */
#define adler32_impl DEFAULT_IMPL
#endif

LIBDEFLATEAPI u32
libdeflate_adler32(u32 adler, const void *buffer, size_t len)
{
  if (buffer == NULL) /* Return initial value. */
    return 1;
  return adler32_impl(adler, buffer, len);
}

Line	Count	Source (jump to first uncovered line)
1		/*
2		* adler32.c - Adler-32 checksum algorithm
3		*
4		* Copyright 2016 Eric Biggers
5		*
6		* Permission is hereby granted, free of charge, to any person
7		* obtaining a copy of this software and associated documentation
8		* files (the "Software"), to deal in the Software without
9		* restriction, including without limitation the rights to use,
10		* copy, modify, merge, publish, distribute, sublicense, and/or sell
11		* copies of the Software, and to permit persons to whom the
12		* Software is furnished to do so, subject to the following
13		* conditions:
14		*
15		* The above copyright notice and this permission notice shall be
16		* included in all copies or substantial portions of the Software.
17		*
18		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19		* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20		* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21		* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22		* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23		* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24		* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25		* OTHER DEALINGS IN THE SOFTWARE.
26		*/
27
28		#include "lib_common.h"
29
30		/* The Adler-32 divisor, or "base", value */
31	7.94k	#define DIVISOR 65521
32
33		/*
34		* MAX_CHUNK_LEN is the most bytes that can be processed without the possibility
35		* of s2 overflowing when it is represented as an unsigned 32-bit integer. This
36		* value was computed using the following Python script:
37		*
38		* divisor = 65521
39		* count = 0
40		* s1 = divisor - 1
41		* s2 = divisor - 1
42		* while True:
43		* s1 += 0xFF
44		* s2 += s1
45		* if s2 > 0xFFFFFFFF:
46		* break
47		* count += 1
48		* print(count)
49		*
50		* Note that to get the correct worst-case value, we must assume that every byte
51		* has value 0xFF and that s1 and s2 started with the highest possible values
52		* modulo the divisor.
53		*/
54		#define MAX_CHUNK_LEN 5552
55
56		/*
57		* Update the Adler-32 values s1 and s2 using n bytes from p, update p to p + n,
58		* update n to 0, and reduce s1 and s2 mod DIVISOR. It is assumed that neither
59		* s1 nor s2 can overflow before the reduction at the end, i.e. n plus any bytes
60		* already processed after the last reduction must not exceed MAX_CHUNK_LEN.
61		*
62		* This uses only portable C code. This is used as a fallback when a vectorized
63		* implementation of Adler-32 (e.g. AVX2) is unavailable on the platform.
64		*
65		* Some of the vectorized implementations also use this to handle the end of the
66		* data when the data isn't evenly divisible by the length the vectorized code
67		* works on. To avoid compiler errors about target-specific option mismatches
68		* when this is used in that way, this is a macro rather than a function.
69		*
70		* Although this is unvectorized, this does include an optimization where the
71		* main loop processes four bytes at a time using a strategy similar to that
72		* used by vectorized implementations. This provides increased instruction-
73		* level parallelism compared to the traditional 's1 += *p++; s2 += s1;'.
74		*/
75	3.93k	#define ADLER32_CHUNK(s1, s2, p, n) \
76	3.93k	do { \
77	3.93k	if (n >= 4) { \
78	276	u32 s1_sum = 0; \
79	276	u32 byte_0_sum = 0; \
80	276	u32 byte_1_sum = 0; \
81	276	u32 byte_2_sum = 0; \
82	276	u32 byte_3_sum = 0; \
83	276	\
84	2.33k	do { \
85	2.33k	s1_sum += s1; \
86	2.33k	s1 += p[0] + p[1] + p[2] + p[3]; \
87	2.33k	byte_0_sum += p[0]; \
88	2.33k	byte_1_sum += p[1]; \
89	2.33k	byte_2_sum += p[2]; \
90	2.33k	byte_3_sum += p[3]; \
91	2.33k	p += 4; \
92	2.33k	n -= 4; \
93	2.33k	} while (n >= 4); \
94	276	s2 += (4 * (s1_sum + byte_0_sum)) + (3 * byte_1_sum) + \
95	276	(2 * byte_2_sum) + byte_3_sum; \
96	276	} \
97	4.00k	for (; n; n--, p++) { \
98	69	s1 += *p; \
99	69	s2 += s1; \
100	69	} \
101	3.93k	s1 %= DIVISOR; \
102	3.93k	s2 %= DIVISOR; \
103	3.93k	} while (0)
104
105		static u32 MAYBE_UNUSED
106		adler32_generic(u32 adler, const u8 *p, size_t len)
107	0	{
108	0	u32 s1 = adler & 0xFFFF;
109	0	u32 s2 = adler >> 16;
110
111	0	while (len) {
112	0	size_t n = MIN(len, MAX_CHUNK_LEN & ~3);
113
114	0	len -= n;
115	0	ADLER32_CHUNK(s1, s2, p, n);
116	0	}
117
118	0	return (s2 << 16) \| s1;
119	0	}
120
121		/* Include architecture-specific implementation(s) if available. */
122		#undef DEFAULT_IMPL
123		#undef arch_select_adler32_func
124		typedef u32 (adler32_func_t)(u32 adler, const u8 p, size_t len);
125		#if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
126		# include "arm/adler32_impl.h"
127		#elif defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
128		# include "x86/adler32_impl.h"
129		#endif
130
131		#ifndef DEFAULT_IMPL
132	0	# define DEFAULT_IMPL adler32_generic
133		#endif
134
135		#ifdef arch_select_adler32_func
136		static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len);
137
138		static volatile adler32_func_t adler32_impl = dispatch_adler32;
139
140		/* Choose the best implementation at runtime. */
141		static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len)
142	6	{
143	6	adler32_func_t f = arch_select_adler32_func();
144
145	6	if (f == NULL)
146	0	f = DEFAULT_IMPL;
147
148	6	adler32_impl = f;
149	6	return f(adler, p, len);
150	6	}
151		#else
152		/* The best implementation is statically known, so call it directly. */
153		#define adler32_impl DEFAULT_IMPL
154		#endif
155
156		LIBDEFLATEAPI u32
157		libdeflate_adler32(u32 adler, const void *buffer, size_t len)
158	1.04k	{
159	1.04k	if (buffer == NULL) /* Return initial value. */
160	0	return 1;
161	1.04k	return adler32_impl(adler, buffer, len);
162	1.04k	}

Coverage Report

Created: 2025-06-16 07:00