/src/zstd/lib/common/zstd_internal.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under both the BSD-style license (found in the |
6 | | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | | * in the COPYING file in the root directory of this source tree). |
8 | | * You may select, at your option, one of the above-listed licenses. |
9 | | */ |
10 | | |
11 | | #ifndef ZSTD_CCOMMON_H_MODULE |
12 | | #define ZSTD_CCOMMON_H_MODULE |
13 | | |
14 |  | /* This module contains definitions which must be identical |
15 |  | * across compression, decompression and dictBuilder. |
16 |  | * It also contains a few functions useful to at least two of them, |
17 |  | * which benefit from being inlined */ |
18 | | |
19 | | /*-************************************* |
20 | | * Dependencies |
21 | | ***************************************/ |
22 | | #include "compiler.h" |
23 | | #include "cpu.h" |
24 | | #include "mem.h" |
25 | | #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ |
26 | | #include "error_private.h" |
27 | | #define ZSTD_STATIC_LINKING_ONLY |
28 | | #include "../zstd.h" |
29 | | #define FSE_STATIC_LINKING_ONLY |
30 | | #include "fse.h" |
31 | | #include "huf.h" |
32 | | #ifndef XXH_STATIC_LINKING_ONLY |
33 | | # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ |
34 | | #endif |
35 | | #include "xxhash.h" /* XXH_reset, update, digest */ |
36 | | #ifndef ZSTD_NO_TRACE |
37 | | # include "zstd_trace.h" |
38 | | #else |
39 | | # define ZSTD_TRACE 0 |
40 | | #endif |
41 | | |
42 | | /* ---- static assert (debug) --- */ |
43 | 1.40G | #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) |
44 | 355M | #define ZSTD_isError ERR_isError /* for inlining */ |
45 | 364k | #define FSE_isError ERR_isError |
46 | 121k | #define HUF_isError ERR_isError |
47 | | |
48 | | |
49 | | /*-************************************* |
50 | | * shared macros |
51 | | ***************************************/ |
52 | | #undef MIN |
53 | | #undef MAX |
54 | 5.07G | #define MIN(a,b) ((a)<(b) ? (a) : (b)) |
55 | 479M | #define MAX(a,b) ((a)>(b) ? (a) : (b)) |
56 | 67.6M | #define BOUNDED(min,val,max) (MAX(min,MIN(val,max))) |
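A minimal usage sketch (illustrative only; clampWindowLog is a hypothetical helper, not part of this header): BOUNDED(min,val,max) clamps val into [min, max].

    /* Hypothetical helper: clamp a user-supplied log into a valid range. */
    static unsigned clampWindowLog(unsigned requestedLog)
    {
        /* expands to MAX(10, MIN(requestedLog, 31)) */
        return (unsigned)BOUNDED(10, requestedLog, 31);
    }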
57 | | |
58 | | |
59 | | /*-************************************* |
60 | | * Common constants |
61 | | ***************************************/ |
62 | 582M | #define ZSTD_OPT_NUM (1<<12) |
63 | | |
64 | 4.97G | #define ZSTD_REP_NUM 3 /* number of repcodes */ |
65 | | static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; |
66 | | |
67 | 103M | #define KB *(1 <<10) |
68 | 58.7M | #define MB *(1 <<20) |
69 | 0 | #define GB *(1U<<30) |
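KB, MB and GB are postfix macros: written after a number, `8 MB` expands to `8 *(1 <<20)`. A short sketch of the idiom (variable names are illustrative):

    /* Illustrative only: the postfix expansion in action. */
    static const size_t exampleBufSize = 8 MB;    /* 8 * (1 << 20) = 8388608 bytes */
    static const unsigned exampleLimit = 1 GB;    /* 1 * (1U << 30) */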
70 | | |
71 | | #define BIT7 128 |
72 | | #define BIT6 64 |
73 | | #define BIT5 32 |
74 | | #define BIT4 16 |
75 | | #define BIT1 2 |
76 | | #define BIT0 1 |
77 | | |
78 | 7.13M | #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 |
79 | | static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; |
80 | | static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; |
81 | | |
82 | 54.4k | #define ZSTD_FRAMEIDSIZE 4 /* magic number size */ |
83 | | |
84 | 23.4k | #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ |
85 | | static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; |
86 | | typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; |
87 | | |
88 | | #define ZSTD_FRAMECHECKSUMSIZE 4 |
89 | | |
90 | | #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ |
91 | 46.7M | #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ |
92 | | #define MIN_LITERALS_FOR_4_STREAMS 6 |
93 | | |
94 | | typedef enum { set_basic, set_rle, set_compressed, set_repeat } SymbolEncodingType_e; |
95 | | |
96 | 562k | #define LONGNBSEQ 0x7F00 |
97 | | |
98 | 5.49G | #define MINMATCH 3 |
99 | | |
100 | 9.90M | #define Litbits 8 |
101 | 2.72M | #define LitHufLog 11 |
102 | 4.71M | #define MaxLit ((1<<Litbits) - 1) |
103 | 30.3M | #define MaxML 52 |
104 | 24.5M | #define MaxLL 35 |
105 | 9.39M | #define DefaultMaxOff 28 |
106 | 14.4M | #define MaxOff 31 |
107 | 11.3M | #define MaxSeq MAX(MaxLL, MaxML) /* Assumption: MaxOff < MaxLL, MaxML */ |
108 | 224M | #define MLFSELog 9 |
109 | 224M | #define LLFSELog 9 |
110 | 224M | #define OffFSELog 8 |
111 | | #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) |
112 | | #define MaxMLBits 16 |
113 | | #define MaxLLBits 16 |
114 | | |
115 | | #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */ |
116 | | /* Each table cannot take more than #symbols * FSELog bits */ |
117 | | #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8) |
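Plugging in the constants above, this bound works out to (53*9 + 36*9 + 32*8 + 7)/8 = (477 + 324 + 256 + 7)/8 = 133, i.e. the three FSE table descriptions together can never exceed 133 bytes.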
118 | | |
119 | | static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = { |
120 | | 0, 0, 0, 0, 0, 0, 0, 0, |
121 | | 0, 0, 0, 0, 0, 0, 0, 0, |
122 | | 1, 1, 1, 1, 2, 2, 3, 3, |
123 | | 4, 6, 7, 8, 9,10,11,12, |
124 | | 13,14,15,16 |
125 | | }; |
126 | | static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = { |
127 | | 4, 3, 2, 2, 2, 2, 2, 2, |
128 | | 2, 2, 2, 2, 2, 1, 1, 1, |
129 | | 2, 2, 2, 2, 2, 2, 2, 2, |
130 | | 2, 3, 2, 1, 1, 1, 1, 1, |
131 | | -1,-1,-1,-1 |
132 | | }; |
133 | | #define LL_DEFAULTNORMLOG 6 /* for static allocation */ |
134 | | static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; |
135 | | |
136 | | static UNUSED_ATTR const U8 ML_bits[MaxML+1] = { |
137 | | 0, 0, 0, 0, 0, 0, 0, 0, |
138 | | 0, 0, 0, 0, 0, 0, 0, 0, |
139 | | 0, 0, 0, 0, 0, 0, 0, 0, |
140 | | 0, 0, 0, 0, 0, 0, 0, 0, |
141 | | 1, 1, 1, 1, 2, 2, 3, 3, |
142 | | 4, 4, 5, 7, 8, 9,10,11, |
143 | | 12,13,14,15,16 |
144 | | }; |
145 | | static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = { |
146 | | 1, 4, 3, 2, 2, 2, 2, 2, |
147 | | 2, 1, 1, 1, 1, 1, 1, 1, |
148 | | 1, 1, 1, 1, 1, 1, 1, 1, |
149 | | 1, 1, 1, 1, 1, 1, 1, 1, |
150 | | 1, 1, 1, 1, 1, 1, 1, 1, |
151 | | 1, 1, 1, 1, 1, 1,-1,-1, |
152 | | -1,-1,-1,-1,-1 |
153 | | }; |
154 | | #define ML_DEFAULTNORMLOG 6 /* for static allocation */ |
155 | | static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; |
156 | | |
157 | | static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = { |
158 | | 1, 1, 1, 1, 1, 1, 2, 2, |
159 | | 2, 1, 1, 1, 1, 1, 1, 1, |
160 | | 1, 1, 1, 1, 1, 1, 1, 1, |
161 | | -1,-1,-1,-1,-1 |
162 | | }; |
163 | | #define OF_DEFAULTNORMLOG 5 /* for static allocation */ |
164 | | static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; |
165 | | |
166 | | |
167 | | /*-******************************************* |
168 | | * Shared functions to include for inlining |
169 | | *********************************************/ |
170 | 465M | static void ZSTD_copy8(void* dst, const void* src) { |
171 | | #if defined(ZSTD_ARCH_ARM_NEON) && !defined(__aarch64__) |
172 | | vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); |
173 | | #else |
174 | 465M | ZSTD_memcpy(dst, src, 8); |
175 | 465M | #endif |
176 | 465M | } |
177 | 452M | #define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0) |
178 | | |
179 | | /* Need to use memmove here since the literal buffer can now be located within |
180 | | the dst buffer. In circumstances where the op "catches up" to where the |
181 | | literal buffer is, there can be partial overlaps in this call on the final |
182 | | copy if the literal is being shifted by less than 16 bytes. */ |
183 | 1.31G | static void ZSTD_copy16(void* dst, const void* src) { |
184 | | #if defined(ZSTD_ARCH_ARM_NEON) |
185 | | vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); |
186 | | #elif defined(ZSTD_ARCH_X86_SSE2) |
187 | | _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src)); |
188 | | #elif defined(ZSTD_ARCH_RISCV_RVV) |
189 | | __riscv_vse8_v_u8m1((uint8_t*)dst, __riscv_vle8_v_u8m1((const uint8_t*)src, 16), 16); |
190 | | #elif defined(__clang__) |
191 | | ZSTD_memmove(dst, src, 16); |
192 | | #else |
193 | | /* ZSTD_memmove is not inlined properly by gcc */ |
194 | | BYTE copy16_buf[16]; |
195 | | ZSTD_memcpy(copy16_buf, src, 16); |
196 | | ZSTD_memcpy(dst, copy16_buf, 16); |
197 | | #endif |
198 | 1.31G | } |
199 | 765M | #define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0) |
200 | | |
201 | 429M | #define WILDCOPY_OVERLENGTH 32 |
202 | 13.7M | #define WILDCOPY_VECLEN 16 |
203 | | |
204 | | typedef enum { |
205 | | ZSTD_no_overlap, |
206 | | ZSTD_overlap_src_before_dst |
207 | | /* ZSTD_overlap_dst_before_src, */ |
208 | | } ZSTD_overlap_e; |
209 | | |
210 | | /*! ZSTD_wildcopy() : |
211 |  | * Custom version of ZSTD_memcpy() that may over-read/over-write up to WILDCOPY_OVERLENGTH bytes beyond length (even when length==0) |
212 | | * @param ovtype controls the overlap detection |
213 | | * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. |
214 | | * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. |
215 | | * The src buffer must be before the dst buffer. |
216 | | */ |
217 | | MEM_STATIC FORCE_INLINE_ATTR |
218 | | void ZSTD_wildcopy(void* dst, const void* src, size_t length, ZSTD_overlap_e const ovtype) |
219 | 161M | { |
220 | 161M | ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; |
221 | 161M | const BYTE* ip = (const BYTE*)src; |
222 | 161M | BYTE* op = (BYTE*)dst; |
223 | 161M | BYTE* const oend = op + length; |
224 | | |
225 | 161M | if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { |
226 | | /* Handle short offset copies. */ |
227 | 452M | do { |
228 | 452M | COPY8(op, ip); |
229 | 452M | } while (op < oend); |
230 | 148M | } else { |
231 | 148M | assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); |
232 | | /* Separate out the first COPY16() call because the copy length is |
233 | | * almost certain to be short, so the branches have different |
234 | | * probabilities. Since it is almost certain to be short, only do |
235 | | * one COPY16() in the first call. Then, do two calls per loop since |
236 | | * at that point it is more likely to have a high trip count. |
237 | | */ |
238 | 148M | ZSTD_copy16(op, ip); |
239 | 148M | if (16 >= length) return; |
240 | 35.7M | op += 16; |
241 | 35.7M | ip += 16; |
242 | 382M | do { |
243 | 382M | COPY16(op, ip); |
244 | 382M | COPY16(op, ip); |
245 | 382M | } |
246 | 382M | while (op < oend); |
247 | 35.7M | } |
248 | 161M | } |
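A hedged caller sketch (the function and buffer names are hypothetical): the contract above means the destination must keep WILDCOPY_OVERLENGTH bytes of slack past the copy region, since ZSTD_wildcopy may write beyond `length`.

    /* Hypothetical caller: copy `len` bytes with the mandatory wildcopy slack. */
    static void copyWithSlack(BYTE* op, BYTE* const oend, const BYTE* ip, size_t len)
    {
        assert(op + len + WILDCOPY_OVERLENGTH <= oend);  /* slack guaranteed by caller */
        ZSTD_wildcopy(op, ip, len, ZSTD_no_overlap);     /* buffers >= WILDCOPY_VECLEN apart */
    }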
249 | | |
250 | | MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
251 | 262M | { |
252 | 262M | size_t const length = MIN(dstCapacity, srcSize); |
253 | 262M | if (length > 0) { |
254 | 224M | ZSTD_memcpy(dst, src, length); |
255 | 224M | } |
256 | 262M | return length; |
257 | 262M | } |
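A hedged usage sketch (names are illustrative): ZSTD_limitCopy truncates to the smaller of the two sizes instead of overflowing, which suits flush-style loops.

    /* Hypothetical flush step: move as much buffered data as the output can take. */
    static size_t flushBuffered(void* dst, size_t dstCapacity,
                                const void* buffered, size_t bufferedSize)
    {
        /* returns MIN(dstCapacity, bufferedSize); never writes past dstCapacity */
        return ZSTD_limitCopy(dst, dstCapacity, buffered, bufferedSize);
    }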
258 | | |
259 | | /* define "workspace is too large" as this number of times larger than needed */ |
260 | 7.91M | #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 |
261 | | |
262 |  | /* When the workspace remains too large |
263 |  | * for at least this many consecutive usages, |
264 |  | * the context's memory usage is considered wasteful, |
265 |  | * because it is sized for a worst-case scenario that rarely happens. |
266 |  | * In that case, resize it down to free some memory */ |
267 | 88.9k | #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 |
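A sketch of how these two constants combine (hedged: the real bookkeeping lives in the compression context; the function and field names here are hypothetical):

    /* Hypothetical bookkeeping: returns nonzero when the workspace should shrink. */
    static int workspaceShouldShrink(size_t workspaceSize, size_t neededSize, int* streak)
    {
        if (workspaceSize > neededSize * ZSTD_WORKSPACETOOLARGE_FACTOR) {
            if (++*streak >= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
                *streak = 0;
                return 1;      /* oversized for 128 consecutive usages: shrink */
            }
        } else {
            *streak = 0;       /* any right-sized usage resets the streak */
        }
        return 0;
    }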
268 | | |
269 | | /* Controls whether the input/output buffer is buffered or stable. */ |
270 | | typedef enum { |
271 | | ZSTD_bm_buffered = 0, /* Buffer the input/output */ |
272 | | ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ |
273 | | } ZSTD_bufferMode_e; |
274 | | |
275 | | |
276 | | /*-******************************************* |
277 | | * Private declarations |
278 | | *********************************************/ |
279 | | |
280 | | /** |
281 | | * Contains the compressed frame size and an upper-bound for the decompressed frame size. |
282 | | * Note: before using `compressedSize`, check for errors using ZSTD_isError(). |
283 |  | * Similarly, before using `decompressedBound`, check for errors using: |
284 | | * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` |
285 | | */ |
286 | | typedef struct { |
287 | | size_t nbBlocks; |
288 | | size_t compressedSize; |
289 | | unsigned long long decompressedBound; |
290 | | } ZSTD_frameSizeInfo; /* decompress & legacy */ |
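A hedged consumer sketch following the checks described above (the producer of the struct is assumed to exist elsewhere; only the two validations are the point):

    /* Hypothetical consumer: validate both fields before trusting them. */
    static unsigned long long safeDecompressedBound(ZSTD_frameSizeInfo info)
    {
        if (ZSTD_isError(info.compressedSize)) return 0;              /* handle error */
        if (info.decompressedBound == ZSTD_CONTENTSIZE_ERROR) return 0;
        return info.decompressedBound;   /* safe upper bound for dst allocation */
    }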
291 | | |
292 | | /* ZSTD_invalidateRepCodes() : |
293 |  | * ensures the next compression will not use repcodes from the previous block. |
294 |  | * Note: only works with the regular variant; |
295 |  | * do not use with the extDict variant! */ |
296 | | void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ |
297 | | |
298 | | |
299 | | typedef struct { |
300 | | blockType_e blockType; |
301 | | U32 lastBlock; |
302 | | U32 origSize; |
303 | | } blockProperties_t; /* declared here for decompress and fullbench */ |
304 | | |
305 | | /*! ZSTD_getcBlockSize() : |
306 |  | * Provides the size of the compressed block from the block header `src` */ |
307 | | /* Used by: decompress, fullbench */ |
308 | | size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, |
309 | | blockProperties_t* bpPtr); |
310 | | |
311 | | /*! ZSTD_decodeSeqHeaders() : |
312 |  | * decodes the sequence headers from src */ |
313 | | /* Used by: zstd_decompress_block, fullbench */ |
314 | | size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, |
315 | | const void* src, size_t srcSize); |
316 | | |
317 | | /** |
318 | | * @returns true iff the CPU supports dynamic BMI2 dispatch. |
319 | | */ |
320 | | MEM_STATIC int ZSTD_cpuSupportsBmi2(void) |
321 | 541k | { |
322 | 541k | ZSTD_cpuid_t cpuid = ZSTD_cpuid(); |
323 | 541k | return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid); |
324 | 541k | } |
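A hedged dispatch sketch (the typedef and selector are hypothetical): this predicate is typically sampled once, then used to pick between a generic and a BMI2-accelerated code path.

    /* Hypothetical one-time dispatch on the BMI2 capability check above. */
    typedef size_t (*decodeFn_t)(void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize);
    static decodeFn_t selectDecoder(decodeFn_t generic, decodeFn_t bmi2)
    {
        return ZSTD_cpuSupportsBmi2() ? bmi2 : generic;
    }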
325 | | |
326 | | #endif /* ZSTD_CCOMMON_H_MODULE */ |