/src/c-blosc2/blosc/stune.c
Line | Count | Source |
1 | | /********************************************************************* |
2 | | Blosc - Blocked Shuffling and Compression Library |
3 | | |
4 | | Copyright (c) 2021 Blosc Development Team <blosc@blosc.org> |
5 | | https://blosc.org |
6 | | License: BSD 3-Clause (see LICENSE.txt) |
7 | | |
8 | | See LICENSE.txt for details about copyright and rights to use. |
9 | | **********************************************************************/ |
10 | | |
11 | | #include "stune.h" |
12 | | #include "blosc2/codecs-registry.h" |
13 | | |
14 | | #include <stdbool.h> |
15 | | #include <stdio.h> |
16 | | |
17 | | |
18 | | /* Whether a codec is meant for High Compression Ratios |
19 | | Includes LZ4 + BITSHUFFLE here, but not BloscLZ + BITSHUFFLE because, |
20 | | for some reason, the latter does not work too well */ |
21 | 0 | static bool is_HCR(blosc2_context * context) { |
22 | 0 | switch (context->compcode) { |
23 | 0 | case BLOSC_BLOSCLZ : |
24 | 0 | return false; |
25 | 0 | case BLOSC_LZ4 : |
26 | | // return (context->filter_flags & BLOSC_DOBITSHUFFLE) ? true : false; |
27 | | // Do not treat LZ4 differently than BloscLZ here |
28 | 0 | return false; |
29 | 0 | case BLOSC_LZ4HC : |
30 | 0 | case BLOSC_ZLIB : |
31 | 0 | case BLOSC_ZSTD : |
32 | 0 | return true; |
33 | 0 | default : |
34 | 0 | return false; |
35 | 0 | } |
36 | 0 | } |
37 | | |
38 | 0 | int blosc_stune_init(void * config, blosc2_context* cctx, blosc2_context* dctx) { |
39 | 0 | BLOSC_UNUSED_PARAM(config); |
40 | 0 | BLOSC_UNUSED_PARAM(cctx); |
41 | 0 | BLOSC_UNUSED_PARAM(dctx); |
42 | |
|
43 | 0 | return BLOSC2_ERROR_SUCCESS; |
44 | 0 | } |
45 | | |
46 | | // Set the automatic blocksize 0 to its real value |
47 | 0 | int blosc_stune_next_blocksize(blosc2_context *context) { |
48 | 0 | int32_t clevel = context->clevel; |
49 | 0 | int32_t typesize = context->typesize; |
50 | 0 | int32_t nbytes = context->sourcesize; |
51 | 0 | int32_t user_blocksize = context->blocksize; |
52 | 0 | int32_t blocksize = nbytes; |
53 | | |
54 | | // Protection against very small buffers |
55 | 0 | if (nbytes < typesize) { |
56 | 0 | context->blocksize = 1; |
57 | 0 | return BLOSC2_ERROR_SUCCESS; |
58 | 0 | } |
59 | | |
60 | 0 | int splitmode = split_block(context, typesize, blocksize); |
61 | 0 | if (user_blocksize) { |
62 | 0 | blocksize = user_blocksize; |
63 | 0 | goto last; |
64 | 0 | } |
65 | | |
66 | 0 | if (nbytes >= L1) { |
67 | 0 | blocksize = L1; |
68 | | |
69 | | /* For HCR codecs, increase the block sizes by a factor of 2 because they |
70 | | are meant for compressing large blocks (i.e. they show a big overhead |
71 | | when compressing small ones). */ |
72 | 0 | if (is_HCR(context)) { |
73 | 0 | blocksize *= 2; |
74 | 0 | } |
75 | | |
76 | | // Choose a different blocksize depending on the compression level |
77 | 0 | switch (clevel) { |
78 | 0 | case 0: |
79 | | // Case of plain copy |
80 | 0 | blocksize /= 4; |
81 | 0 | break; |
82 | 0 | case 1: |
83 | 0 | blocksize /= 2; |
84 | 0 | break; |
85 | 0 | case 2: |
86 | 0 | blocksize *= 1; |
87 | 0 | break; |
88 | 0 | case 3: |
89 | 0 | blocksize *= 2; |
90 | 0 | break; |
91 | 0 | case 4: |
92 | 0 | case 5: |
93 | 0 | blocksize *= 4; |
94 | 0 | break; |
95 | 0 | case 6: |
96 | 0 | case 7: |
97 | 0 | case 8: |
98 | 0 | blocksize *= 8; |
99 | 0 | break; |
100 | 0 | case 9: |
101 | | // Do not exceed 256 KB for non HCR codecs |
102 | 0 | blocksize *= 8; |
103 | 0 | if (is_HCR(context)) { |
104 | 0 | blocksize *= 2; |
105 | 0 | } |
106 | 0 | break; |
107 | 0 | default: |
108 | 0 | break; |
109 | 0 | } |
110 | 0 | } |
111 | | |
112 | | /* Now the blocksize for splittable codecs */ |
113 | 0 | if (clevel > 0 && splitmode) { |
114 | | // For performance reasons, do not exceed 256 KB (it must fit in L2 cache) |
115 | 0 | switch (clevel) { |
116 | 0 | case 1: |
117 | 0 | case 2: |
118 | 0 | case 3: |
119 | 0 | blocksize = 32 * 1024; |
120 | 0 | break; |
121 | 0 | case 4: |
122 | 0 | case 5: |
123 | 0 | case 6: |
124 | 0 | blocksize = 64 * 1024; |
125 | 0 | break; |
126 | 0 | case 7: |
127 | 0 | blocksize = 128 * 1024; |
128 | 0 | break; |
129 | 0 | case 8: |
130 | 0 | blocksize = 256 * 1024; |
131 | 0 | break; |
132 | 0 | case 9: |
133 | 0 | default: |
134 | 0 | blocksize = 512 * 1024; |
135 | 0 | break; |
136 | 0 | } |
137 | | // Multiply by typesize to get proper split sizes |
138 | 0 | blocksize *= typesize; |
139 | | // But do not exceed 4 MB per thread (having this capacity in L3 is normal in modern CPUs) |
140 | 0 | if (blocksize > 4 * 1024 * 1024) { |
141 | 0 | blocksize = 4 * 1024 * 1024; |
142 | 0 | } |
143 | 0 | if (blocksize < 32 * 1024) { |
144 | | /* Do not use a too small blocksize (< 32 KB) when typesize is small */ |
145 | 0 | blocksize = 32 * 1024; |
146 | 0 | } |
147 | 0 | } |
148 | | |
149 | 0 | last: |
150 | | /* Check that blocksize is not too large */ |
151 | 0 | if (blocksize > nbytes) { |
152 | 0 | blocksize = nbytes; |
153 | 0 | } |
154 | | |
155 | | // blocksize *must absolutely* be a multiple of the typesize |
156 | 0 | if (blocksize > typesize) { |
157 | 0 | blocksize = blocksize / typesize * typesize; |
158 | 0 | } |
159 | |
|
160 | 0 | context->blocksize = blocksize; |
161 | 0 | BLOSC_INFO("compcode: %d, clevel: %d, blocksize: %d, splitmode: %d, typesize: %d", |
162 | 0 | context->compcode, context->clevel, blocksize, splitmode, typesize); |
163 | |
|
164 | 0 | return BLOSC2_ERROR_SUCCESS; |
165 | 0 | } |
166 | | |
167 | 0 | int blosc_stune_next_cparams(blosc2_context * context) { |
168 | 0 | BLOSC_UNUSED_PARAM(context); |
169 | |
|
170 | 0 | return BLOSC2_ERROR_SUCCESS; |
171 | 0 | } |
172 | | |
173 | 0 | int blosc_stune_update(blosc2_context * context, double ctime) { |
174 | 0 | BLOSC_UNUSED_PARAM(context); |
175 | 0 | BLOSC_UNUSED_PARAM(ctime); |
176 | |
|
177 | 0 | return BLOSC2_ERROR_SUCCESS; |
178 | 0 | } |
179 | | |
180 | 0 | int blosc_stune_free(blosc2_context * context) { |
181 | 0 | BLOSC_UNUSED_PARAM(context); |
182 | |
|
183 | 0 | return BLOSC2_ERROR_SUCCESS; |
184 | 0 | } |
185 | | |
186 | 0 | int split_block(blosc2_context *context, int32_t typesize, int32_t blocksize) { |
187 | 0 | switch (context->splitmode) { |
188 | 0 | case BLOSC_ALWAYS_SPLIT: |
189 | 0 | return 1; |
190 | 0 | case BLOSC_NEVER_SPLIT: |
191 | 0 | return 0; |
192 | 0 | case BLOSC_FORWARD_COMPAT_SPLIT: |
193 | 0 | case BLOSC_AUTO_SPLIT: |
194 | | // These cases will be handled later |
195 | 0 | break; |
196 | 0 | default: |
197 | 0 | BLOSC_TRACE_WARNING("Unrecognized split mode. Default to BLOSC_FORWARD_COMPAT_SPLIT"); |
198 | 0 | } |
199 | | |
200 | 0 | int compcode = context->compcode; |
201 | 0 | if (compcode == BLOSC_CODEC_OPENZL) { |
202 | | // Never split blocks for OpenZL, as it has its own block mechanisms |
203 | 0 | return 0; |
204 | 0 | } |
205 | 0 | return ( |
206 | | // Fast codecs like blosclz, lz4 seems to prefer to split |
207 | 0 | ((compcode == BLOSC_BLOSCLZ) || (compcode == BLOSC_LZ4) |
208 | | // and low levels of zstd too |
209 | 0 | || ((compcode == BLOSC_ZSTD) && (context->clevel <= 5)) |
210 | 0 | ) && |
211 | | // ...but split seems to harm cratio too much when not using shuffle |
212 | 0 | (context->filter_flags & BLOSC_DOSHUFFLE) && |
213 | 0 | (typesize <= MAX_STREAMS) && |
214 | 0 | (blocksize / typesize) >= BLOSC_MIN_BUFFERSIZE); |
215 | 0 | } |