Line | Count | Source (jump to first uncovered line) |
1 | | /* primesieve (BIT_ARRAY, N) -- Fills the BIT_ARRAY with a mask for primes up to N. |
2 | | |
3 | | Contributed to the GNU project by Marco Bodrato. |
4 | | |
5 | | THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. |
6 | | IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. |
7 | | IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR |
8 | | DISAPPEAR IN A FUTURE GNU MP RELEASE. |
9 | | |
10 | | Copyright 2010-2012, 2015, 2016, 2021, 2022 Free Software Foundation, Inc. |
11 | | |
12 | | This file is part of the GNU MP Library. |
13 | | |
14 | | The GNU MP Library is free software; you can redistribute it and/or modify |
15 | | it under the terms of either: |
16 | | |
17 | | * the GNU Lesser General Public License as published by the Free |
18 | | Software Foundation; either version 3 of the License, or (at your |
19 | | option) any later version. |
20 | | |
21 | | or |
22 | | |
23 | | * the GNU General Public License as published by the Free Software |
24 | | Foundation; either version 2 of the License, or (at your option) any |
25 | | later version. |
26 | | |
27 | | or both in parallel, as here. |
28 | | |
29 | | The GNU MP Library is distributed in the hope that it will be useful, but |
30 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
31 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
32 | | for more details. |
33 | | |
34 | | You should have received copies of the GNU General Public License and the |
35 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
36 | | see https://www.gnu.org/licenses/. */ |
37 | | |
38 | | #include "gmp-impl.h" |
39 | | |
40 | | #if 0 |
41 | | static mp_limb_t |
42 | | bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } |
43 | | #endif |
44 | | |
45 | | /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ |
46 | | static mp_limb_t |
47 | 38.9k | id_to_n (mp_limb_t id) { return id*3+1+(id&1); } |
48 | | |
49 | | /* n_fto_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ |
50 | | static mp_limb_t |
51 | 1.26k | n_fto_bit (mp_limb_t n) { return ((n-5)|1)/3U; } |
52 | | |
53 | | /* n_cto_bit (n) = ((n-2)&(-CNST_LIMB(2)))/3U */ |
54 | | static mp_limb_t |
55 | 1.24k | n_cto_bit (mp_limb_t n) { return (n|1)/3U-1; } |
56 | | |
57 | | #if 0 |
58 | | static mp_size_t |
59 | | primesieve_size (mp_limb_t n) { return n_fto_bit(n) / GMP_LIMB_BITS + 1; } |
60 | | #endif |
61 | | |
62 | | #define SET_OFF1(m1, m2, M1, M2, off, BITS) \ |
63 | 1.24k | if (off) { \ |
64 | 1.24k | if (off < GMP_LIMB_BITS) { \ |
65 | 1.24k | m1 = (M1 >> off) | (M2 << (GMP_LIMB_BITS - off)); \ |
66 | 1.24k | if (off <= BITS - GMP_LIMB_BITS) { \ |
67 | 1.24k | m2 = M1 << (BITS - GMP_LIMB_BITS - off) \ |
68 | 1.24k | | M2 >> off; \ |
69 | 1.24k | } else { \ |
70 | 0 | m1 |= M1 << (BITS - off); \ |
71 | 0 | m2 = M1 >> (off + GMP_LIMB_BITS - BITS); \ |
72 | 0 | } \ |
73 | 1.24k | } else { \ |
74 | 0 | m1 = M1 << (BITS - off) \ |
75 | 0 | | M2 >> (off - GMP_LIMB_BITS); \ |
76 | 0 | m2 = M2 << (BITS - off) \ |
77 | 0 | | M1 >> (off + GMP_LIMB_BITS - BITS); \ |
78 | 0 | } \ |
79 | 1.24k | } else { \ |
80 | 0 | m1 = M1; m2 = M2; \ |
81 | 0 | } |
82 | | |
83 | | #define SET_OFF2(m1, m2, m3, M1, M2, M3, off, BITS) \ |
84 | 1.24k | if (off) { \ |
85 | 1.24k | if (off <= GMP_LIMB_BITS) { \ |
86 | 0 | m1 = M2 << (GMP_LIMB_BITS - off); \ |
87 | 0 | m2 = M3 << (GMP_LIMB_BITS - off); \ |
88 | 0 | if (off != GMP_LIMB_BITS) { \ |
89 | 0 | m1 |= (M1 >> off); \ |
90 | 0 | m2 |= (M2 >> off); \ |
91 | 0 | } \ |
92 | 0 | if (off <= BITS - 2 * GMP_LIMB_BITS) { \ |
93 | 0 | m3 = M1 << (BITS - 2 * GMP_LIMB_BITS - off) \ |
94 | 0 | | M3 >> off; \ |
95 | 0 | } else { \ |
96 | 0 | m2 |= M1 << (BITS - GMP_LIMB_BITS - off); \ |
97 | 0 | m3 = M1 >> (off + 2 * GMP_LIMB_BITS - BITS); \ |
98 | 0 | } \ |
99 | 1.24k | } else if (off < 2 *GMP_LIMB_BITS) { \ |
100 | 0 | m1 = M2 >> (off - GMP_LIMB_BITS) \ |
101 | 0 | | M3 << (2 * GMP_LIMB_BITS - off); \ |
102 | 0 | if (off <= BITS - GMP_LIMB_BITS) { \ |
103 | 0 | m2 = M3 >> (off - GMP_LIMB_BITS) \ |
104 | 0 | | M1 << (BITS - GMP_LIMB_BITS - off); \ |
105 | 0 | m3 = M2 << (BITS - GMP_LIMB_BITS - off); \ |
106 | 0 | if (off != BITS - GMP_LIMB_BITS) { \ |
107 | 0 | m3 |= M1 >> (off + 2 * GMP_LIMB_BITS - BITS); \ |
108 | 0 | } \ |
109 | 0 | } else { \ |
110 | 0 | m1 |= M1 << (BITS - off); \ |
111 | 0 | m2 = M2 << (BITS - off) \ |
112 | 0 | | M1 >> (GMP_LIMB_BITS - BITS + off); \ |
113 | 0 | m3 = M2 >> (GMP_LIMB_BITS - BITS + off); \ |
114 | 0 | } \ |
115 | 1.24k | } else { \ |
116 | 1.24k | m1 = M1 << (BITS - off) \ |
117 | 1.24k | | M3 >> (off - 2 * GMP_LIMB_BITS); \ |
118 | 1.24k | m2 = M2 << (BITS - off) \ |
119 | 1.24k | | M1 >> (off + GMP_LIMB_BITS - BITS); \ |
120 | 1.24k | m3 = M3 << (BITS - off) \ |
121 | 1.24k | | M2 >> (off + GMP_LIMB_BITS - BITS); \ |
122 | 1.24k | } \ |
123 | 1.24k | } else { \ |
124 | 0 | m1 = M1; m2 = M2; m3 = M3; \ |
125 | 0 | } |
126 | | |
127 | | #define ROTATE1(m1, m2, BITS) \ |
128 | 131k | do { \ |
129 | 131k | mp_limb_t __tmp; \ |
130 | 131k | __tmp = m1 >> (2 * GMP_LIMB_BITS - BITS); \ |
131 | 131k | m1 = (m1 << (BITS - GMP_LIMB_BITS)) | m2; \ |
132 | 131k | m2 = __tmp; \ |
133 | 131k | } while (0) |
134 | | |
135 | | #define ROTATE2(m1, m2, m3, BITS) \ |
136 | 65.9k | do { \ |
137 | 65.9k | mp_limb_t __tmp; \ |
138 | 65.9k | __tmp = m2 >> (3 * GMP_LIMB_BITS - BITS); \ |
139 | 65.9k | m2 = m2 << (BITS - GMP_LIMB_BITS * 2) \ |
140 | 65.9k | | m1 >> (3 * GMP_LIMB_BITS - BITS); \ |
141 | 65.9k | m1 = m1 << (BITS - GMP_LIMB_BITS * 2) | m3; \ |
142 | 65.9k | m3 = __tmp; \ |
143 | 65.9k | } while (0) |
144 | | |
145 | | static mp_limb_t |
146 | | fill_bitpattern (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset) |
147 | 1.24k | { |
148 | 1.24k | #ifdef SIEVE_2MSK2 |
149 | 1.24k | mp_limb_t m11, m12, m21, m22, m23; |
150 | | |
151 | 1.24k | { /* correctly handle offset == 0... */ |
152 | 1.24k | mp_limb_t off1 = offset % (11 * 5 * 2); |
153 | 1.24k | SET_OFF1 (m11, m12, SIEVE_MASK1, SIEVE_MASKT, off1, 11 * 5 * 2); |
154 | 1.24k | offset %= 13 * 7 * 2; |
155 | 1.24k | SET_OFF2 (m21, m22, m23, SIEVE_2MSK1, SIEVE_2MSK2, SIEVE_2MSKT, offset, 13 * 7 * 2); |
156 | 1.24k | } |
157 | | /* THINK: Consider handling odd values of 'limbs' outside the loop, |
158 | | to have a single exit condition. */ |
159 | 66.5k | do { |
160 | 66.5k | bit_array[0] = m11 | m21; |
161 | 66.5k | if (--limbs == 0) |
162 | 611 | break; |
163 | 65.9k | ROTATE1 (m11, m12, 11 * 5 * 2); |
164 | 65.9k | bit_array[1] = m11 | m22; |
165 | 65.9k | bit_array += 2; |
166 | 65.9k | ROTATE1 (m11, m12, 11 * 5 * 2); |
167 | 65.9k | ROTATE2 (m21, m22, m23, 13 * 7 * 2); |
168 | 65.9k | } while (--limbs != 0); |
169 | 0 | return n_cto_bit (13 + 1); |
170 | | #else |
171 | | #ifdef SIEVE_MASK2 |
172 | | mp_limb_t mask, mask2, tail; |
173 | | |
174 | | { /* correctly handle offset == 0... */ |
175 | | offset %= 7 * 5 * 2; |
176 | | SET_OFF2 (mask, mask2, tail, SIEVE_MASK1, SIEVE_MASK2, SIEVE_MASKT, offset, 7 * 5 * 2); |
177 | | } |
178 | | /* THINK: Consider handling odd values of 'limbs' outside the loop, |
179 | | to have a single exit condition. */ |
180 | | do { |
181 | | bit_array[0] = mask; |
182 | | if (--limbs == 0) |
183 | | break; |
184 | | bit_array[1] = mask2; |
185 | | bit_array += 2; |
186 | | ROTATE2 (mask, mask2, tail, 7 * 5 * 2); |
187 | | } while (--limbs != 0); |
188 | | return n_cto_bit (7 + 1); |
189 | | #else |
190 | | MPN_FILL (bit_array, limbs, CNST_LIMB(0)); |
191 | | return 0; |
192 | | #endif |
193 | | #endif |
194 | 1.24k | } |
195 | | |
196 | | static void |
197 | | block_resieve (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset, |
198 | | mp_srcptr sieve) |
199 | 1.24k | { |
200 | 1.24k | mp_size_t bits, off = offset; |
201 | 1.24k | mp_limb_t mask, i; |
202 | | |
203 | 1.24k | ASSERT (limbs > 0); |
204 | | |
205 | 1.24k | bits = limbs * GMP_LIMB_BITS - 1; |
206 | | |
207 | 1.24k | i = fill_bitpattern (bit_array, limbs, offset); |
208 | | |
209 | 1.24k | ASSERT (i < GMP_LIMB_BITS); |
210 | | |
211 | 1.24k | mask = CNST_LIMB(1) << i; |
212 | 60.8k | do { |
213 | 60.8k | ++i; |
214 | 60.8k | if ((*sieve & mask) == 0) |
215 | 38.9k | { |
216 | 38.9k | mp_size_t step, lindex; |
217 | 38.9k | mp_limb_t lmask; |
218 | 38.9k | unsigned maskrot; |
219 | | |
220 | 38.9k | step = id_to_n(i); |
221 | | |
222 | | /* lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */ |
223 | 38.9k | lindex = i*(step+1)-1+(-(i&1)&(i+1)); |
224 | | /* lindex = i*(step+1+(i&1))-1+(i&1); */ |
225 | 38.9k | if (lindex > bits + off) |
226 | 1.24k | break; |
227 | | |
228 | 37.7k | step <<= 1; |
229 | 37.7k | maskrot = step % GMP_LIMB_BITS; |
230 | | |
231 | 37.7k | if (lindex < off) |
232 | 18.7k | lindex += step * ((off - lindex - 1) / step + 1); |
233 | | |
234 | 37.7k | lindex -= off; |
235 | | |
236 | 37.7k | lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS); |
237 | 2.22M | for ( ; lindex <= bits; lindex += step) { |
238 | 2.18M | bit_array[lindex / GMP_LIMB_BITS] |= lmask; |
239 | 2.18M | lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot); |
240 | 2.18M | }; |
241 | | |
242 | | /* lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */ |
243 | 37.7k | lindex = i*(i*3+6)+(i&1); |
244 | | |
245 | 37.7k | if (lindex < off) |
246 | 17.4k | lindex += step * ((off - lindex - 1) / step + 1); |
247 | | |
248 | 37.7k | lindex -= off; |
249 | | |
250 | 37.7k | lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS); |
251 | 2.22M | for ( ; lindex <= bits; lindex += step) { |
252 | 2.18M | bit_array[lindex / GMP_LIMB_BITS] |= lmask; |
253 | 2.18M | lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot); |
254 | 2.18M | }; |
255 | 37.7k | } |
256 | 59.5k | mask = mask << 1 | mask >> (GMP_LIMB_BITS-1); |
257 | 59.5k | sieve += mask & 1; |
258 | 59.5k | } while (1); |
259 | 1.24k | } |
260 | | |
261 | 1.24k | #define BLOCK_SIZE 2048 |
262 | | |
263 | | /* Fills bit_array with the characteristic function of composite |
264 | | numbers up to the parameter n. I.e. a bit set to "1" represents a |
265 | | composite, a "0" represents a prime. |
266 | | |
267 | | The primesieve_size(n) limbs pointed to by bit_array are |
268 | | overwritten. The returned value counts prime integers in the |
269 | | interval [4, n]. Note that n > 4. |
270 | | |
271 | | Even numbers and multiples of 3 are excluded "a priori", only |
272 | | numbers equivalent to +/- 1 mod 6 have their bit in the array. |
273 | | |
274 | | Once sieved, if the bit b is ZERO it represent a prime, the |
275 | | represented prime is bit_to_n(b), if the LSbit is bit 0, or |
276 | | id_to_n(b), if you call "1" the first bit. |
277 | | */ |
278 | | |
279 | | mp_limb_t |
280 | | gmp_primesieve (mp_ptr bit_array, mp_limb_t n) |
281 | 1.26k | { |
282 | 1.26k | mp_size_t size; |
283 | 1.26k | mp_limb_t bits; |
284 | 1.26k | static mp_limb_t presieved[] = {PRIMESIEVE_INIT_TABLE}; |
285 | | |
286 | 1.26k | ASSERT (n > 4); |
287 | | |
288 | 1.26k | bits = n_fto_bit(n); |
289 | 1.26k | size = bits / GMP_LIMB_BITS + 1; |
290 | | |
291 | 1.26k | for (mp_size_t j = 0, lim = MIN (size, PRIMESIEVE_NUMBEROF_TABLE); |
292 | 36.6k | j < lim; ++j) |
293 | 35.3k | bit_array [j] = presieved [j]; /* memcopy? */ |
294 | | |
295 | 1.26k | if (size > PRIMESIEVE_NUMBEROF_TABLE) { |
296 | 1.24k | mp_size_t off; |
297 | 1.24k | off = size > 2 * BLOCK_SIZE ? BLOCK_SIZE + (size % BLOCK_SIZE) : size; |
298 | 1.24k | block_resieve (bit_array + PRIMESIEVE_NUMBEROF_TABLE, |
299 | 1.24k | off - PRIMESIEVE_NUMBEROF_TABLE, |
300 | 1.24k | GMP_LIMB_BITS * PRIMESIEVE_NUMBEROF_TABLE, bit_array); |
301 | 1.24k | for (; off < size; off += BLOCK_SIZE) |
302 | 0 | block_resieve (bit_array + off, BLOCK_SIZE, off * GMP_LIMB_BITS, bit_array); |
303 | 1.24k | } |
304 | | |
305 | 1.26k | if ((bits + 1) % GMP_LIMB_BITS != 0) |
306 | 1.25k | bit_array[size-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS); |
307 | | |
308 | 1.26k | return size * GMP_LIMB_BITS - mpn_popcount (bit_array, size); |
309 | 1.26k | } |
310 | | |
311 | | #undef BLOCK_SIZE |
312 | | #undef SIEVE_MASK1 |
313 | | #undef SIEVE_MASK2 |
314 | | #undef SIEVE_MASKT |
315 | | #undef SIEVE_2MSK1 |
316 | | #undef SIEVE_2MSK2 |
317 | | #undef SIEVE_2MSKT |
318 | | #undef SET_OFF1 |
319 | | #undef SET_OFF2 |
320 | | #undef ROTATE1 |
321 | | #undef ROTATE2 |