Line data Source code
1 : /* Copyright (C) 2016 The PARI group.
2 :
3 : This file is part of the PARI/GP package.
4 :
5 : PARI/GP is free software; you can redistribute it and/or modify it under the
6 : terms of the GNU General Public License as published by the Free Software
7 : Foundation; either version 2 of the License, or (at your option) any later
8 : version. It is distributed in the hope that it will be useful, but WITHOUT
9 : ANY WARRANTY WHATSOEVER.
10 :
11 : Check the License for details. You should have received a copy of it, along
12 : with the package; see the file 'COPYING'. If not, write to the Free Software
13 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
14 :
15 : #include "pari.h"
16 : #include "paripriv.h"
17 :
18 : /**********************************************************************/
19 : /*** ***/
20 : /*** Public prime table ***/
21 : /*** ***/
22 : /**********************************************************************/
23 :
24 : static ulong _maxprimelim = 0;
25 : static GEN _prodprimes, _prodprimeslim;
26 : typedef unsigned char *byteptr;
27 :
28 : /* Build/Rebuild table of prime differences. The actual work is done by the
29 : * following two subroutines; the user entry point is the function
30 : * initprimes() below; initprimes1() is the basecase, called when
31 : * maxnum (size) is moderate. Must be called after pari_init_stack() )*/
32 : static void
33 1862 : initprimes1(ulong size, long *lenp, pari_prime *p1)
34 : {
35 1862 : pari_sp av = avma;
36 : long k;
37 1862 : byteptr q, r, s, p = (byteptr)stack_calloc(size+2), fin = p + size;
38 : pari_prime *re;
39 :
40 22344 : for (r=q=p,k=1; r<=fin; )
41 : {
42 33516 : do { r+=k; k+=2; r+=k; } while (*++q);
43 899346 : for (s=r; s<=fin; s+=k) *s = 1;
44 : }
45 1862 : re = p1; *re++ = 2; *re++ = 3; /* 2 and 3 */
46 1862 : for (s=q=p+1; ; s=q)
47 : {
48 951482 : do q++; while (*q);
49 318402 : if (q > fin) break;
50 316540 : *re++ = (pari_prime) 2*(q-p)+1;
51 : }
52 1862 : *re++ = 0;
53 1862 : *lenp = re - p1;
54 1862 : set_avma(av);
55 1862 : }
56 :
57 : /* Timing in ms (Athlon/850; reports 512K of secondary cache; looks
58 : like there is 64K of quickier cache too).
59 :
60 : arena| 30m 100m 300m 1000m 2000m <-- primelimit
61 : =================================================
62 : 16K 1.1053 1.1407 1.2589 1.4368 1.6086
63 : 24K 1.0000 1.0625 1.1320 1.2443 1.3095
64 : 32K 1.0000 1.0469 1.0761 1.1336 1.1776
65 : 48K 1.0000 1.0000 1.0254 1.0445 1.0546
66 : 50K 1.0000 1.0000 1.0152 1.0345 1.0464
67 : 52K 1.0000 1.0000 1.0203 1.0273 1.0362
68 : 54K 1.0000 1.0000 1.0812 1.0216 1.0281
69 : 56K 1.0526 1.0000 1.0051 1.0144 1.0205
70 : 58K 1.0000 1.0000 1.0000 1.0086 1.0123
71 : 60K 0.9473 0.9844 1.0051 1.0014 1.0055
72 : 62K 1.0000 0.9844 0.9949 0.9971 0.9993
73 : 64K 1.0000 1.0000 1.0000 1.0000 1.0000
74 : 66K 1.2632 1.2187 1.2183 1.2055 1.1953
75 : 68K 1.4211 1.4844 1.4721 1.4425 1.4188
76 : 70K 1.7368 1.7188 1.7107 1.6767 1.6421
77 : 72K 1.9474 1.9531 1.9594 1.9023 1.8573
78 : 74K 2.2105 2.1875 2.1827 2.1207 2.0650
79 : 76K 2.4211 2.4219 2.4010 2.3305 2.2644
80 : 78K 2.5789 2.6250 2.6091 2.5330 2.4571
81 : 80K 2.8421 2.8125 2.8223 2.7213 2.6380
82 : 84K 3.1053 3.1875 3.1776 3.0819 2.9802
83 : 88K 3.5263 3.5312 3.5228 3.4124 3.2992
84 : 92K 3.7895 3.8438 3.8375 3.7213 3.5971
85 : 96K 4.0000 4.1093 4.1218 3.9986 3.9659
86 : 112K 4.3684 4.5781 4.5787 4.4583 4.6115
87 : 128K 4.7368 4.8750 4.9188 4.8075 4.8997
88 : 192K 5.5263 5.7188 5.8020 5.6911 5.7064
89 : 256K 6.0000 6.2187 6.3045 6.1954 6.1033
90 : 384K 6.7368 6.9531 7.0405 6.9181 6.7912
91 : 512K 7.3158 7.5156 7.6294 7.5000 7.4654
92 : 768K 9.1579 9.4531 9.6395 9.5014 9.1075
93 : 1024K 10.368 10.7497 10.9999 10.878 10.8201
94 : 1536K 12.579 13.3124 13.7660 13.747 13.4739
95 : 2048K 13.737 14.4839 15.0509 15.151 15.1282
96 : 3076K 14.789 15.5780 16.2993 16.513 16.3365
97 :
98 : Now the same number relative to the model
99 :
100 : (1 + 0.36*sqrt(primelimit)/arena) * (arena <= 64 ? 1.05 : (arena-64)**0.38)
101 :
102 : [SLOW2_IN_ROOTS = 0.36, ALPHA = 0.38]
103 :
104 : arena| 30m 100m 300m 1000m 2000m <-- primelimit
105 : =================================================
106 : 16K 1.014 0.9835 0.9942 0.9889 1.004
107 : 24K 0.9526 0.9758 0.9861 0.9942 0.981
108 : 32K 0.971 0.9939 0.9884 0.9849 0.9806
109 : 48K 0.9902 0.9825 0.996 0.9945 0.9885
110 : 50K 0.9917 0.9853 0.9906 0.9926 0.9907
111 : 52K 0.9932 0.9878 0.9999 0.9928 0.9903
112 : 54K 0.9945 0.9902 1.064 0.9939 0.9913
113 : 56K 1.048 0.9924 0.9925 0.993 0.9921
114 : 58K 0.9969 0.9945 0.9909 0.9932 0.9918
115 : 60K 0.9455 0.9809 0.9992 0.9915 0.9923
116 : 62K 0.9991 0.9827 0.9921 0.9924 0.9929
117 : 64K 1 1 1 1 1
118 : 66K 1.02 0.9849 0.9857 0.9772 0.9704
119 : 68K 0.8827 0.9232 0.9176 0.9025 0.8903
120 : 70K 0.9255 0.9177 0.9162 0.9029 0.8881
121 : 72K 0.9309 0.936 0.9429 0.9219 0.9052
122 : 74K 0.9715 0.9644 0.967 0.9477 0.9292
123 : 76K 0.9935 0.9975 0.9946 0.9751 0.9552
124 : 78K 0.9987 1.021 1.021 1.003 0.9819
125 : 80K 1.047 1.041 1.052 1.027 1.006
126 : 84K 1.052 1.086 1.092 1.075 1.053
127 : 88K 1.116 1.125 1.133 1.117 1.096
128 : 92K 1.132 1.156 1.167 1.155 1.134
129 : 96K 1.137 1.177 1.195 1.185 1.196
130 : 112K 1.067 1.13 1.148 1.15 1.217
131 : 128K 1.04 1.083 1.113 1.124 1.178
132 : 192K 0.9368 0.985 1.025 1.051 1.095
133 : 256K 0.8741 0.9224 0.9619 0.995 1.024
134 : 384K 0.8103 0.8533 0.8917 0.9282 0.9568
135 : 512K 0.7753 0.8135 0.8537 0.892 0.935
136 : 768K 0.8184 0.8638 0.9121 0.9586 0.9705
137 : 1024K 0.8241 0.8741 0.927 0.979 1.03
138 : 1536K 0.8505 0.9212 0.9882 1.056 1.096
139 : 2048K 0.8294 0.8954 0.9655 1.041 1.102
140 :
141 : */
142 :
143 : #ifndef SLOW2_IN_ROOTS
144 : /* SLOW2_IN_ROOTS below 3: some slowdown starts to be noticable
145 : * when things fit into the cache on Sparc.
146 : * The choice of 2.6 gives a slowdown of 1-2% on UltraSparcII,
147 : * but makes calculations for "maximum" of 436273009
148 : * fit into 256K cache (still common for some architectures).
149 : *
150 : * One may change it when small caches become uncommon, but the gain
151 : * is not going to be very noticable... */
152 : # ifdef i386 /* gcc defines this? */
153 : # define SLOW2_IN_ROOTS 0.36
154 : # else
155 : # define SLOW2_IN_ROOTS 2.6
156 : # endif
157 : #endif
158 : #ifndef CACHE_ARENA
159 : # ifdef i386 /* gcc defines this? */
160 : /* Due to smaller SLOW2_IN_ROOTS, smaller arena is OK; fit L1 cache */
161 : # define CACHE_ARENA (63 * 1024UL) /* No slowdown even with 64K L1 cache */
162 : # else
163 : # define CACHE_ARENA (200 * 1024UL) /* No slowdown even with 256K L2 cache */
164 : # endif
165 : #endif
166 :
167 : #define CACHE_ALPHA (0.38) /* Cache performance model parameter */
168 : #define CACHE_CUTOFF (0.018) /* Cache performance not smooth here */
169 :
170 : static double slow2_in_roots = SLOW2_IN_ROOTS;
171 :
172 : typedef struct {
173 : ulong arena;
174 : double power;
175 : double cutoff;
176 : ulong bigarena;
177 : } cache_model_t;
178 :
179 : static cache_model_t cache_model = { CACHE_ARENA, CACHE_ALPHA, CACHE_CUTOFF, 0 };
180 :
181 : /* Assume that some calculation requires a chunk of memory to be
182 : accessed often in more or less random fashion (as in sieving).
183 : Assume that the calculation can be done in steps by subdividing the
184 : chunk into smaller subchunks (arenas) and treating them
185 : separately. Assume that the overhead of subdivision is equivalent
186 : to the number of arenas.
187 :
188 : Find an optimal size of the arena taking into account the overhead
189 : of subdivision, and the overhead of arena not fitting into the
190 : cache. Assume that arenas of size slow2_in_roots slows down the
191 : calculation 2x (comparing to very big arenas; when cache hits do
192 : not matter). Since cache performance varies wildly with
193 : architecture, load, and wheather (especially with cache coloring
194 : enabled), use an idealized cache model based on benchmarks above.
195 :
196 : Assume that an independent region of FIXED_TO_CACHE bytes is accessed
197 : very often concurrently with the arena access.
198 : */
199 : static ulong
200 1862 : good_arena_size(ulong slow2_size, ulong total, ulong fixed_to_cache,
201 : cache_model_t *cache_model)
202 : {
203 1862 : ulong asize, cache_arena = cache_model->arena;
204 : double Xmin, Xmax, A, B, C1, C2, D, V;
205 1862 : double alpha = cache_model->power, cut_off = cache_model->cutoff;
206 :
207 : /* Estimated relative slowdown,
208 : with overhead = max((fixed_to_cache+arena)/cache_arena - 1, 0):
209 :
210 : 1 + slow2_size/arena due to initialization overhead;
211 :
212 : max(1, 4.63 * overhead^0.38 ) due to footprint > cache size.
213 :
214 : [The latter is hard to substantiate theoretically, but this
215 : function describes benchmarks pretty close; it does not hurt that
216 : one can minimize it explicitly too ;-). The switch between
217 : different choices of max() happens when overhead=0.018.]
218 :
219 : Thus the problem is minimizing (1 + slow2_size/arena)*overhead**0.29.
220 : This boils down to F=((X+A)/(X+B))X^alpha, X=overhead,
221 : B = (1 - fixed_to_cache/cache_arena), A = B + slow2_size/cache_arena,
222 : alpha = 0.38, and X>=0.018, X>-B.
223 :
224 : We need to find the rightmost root of (X+A)*(X+B) - alpha(A-B)X to the
225 : right of 0.018 (if such exists and is below Xmax). Then we manually
226 : check the remaining region [0, 0.018].
227 :
228 : Since we cannot trust the purely-experimental cache-hit slowdown
229 : function, as a sanity check always prefer fitting into the
230 : cache (or "almost fitting") if F-law predicts that the larger
231 : value of the arena provides less than 10% speedup.
232 : */
233 :
234 : /* The simplest case: we fit into cache */
235 1862 : asize = cache_arena - fixed_to_cache;
236 1862 : if (total <= asize) return total;
237 : /* The simple case: fitting into cache doesn't slow us down more than 10% */
238 1862 : if (asize > 10 * slow2_size) return asize;
239 : /* Slowdown of not fitting into cache is significant. Try to optimize.
240 : Do not be afraid to spend some time on optimization - in trivial
241 : cases we do not reach this point; any gain we get should
242 : compensate the time spent on optimization. */
243 :
244 0 : B = (1 - ((double)fixed_to_cache)/cache_arena);
245 0 : A = B + ((double)slow2_size)/cache_arena;
246 0 : C2 = A*B;
247 0 : C1 = (A + B - 1/alpha*(A - B))/2;
248 0 : D = C1*C1 - C2;
249 0 : if (D > 0)
250 0 : V = cut_off*cut_off + 2*C1*cut_off + C2; /* Value at CUT_OFF */
251 : else
252 0 : V = 0; /* Peacify the warning */
253 0 : Xmin = cut_off;
254 0 : Xmax = ((double)total - fixed_to_cache)/cache_arena; /* Two candidates */
255 :
256 0 : if ( D <= 0 || (V >= 0 && C1 + cut_off >= 0) ) /* slowdown increasing */
257 0 : Xmax = cut_off; /* Only one candidate */
258 0 : else if (V >= 0 && /* slowdown concave down */
259 0 : ((Xmax + C1) <= 0 || (Xmax*Xmax + 2*C1*Xmax + C2) <= 0))
260 : /* DO NOTHING */; /* Keep both candidates */
261 0 : else if (V <= 0 && (Xmax*Xmax + 2*C1*Xmax + C2) <= 0) /*slowdown decreasing*/
262 0 : Xmin = cut_off; /* Only one candidate */
263 : else /* Now we know: 2 roots, the largest is in CUT_OFF..Xmax */
264 0 : Xmax = sqrt(D) - C1;
265 0 : if (Xmax != Xmin) { /* Xmin == CUT_OFF; Check which one is better */
266 0 : double v1 = (cut_off + A)/(cut_off + B);
267 0 : double v2 = 2.33 * (Xmax + A)/(Xmax + B) * pow(Xmax, alpha);
268 :
269 0 : if (1.1 * v2 >= v1) /* Prefer fitting into the cache if slowdown < 10% */
270 0 : V = v1;
271 : else
272 0 : { Xmin = Xmax; V = v2; }
273 0 : } else if (B > 0) /* We need V */
274 0 : V = 2.33 * (Xmin + A)/(Xmin + B) * pow(Xmin, alpha);
275 0 : if (B > 0 && 1.1 * V > A/B) /* Now Xmin is the minumum. Compare with 0 */
276 0 : Xmin = 0;
277 :
278 0 : asize = (ulong)((1 + Xmin)*cache_arena - fixed_to_cache);
279 0 : if (asize > total) asize = total; /* May happen due to approximations */
280 0 : return asize;
281 : }
282 :
283 : /* Use as in
284 : install(set_optimize,lLDG) \\ Through some M too?
285 : set_optimize(2,1) \\ disable dependence on limit
286 : \\ 1: how much cache usable, 2: slowdown of setup, 3: alpha, 4: cutoff,
287 : \\ 5: cache size (typically whole L2 or L3) in bytes to use in forprime()
288 : \\ 2,3,4 are in units of 0.001
289 :
290 : { time_primes_arena(ar,limit) = \\ ar = arena size in K
291 : set_optimize(1,floor(ar*1024));
292 : default(primelimit, 200 000); \\ 100000 results in *larger* malloc()!
293 : gettime;
294 : default(primelimit, floor(limit));
295 : if(ar >= 1, ar=floor(ar));
296 : print("arena "ar"K => "gettime"ms");
297 : }
298 : */
299 : long
300 0 : set_optimize(long what, GEN g)
301 : {
302 0 : long ret = 0;
303 :
304 0 : switch (what) {
305 0 : case 1:
306 0 : ret = (long)cache_model.arena;
307 0 : break;
308 0 : case 2:
309 0 : ret = (long)(slow2_in_roots * 1000);
310 0 : break;
311 0 : case 3:
312 0 : ret = (long)(cache_model.power * 1000);
313 0 : break;
314 0 : case 4:
315 0 : ret = (long)(cache_model.cutoff * 1000);
316 0 : break;
317 0 : case 5:
318 0 : ret = (long)(cache_model.bigarena);
319 0 : break;
320 0 : default:
321 0 : pari_err_BUG("set_optimize");
322 0 : break;
323 : }
324 0 : if (g != NULL) {
325 0 : ulong val = itou(g);
326 :
327 0 : switch (what) {
328 0 : case 1: cache_model.arena = val; break;
329 0 : case 2: slow2_in_roots = (double)val / 1000.; break;
330 0 : case 3: cache_model.power = (double)val / 1000.; break;
331 0 : case 4: cache_model.cutoff = (double)val / 1000.; break;
332 0 : case 5: cache_model.bigarena = val; break;
333 : }
334 : }
335 0 : return ret;
336 : }
337 :
338 : /* s is odd; prime (starting from 3 = known_primes[2]), terminated by a 0 byte.
339 : * Checks n odd numbers starting at 'start', setting bytes to 0 (composite)
340 : * or 1 (prime), starting at data */
341 : static void
342 7134 : sieve_chunk(pari_prime *known_primes, ulong s, byteptr data, ulong n)
343 : {
344 7134 : ulong p, cnt = n-1, start = s;
345 : pari_prime *q;
346 :
347 7134 : memset(data, 0, n);
348 7134 : start >>= 1; /* (start - 1)/2 */
349 7134 : start += n; /* Corresponds to the end */
350 : /* data corresponds to start, q runs over primediffs */
351 1024872 : for (q = known_primes + 1, p = 3; p; p = *++q)
352 : { /* first odd number >= start > p and divisible by p
353 : = last odd number <= start + 2p - 2 and 0 (mod p)
354 : = p + last number <= start + p - 2 and 0 (mod 2p)
355 : = p + start+p-2 - (start+p-2) % 2p
356 : = start + 2(p - 1 - ((start-1)/2 + (p-1)/2) % p). */
357 1017738 : long off = cnt - ((start+(p>>1)) % p);
358 1625414974 : while (off >= 0) { data[off] = 1; off -= p; }
359 : }
360 7134 : }
361 :
362 : static long
363 1862 : ZV_size(GEN x)
364 : {
365 1862 : long i, l = lg(x), s = l;
366 26068 : for (i = 1; i < l; i++) s += lgefint(gel(x,i));
367 1862 : return s;
368 : }
369 : /* avoid gcopy_avma so as to deallocate using free() in pari_close_primes */
370 : static GEN
371 1862 : ZV_copy_alloc(GEN x)
372 : {
373 1862 : long i, l = lg(x), s = ZV_size(x);
374 1862 : GEN z = (GEN)pari_malloc(s * sizeof(long)), av = z + s;
375 26068 : for (i = 1; i < l; i++) gel(z,i) = av = icopy_avma(gel(x,i), (pari_sp)av);
376 1862 : z[0] = x[0] & (TYPBITS|LGBITS); return z;
377 : }
378 :
379 : static void
380 1862 : set_prodprimes(void)
381 : {
382 1862 : pari_sp av = avma;
383 1862 : ulong b = 1UL << 8, M = minuu(maxprime(), GP_DATA->factorlimit);
384 1862 : GEN W, v = primes_interval_zv(3, M);
385 1862 : long u, j, jold, l = lg(v);
386 :
387 1862 : W = cgetg(64+1, t_VEC);
388 1862 : M = v[l-1]; /* = precprime(M) */
389 1862 : _prodprimeslim = cgetalloc(64+1, t_VECSMALL);
390 1862 : if (b > M) b = M;
391 152730550 : for (jold = j = u = 1; j < l; j++)
392 152728688 : if (uel(v,j) >= b) /* if j = l-1, then b = M = v[j] */
393 : {
394 24206 : long lw = (j == l-1? l: j) - jold + 1;
395 24206 : GEN w = v+jold-1;
396 24206 : w[0] = evaltyp(t_VECSMALL) | _evallg(lw);
397 24206 : _prodprimeslim[u] = w[lw - 1];
398 : /* product of primes from
399 : * p_jold = 3 if first entry, else nextprime(_prodprime_lim[u - 1] + 1)
400 : * to
401 : * p_{j-1} = _prodprimeslim[u] = precprime(M or 2^{u+7}) */
402 24206 : gel(W,u++) = zv_prod_Z(w);
403 24206 : jold = j; b *= 2;
404 24206 : if (b > M) b = M; /* truncate last run */
405 : }
406 24206 : for (j = 2; j < u; j++) gel(W,j) = mulii(gel(W,j-1), gel(W,j));
407 1862 : setlg(W, u); _prodprimes = ZV_copy_alloc(W);
408 1862 : setlg(_prodprimeslim, u); set_avma(av);
409 1862 : }
410 :
411 : static void
412 1862 : initprimes0(ulong maxnum, long *lenp, pari_prime *p1)
413 : {
414 1862 : pari_sp av = avma, bot = pari_mainstack->bot;
415 : long alloced, psize;
416 : byteptr q, end, p;
417 : ulong remains, curlow, rootnum, asize, prime_above, last;
418 : pari_prime *end1, *curdiff, *p_prime_above;
419 :
420 1862 : if (!odd(maxnum)) maxnum--; /* make it odd. */
421 : /* base case */
422 1862 : if (maxnum < 1ul<<17) { initprimes1(maxnum>>1, lenp, p1); return; }
423 :
424 : /* Checked to be enough up to 40e6, attained at 155893 */
425 1862 : rootnum = usqrt(maxnum) | 1;
426 1862 : initprimes1(rootnum>>1, &psize, p1);
427 1862 : last = rootnum;
428 1862 : end1 = p1 + psize - 1;
429 1862 : remains = (maxnum - last) >> 1; /* number of odd numbers to check */
430 : /* we access primes array of psize too; but we access it consecutively,
431 : * thus we do not include it in fixed_to_cache */
432 1862 : asize = good_arena_size((ulong)(rootnum * slow2_in_roots), remains+1, 0,
433 : &cache_model) - 1;
434 : /* enough room on the stack ? */
435 1862 : alloced = (((byteptr)avma) <= ((byteptr)bot) + asize);
436 1862 : p = (byteptr)(alloced? pari_malloc(asize+1): stack_malloc(asize+1));
437 1862 : end = p + asize; /* the 0 sentinel goes at end. */
438 1862 : curlow = last + 2; /* First candidate: know primes up to last (odd). */
439 1862 : curdiff = end1;
440 :
441 : /* During each iteration p..end-1 represents a range of odd
442 : numbers. */
443 1862 : p_prime_above = p1 + 2;
444 1862 : prime_above = 3;
445 8996 : while (remains)
446 : { /* cycle over arenas; performance not crucial */
447 : pari_prime was_delta;
448 7134 : if (asize > remains) { asize = remains; end = p + asize; }
449 : /* Fake the upper limit appropriate for the given arena */
450 325536 : while (prime_above*prime_above <= curlow + (asize << 1) && *p_prime_above)
451 318402 : prime_above = *p_prime_above++;
452 7134 : was_delta = *p_prime_above;
453 7134 : *p_prime_above = 0; /* sentinel for sieve_chunk */
454 7134 : sieve_chunk(p1, curlow, p, asize);
455 7134 : *p_prime_above = was_delta; /* restore */
456 :
457 7134 : p[asize] = 0; /* sentinel */
458 7134 : for (q = p; ; q++)
459 : { /* q runs over addresses corresponding to primes */
460 975278046 : while (*q) q++; /* use sentinel at end */
461 152417420 : if (q >= end) break;
462 152410286 : *curdiff++ = (pari_prime) 2*(q-p) + curlow;
463 : }
464 7134 : remains -= asize;
465 7134 : curlow += (asize<<1);
466 : }
467 1862 : *curdiff++ = 0; /* sentinel */
468 1862 : *lenp = curdiff - p1;
469 1862 : if (alloced) pari_free(p); else set_avma(av);
470 : }
471 :
472 : ulong
473 33336096 : maxprime(void) { return pari_PRIMES? pari_PRIMES[pari_PRIMES[0]]: 0; }
474 : ulong
475 72683628 : maxprimelim(void) { return pari_PRIMES? _maxprimelim: 0; }
476 : ulong
477 196 : maxprimeN(void) { return pari_PRIMES? pari_PRIMES[0]: 0; }
478 : GEN
479 937368 : prodprimes(void) { return pari_PRIMES? _prodprimes: NULL; }
480 : GEN
481 937368 : prodprimeslim(void) { return pari_PRIMES? _prodprimeslim: NULL; }
482 : void
483 0 : maxprime_check(ulong c) { if (maxprime() < c) pari_err_MAXPRIME(c); }
484 :
485 : static pari_prime*
486 1862 : initprimes(ulong maxnum)
487 : {
488 : pari_prime *t;
489 : long len;
490 : ulong N;
491 1862 : if (maxnum < 65537)
492 : {
493 0 : maxnum = 65537;
494 0 : N = 6543;
495 : }
496 : else
497 1862 : N = (long) ceil(primepi_upper_bound((double)maxnum));
498 1862 : t = (pari_prime*) pari_malloc(sizeof(*t) * (N+2));
499 1862 : initprimes0(maxnum, &len, t+1); t[0] = (pari_prime)(len-1);
500 1862 : _maxprimelim = maxnum;
501 1862 : return (pari_prime*) pari_realloc(t, sizeof(*t) * (len+1));
502 : }
503 :
504 : void
505 1862 : initprimetable(ulong maxnum)
506 : {
507 1862 : pari_prime *old = pari_PRIMES;
508 : #ifdef LONG_IS_64BIT
509 1604 : maxnum = minuu(maxnum, 4294967295);
510 : #endif
511 1862 : pari_PRIMES = initprimes(maxnum);
512 1862 : if (old) free(old);
513 1862 : set_prodprimes();
514 1862 : }
515 :
516 : /**********************************************************************/
517 : /*** ***/
518 : /*** forprime ***/
519 : /*** ***/
520 : /**********************************************************************/
521 :
522 : /* return good chunk size for sieve, 16 | chunk + 2 */
523 : static ulong
524 8447797 : optimize_chunk(ulong a, ulong b)
525 : {
526 : /* TODO: Optimize size (surely < 512k to stay in L2 cache, but not so large
527 : * as to force recalculating too often). */
528 : /* bigarena is in bytes, we use bits, and only odds */
529 8447797 : ulong defchunk = (a>>31) > 1 ? 0x80000UL : 0x8000;
530 8447797 : ulong chunk = (cache_model.bigarena ? cache_model.bigarena : defchunk)<<4;
531 8447797 : ulong tmp = (b - a) / chunk + 1;
532 :
533 8447797 : if (tmp == 1)
534 196 : chunk = b - a + 16;
535 : else
536 8447601 : chunk = (b - a) / tmp + 15;
537 : /* ensure 16 | chunk + 2 */
538 8447797 : return (((chunk + 2)>>4)<<4) - 2;
539 : }
540 : static void
541 8447797 : sieve_init(forprime_t *T, ulong a, ulong b)
542 : {
543 8447797 : T->sieveb = b;
544 8447797 : T->chunk = optimize_chunk(a, b);
545 : /* >> 1 [only odds] + 3 [convert from bits to bytes] */
546 8447801 : T->isieve = (unsigned char*)stack_malloc(((T->chunk+2) >> 4) + 1);
547 8447773 : T->cache[0] = 0;
548 8447773 : T->a = a;
549 8447773 : T->end = minuu(a + T->chunk, b);
550 8447759 : T->pos = T->maxpos = 0;
551 8447759 : }
552 :
553 : enum {PRST_none, PRST_diffptr, PRST_sieve, PRST_unextprime, PRST_nextprime};
554 :
555 : static void
556 8031584 : u_forprime_set_prime_table(forprime_t *T, ulong a)
557 : {
558 8031584 : T->strategy = PRST_diffptr;
559 8031584 : if (a < 3)
560 : {
561 2396252 : T->p = 0;
562 2396252 : T->n = 0;
563 : }
564 : else
565 : {
566 5635332 : long n = PRIMES_search(a - 1);
567 5635587 : if (n < 0) n = - n - 1;
568 5635587 : T->n = n;
569 5635587 : T->p = pari_PRIMES[n];
570 : }
571 8031839 : }
572 :
573 : /* Set p so that p + q the smallest integer = c (mod q) and > original p.
574 : * Assume 0 < c < q. */
575 : static void
576 101996 : arith_set(forprime_t *T)
577 : {
578 101996 : ulong r = T->p % T->q; /* 0 <= r <= min(p, q-1) */
579 101996 : pari_sp av = avma;
580 101996 : GEN d = adduu(T->p - r, T->c); /* = c mod q */
581 101996 : if (T->c > r) d = subiu(d, T->q);
582 : /* d = c mod q, d = c > r? p-r+c-q: p-r+c, so that
583 : * d <= p and d+q = c>r? p-r+c : p-r+c+q > p */
584 101996 : if (signe(d) <= 0)
585 : {
586 20 : T->p = 0;
587 20 : T->strategy = PRST_nextprime;
588 20 : affii(d, T->pp);
589 : }
590 : else
591 101976 : T->p = itou_or_0(d);
592 101996 : set_avma(av);
593 101996 : }
594 :
595 : /* Run through primes in arithmetic progression = c (mod q).
596 : * Warning: b = ULONG_MAX may signal that we are called by higher level
597 : * function handling a continuation for larger b; this sentinel value
598 : * must not be modified */
599 : static int
600 23302765 : u_forprime_sieve_arith_init(forprime_t *T, struct pari_sieve *psieve,
601 : ulong a, ulong b, ulong c, ulong q)
602 : {
603 : #ifdef LONG_IS_64BIT
604 19962769 : const ulong UPRIME_MAX = 18446744073709551557UL;
605 : #else
606 3339996 : const ulong UPRIME_MAX = 4294967291UL;
607 : #endif
608 : ulong Plim, P, P2, Y, sieveb;
609 :
610 23302765 : if (!odd(b) && b > 2) b--;
611 23302637 : if (a > b || b < 2)
612 : {
613 883712 : T->strategy = PRST_diffptr; /* paranoia */
614 883712 : T->p = 0; /* empty */
615 883712 : T->b = 0; /* empty */
616 883712 : T->n = 0;
617 883712 : return 0;
618 : }
619 22418925 : P = maxprime();
620 22418940 : if (b != ULONG_MAX && b > UPRIME_MAX) b = UPRIME_MAX;
621 22418940 : if (q != 1)
622 : {
623 : ulong D;
624 588916 : c %= q; D = ugcd(c, q);
625 588908 : if (D != 1) { a = maxuu(a,D); if (b != ULONG_MAX) b = minuu(b,D); }
626 588908 : if (odd(q) && (a > 2 || c != 2))
627 : { /* only *odd* primes. If a <= c = 2, then p = 2 must be included :-( */
628 511136 : if (!odd(c)) c += q;
629 511532 : q <<= 1;
630 : }
631 : }
632 22419301 : T->q = q;
633 22419301 : T->c = c;
634 22419301 : T->strategy = PRST_none; /* unknown */
635 22419301 : T->psieve = psieve; /* unused for now */
636 22419301 : T->isieve = NULL; /* unused for now */
637 22419301 : T->b = b;
638 22419301 : if (P >= b) { /* [a,b] \subset prime table */
639 4419166 : u_forprime_set_prime_table(T, a);
640 4419137 : return 1;
641 : }
642 : /* b > P */
643 18000135 : if (a >= P)
644 : {
645 14387657 : T->p = a - 1;
646 14387657 : if (T->q != 1) arith_set(T);
647 : }
648 : else
649 3612478 : u_forprime_set_prime_table(T, a);
650 18000102 : if (T->strategy == PRST_none) T->strategy = PRST_unextprime;
651 : /* now strategy is either PRST_diffptr or PRST_unextprime */
652 :
653 18000102 : P2 = (P & HIGHMASK)? 0 : P*P;
654 18000102 : sieveb = b; if (P2 && P2 < b) sieveb = P2;
655 : /* maxprime^2 >= sieveb */
656 18000102 : Plim = maxprimelim();
657 18000112 : if (a <= Plim) a = Plim + 1;
658 18000112 : if (sieveb < a + 16) return 1;
659 8950930 : Y = sieveb - a + 1; /* number of integers in sievable interval > 16 */
660 8950930 : P = usqrt(sieveb); /* largest sieving prime */
661 : /* FIXME: should sieve as well if q != 1, adapt sieve code */
662 8951075 : if (q == 1 && (!P2 || P2 > a) && 3/M_LN2 * Y >= uprimepi(P))
663 : /* Sieve implemented & possible & not too costly. Cost model is
664 : * - nextprime: about Y / log(b) primes to test [neglect cost for composites]
665 : * individual cost average = 3 log2(b) mulmod, total = 3 Y / log(2) mulmod
666 : * - sieve: pi(P) mod + Y loglog(b) add
667 : * Since loglog(b) < 4, and add < 10*mulmod, we neglect the Y loglog(b) term.
668 : * We have mod < mulmod < 2*mod; for now, assume mulmod ~ mod. */
669 : {
670 8447800 : if (T->strategy == PRST_unextprime) T->strategy = PRST_sieve;
671 8447800 : sieve_init(T, a, sieveb);
672 : }
673 8951157 : return 1;
674 : }
675 :
676 : int
677 17972885 : u_forprime_arith_init(forprime_t *T, ulong a, ulong b, ulong c, ulong q)
678 17972885 : { return u_forprime_sieve_arith_init(T, NULL, a, b, c, q); }
679 :
680 : /* will run through primes in [a,b] */
681 : int
682 17381407 : u_forprime_init(forprime_t *T, ulong a, ulong b)
683 17381407 : { return u_forprime_arith_init(T, a,b, 0,1); }
684 :
685 : /* will run through primes in [a,b] */
686 : static int
687 5329550 : u_forprime_sieve_init(forprime_t *T, struct pari_sieve *s, ulong b)
688 5329550 : { return u_forprime_sieve_arith_init(T, s, s->start, b, s->c, s->q); }
689 :
690 : /* now only run through primes <= c; assume c <= b above */
691 : void
692 63 : u_forprime_restrict(forprime_t *T, ulong c) { T->b = c; }
693 :
694 : /* b = NULL: loop forever */
695 : int
696 2420 : forprimestep_init(forprime_t *T, GEN a, GEN b, GEN q)
697 : {
698 2420 : GEN c = NULL;
699 : long lb;
700 :
701 2420 : a = gceil(a); if (typ(a) != t_INT) pari_err_TYPE("forprime_init",a);
702 2420 : T->qq = NULL; T->q = 1; T->c = 0;
703 2420 : if (q)
704 : {
705 133 : switch(typ(q))
706 : {
707 56 : case t_INT:
708 56 : c = a; break;
709 77 : case t_INTMOD:
710 77 : c = gel(q,2); q = gel(q,1);
711 : /* first int >= initial a which is = c (mod q) */
712 77 : a = addii(a, modii(subii(c,a), q)); break;
713 0 : default: pari_err_TYPE("forprimestep_init",q);
714 : }
715 133 : if (signe(q) <= 0) pari_err_TYPE("forprimestep_init (q <= 0)",q);
716 133 : if (equali1(q)) c = q = NULL;
717 : else
718 : {
719 133 : GEN D = gcdii(c, q);
720 133 : if (!is_pm1(D))
721 : { /* at most one prime: c */
722 42 : if (cmpii(a, D) < 0) a = D;
723 42 : if (gcmp(b, D) > 0) b = D;
724 : }
725 133 : if ((T->q = itou_or_0(q)))
726 125 : T->c = umodiu(c, T->q);
727 : else
728 8 : T->qq = q;
729 : }
730 : }
731 2420 : if (signe(a) <= 0) a = q? modii(a, q): gen_1;
732 2420 : if (b && typ(b) != t_INFINITY)
733 : {
734 1013 : b = gfloor(b);
735 1013 : if (typ(b) != t_INT) pari_err_TYPE("forprime_init",b);
736 1013 : if (signe(b) < 0 || cmpii(a,b) > 0)
737 : {
738 21 : T->strategy = PRST_nextprime; /* paranoia */
739 21 : T->bb = T->pp = gen_0; return 0;
740 : }
741 992 : lb = lgefint(b);
742 992 : T->bb = b;
743 : }
744 1407 : else if (!b || inf_get_sign(b) > 0)
745 : {
746 1407 : lb = lgefint(a) + 4;
747 1407 : T->bb = NULL;
748 : }
749 : else /* b == -oo */
750 : {
751 0 : T->strategy = PRST_nextprime; /* paranoia */
752 0 : T->bb = T->pp = gen_0; return 0;
753 : }
754 2399 : T->pp = cgeti(T->qq? maxuu(lb, lgefint(T->qq)): lb);
755 : /* a, b are positive integers, a <= b */
756 2399 : if (!T->qq && lgefint(a) == 3) /* lb == 3 implies b != NULL */
757 2256 : return u_forprime_arith_init(T, uel(a,2), lb == 3? uel(b,2): ULONG_MAX,
758 : T->c, T->q);
759 143 : T->strategy = PRST_nextprime;
760 143 : affii(T->qq? subii(a,T->qq): subiu(a,T->q), T->pp); return 1;
761 : }
762 : int
763 1315 : forprime_init(forprime_t *T, GEN a, GEN b)
764 1315 : { return forprimestep_init(T,a,b,NULL); }
765 :
766 : /* assume a <= b <= maxprime()^2, a,b odd, sieve[n] corresponds to
767 : * a+16*n, a+16*n+2, ..., a+16*n+14 (bits 0 to 7)
768 : * maxpos = index of last sieve cell.
769 : * b-a+2 must be divisible by 16 for use by u_forprime_next */
770 : static void
771 9129 : sieve_block(ulong a, ulong b, ulong maxpos, unsigned char* sieve)
772 : {
773 9129 : ulong i, lim = usqrt(b), sz = (b-a) >> 1;
774 9129 : (void)memset(sieve, 0, maxpos+1);
775 9129 : for (i = 2;; i++)
776 24482880 : { /* p is odd */
777 24492009 : ulong k, r, p = pari_PRIMES[i]; /* starts at p = 3 */
778 24492009 : if (p > lim) break;
779 :
780 : /* solve a + 2k = 0 (mod p) */
781 24482880 : r = a % p;
782 24482880 : if (r == 0)
783 16042 : k = 0;
784 : else
785 : {
786 24466838 : k = p - r;
787 24466838 : if (odd(k)) k += p;
788 24466838 : k >>= 1;
789 : }
790 : /* m = a + 2k is the smallest odd m >= a, p | m */
791 : /* position n (corresponds to a+2n) is sieve[n>>3], bit n&7 */
792 5732223569 : while (k <= sz) { sieve[k>>3] |= 1 << (k&7); k += p; /* 2k += 2p */ }
793 : }
794 9129 : }
795 :
796 : static void
797 1862 : pari_sieve_init(struct pari_sieve *s, ulong a, ulong b)
798 : {
799 1862 : ulong maxpos= (b - a) >> 4;
800 1862 : s->start = a; s->end = b;
801 1862 : s->sieve = (unsigned char*) pari_malloc(maxpos+1);
802 1862 : s->c = 0; s->q = 1;
803 1862 : sieve_block(a, b, maxpos, s->sieve);
804 1862 : s->maxpos = maxpos; /* must be last in case of SIGINT */
805 1862 : }
806 :
807 : static struct pari_sieve pari_sieve_modular;
808 :
809 : #ifdef LONG_IS_64BIT
810 : #define PARI_MODULAR_BASE ((1UL<<((BITS_IN_LONG-2)>>1))+1)
811 : #else
812 : #define PARI_MODULAR_BASE ((1UL<<(BITS_IN_LONG-1))+1)
813 : #endif
814 :
815 : void
816 1862 : pari_init_primes(ulong maxprime)
817 : {
818 1862 : ulong a = PARI_MODULAR_BASE, b = a + (1UL<<20)-2;
819 1862 : initprimetable(maxprime);
820 1862 : pari_sieve_init(&pari_sieve_modular, a, b);
821 1862 : }
822 :
823 : void
824 1862 : pari_close_primes(void)
825 : {
826 1862 : if (pari_PRIMES)
827 : {
828 1862 : pari_free(pari_PRIMES);
829 1862 : pari_free(_prodprimes);
830 1862 : pari_free(_prodprimeslim);
831 : }
832 1862 : pari_free(pari_sieve_modular.sieve);
833 1862 : }
834 :
835 : void
836 4475341 : init_modular_small(forprime_t *S)
837 : {
838 : #ifdef LONG_IS_64BIT
839 3836165 : u_forprime_sieve_init(S, &pari_sieve_modular, ULONG_MAX);
840 : #else
841 639176 : ulong a = (1UL<<((BITS_IN_LONG-2)>>1))+1;
842 639176 : u_forprime_init(S, a, ULONG_MAX);
843 : #endif
844 4475354 : }
845 :
846 : void
847 10446182 : init_modular_big(forprime_t *S)
848 : {
849 : #ifdef LONG_IS_64BIT
850 8952797 : u_forprime_init(S, HIGHBIT + 1, ULONG_MAX);
851 : #else
852 1493385 : u_forprime_sieve_init(S, &pari_sieve_modular, ULONG_MAX);
853 : #endif
854 10446312 : }
855 :
856 : /* T->cache is a 0-terminated list of primes, return the first one and
857 : * remove it from list. Most of the time the list contains a single prime */
858 : static ulong
859 130035473 : shift_cache(forprime_t *T)
860 : {
861 : long i;
862 130035473 : T->p = T->cache[0];
863 173863134 : for (i = 1;; i++) /* remove one prime from cache */
864 173863134 : if (! (T->cache[i-1] = T->cache[i]) ) break;
865 130035473 : return T->p;
866 : }
867 :
868 : ulong
869 203938072 : u_forprime_next(forprime_t *T)
870 : {
871 203938072 : if (T->strategy == PRST_diffptr)
872 : {
873 : for(;;)
874 : {
875 217152773 : if (++T->n <= pari_PRIMES[0])
876 : {
877 217152612 : T->p = pari_PRIMES[T->n];
878 217152612 : if (T->p > T->b) return 0;
879 216964938 : if (T->q == 1 || T->p % T->q == T->c) return T->p;
880 : }
881 : else
882 : { /* beyond the table */
883 161 : T->strategy = T->isieve? PRST_sieve: PRST_unextprime;
884 161 : if (T->q != 1) { arith_set(T); if (!T->p) return 0; }
885 : /* T->p possibly not a prime if q != 1 */
886 161 : break;
887 : }
888 : }
889 : }
890 144943943 : if (T->strategy == PRST_sieve)
891 : { /* require sieveb - a >= 16 */
892 : ulong n;
893 130036017 : if (T->cache[0]) return shift_cache(T);
894 93002010 : NEXT_CHUNK:
895 93010320 : if (T->psieve)
896 : {
897 5329558 : T->sieve = T->psieve->sieve;
898 5329558 : T->end = T->psieve->end;
899 5329558 : if (T->end > T->sieveb) T->end = T->sieveb;
900 5329558 : T->maxpos = T->psieve->maxpos;
901 5329558 : T->pos = 0;
902 5329558 : T->psieve = NULL;
903 : }
904 140841751 : for (n = T->pos; n < T->maxpos; n++)
905 140831300 : if (T->sieve[n] != 0xFF)
906 : {
907 92999869 : unsigned char mask = T->sieve[n];
908 92999869 : ulong p = T->a + (n<<4);
909 92999869 : long i = 0;
910 92999869 : T->pos = n;
911 92999869 : if (!(mask & 1)) T->cache[i++] = p;
912 92999869 : if (!(mask & 2)) T->cache[i++] = p+2;
913 92999869 : if (!(mask & 4)) T->cache[i++] = p+4;
914 92999869 : if (!(mask & 8)) T->cache[i++] = p+6;
915 92999869 : if (!(mask & 16)) T->cache[i++] = p+8;
916 92999869 : if (!(mask & 32)) T->cache[i++] = p+10;
917 92999869 : if (!(mask & 64)) T->cache[i++] = p+12;
918 92999869 : if (!(mask &128)) T->cache[i++] = p+14;
919 92999869 : T->cache[i] = 0;
920 92999869 : T->pos = n+1;
921 92999869 : return shift_cache(T);
922 : }
923 : /* n = T->maxpos, last cell: check p <= b */
924 10451 : if (T->maxpos && n == T->maxpos && T->sieve[n] != 0xFF)
925 : {
926 2873 : unsigned char mask = T->sieve[n];
927 2873 : ulong p = T->a + (n<<4);
928 2873 : long i = 0;
929 2873 : T->pos = n;
930 2873 : if (!(mask & 1) && p <= T->sieveb) T->cache[i++] = p;
931 2873 : if (!(mask & 2) && p <= T->sieveb-2) T->cache[i++] = p+2;
932 2873 : if (!(mask & 4) && p <= T->sieveb-4) T->cache[i++] = p+4;
933 2873 : if (!(mask & 8) && p <= T->sieveb-6) T->cache[i++] = p+6;
934 2873 : if (!(mask & 16) && p <= T->sieveb-8) T->cache[i++] = p+8;
935 2873 : if (!(mask & 32) && p <= T->sieveb-10) T->cache[i++] = p+10;
936 2873 : if (!(mask & 64) && p <= T->sieveb-12) T->cache[i++] = p+12;
937 2873 : if (!(mask &128) && p <= T->sieveb-14) T->cache[i++] = p+14;
938 2873 : if (i)
939 : {
940 2772 : T->cache[i] = 0;
941 2772 : T->pos = n+1;
942 2772 : return shift_cache(T);
943 : }
944 : }
945 :
946 7679 : if (T->maxpos && T->end >= T->sieveb) /* done with sieves ? */
947 : {
948 419 : if (T->sieveb == T->b && T->b != ULONG_MAX) return 0;
949 1 : T->strategy = PRST_unextprime;
950 : }
951 : else
952 : { /* initialize next chunk */
953 7260 : T->sieve = T->isieve;
954 7260 : if (T->maxpos == 0)
955 3366 : T->a |= 1; /* first time; ensure odd */
956 : else
957 3894 : T->a = (T->end + 2) | 1;
958 7260 : T->end = T->a + T->chunk; /* may overflow */
959 7260 : if (T->end < T->a || T->end > T->sieveb) T->end = T->sieveb;
960 : /* end and a are odd; sieve[k] contains the a + 8*2k + (0,2,...,14).
961 : * The largest k is (end-a) >> 4 */
962 7260 : T->pos = 0;
963 7260 : T->maxpos = (T->end - T->a) >> 4; /* >= 1 */
964 7260 : sieve_block(T->a, T->end, T->maxpos, T->sieve);
965 8310 : goto NEXT_CHUNK;
966 : }
967 : }
968 14907927 : if (T->strategy == PRST_unextprime)
969 : {
970 14905887 : if (T->q == 1)
971 : {
972 : #ifdef LONG_IS_64BIT
973 14752752 : switch(T->p)
974 : {
975 : #define retp(x) return T->p = (HIGHBIT+x <= T->b)? HIGHBIT+x: 0
976 8952877 : case HIGHBIT: retp(29);
977 3186559 : case HIGHBIT + 29: retp(99);
978 361191 : case HIGHBIT + 99: retp(123);
979 209984 : case HIGHBIT +123: retp(131);
980 151366 : case HIGHBIT +131: retp(155);
981 130509 : case HIGHBIT +155: retp(255);
982 109175 : case HIGHBIT +255: retp(269);
983 99432 : case HIGHBIT +269: retp(359);
984 79515 : case HIGHBIT +359: retp(435);
985 60216 : case HIGHBIT +435: retp(449);
986 53425 : case HIGHBIT +449: retp(453);
987 50523 : case HIGHBIT +453: retp(485);
988 44498 : case HIGHBIT +485: retp(491);
989 41417 : case HIGHBIT +491: retp(543);
990 39005 : case HIGHBIT +543: retp(585);
991 36395 : case HIGHBIT +585: retp(599);
992 30243 : case HIGHBIT +599: retp(753);
993 29463 : case HIGHBIT +753: retp(849);
994 28383 : case HIGHBIT +849: retp(879);
995 26757 : case HIGHBIT +879: retp(885);
996 26061 : case HIGHBIT +885: retp(903);
997 25581 : case HIGHBIT +903: retp(995);
998 : #undef retp
999 : }
1000 : #endif
1001 980249 : T->p = unextprime(T->p + 1);
1002 980277 : if (T->p > T->b) return 0;
1003 : }
1004 : else do {
1005 2798512 : T->p += T->q;
1006 2798512 : if (T->p < T->q || T->p > T->b) { T->p = 0; break; } /* overflow */
1007 2798486 : } while (!uisprime(T->p));
1008 1133283 : if (T->p && T->p <= T->b) return T->p;
1009 : /* overflow ulong, switch to GEN */
1010 48 : T->strategy = PRST_nextprime;
1011 : }
1012 2088 : return 0; /* overflow */
1013 : }
1014 :
1015 : GEN
1016 45385959 : forprime_next(forprime_t *T)
1017 : {
1018 : pari_sp av;
1019 : GEN p;
1020 45385959 : if (T->strategy != PRST_nextprime)
1021 : {
1022 45378080 : ulong u = u_forprime_next(T);
1023 45378080 : if (u) { affui(u, T->pp); return T->pp; }
1024 : /* failure */
1025 814 : if (T->strategy != PRST_nextprime) return NULL; /* we're done */
1026 : /* overflow ulong, switch to GEN */
1027 48 : u = ULONG_MAX;
1028 48 : if (T->q > 1) u -= (ULONG_MAX-T->c) % T->q;
1029 48 : affui(u, T->pp);
1030 : }
1031 7927 : av = avma; p = T->pp;
1032 7927 : if (T->q == 1)
1033 : {
1034 7749 : p = nextprime(addiu(p, 1));
1035 7749 : if (T->bb && abscmpii(p, T->bb) > 0) return gc_NULL(av);
1036 : } else do {
1037 3341 : p = T->qq? addii(p, T->qq): addiu(p, T->q);
1038 3341 : if (T->bb && abscmpii(p, T->bb) > 0) return gc_NULL(av);
1039 3285 : } while (!BPSW_psp(p));
1040 7744 : affii(p, T->pp); return gc_const(av, T->pp);
1041 : }
1042 :
1043 : void
1044 1085 : forprimestep(GEN a, GEN b, GEN q, GEN code)
1045 : {
1046 1085 : pari_sp av = avma;
1047 : forprime_t T;
1048 :
1049 1085 : if (!forprimestep_init(&T, a,b,q)) { set_avma(av); return; }
1050 :
1051 1071 : push_lex(T.pp,code);
1052 309822 : while(forprime_next(&T))
1053 : {
1054 309178 : closure_evalvoid(code); if (loop_break()) break;
1055 : /* p changed in 'code', complain */
1056 308758 : if (get_lex(-1) != T.pp)
1057 7 : pari_err(e_MISC,"prime index read-only: was changed to %Ps", get_lex(-1));
1058 : }
1059 1064 : pop_lex(1); set_avma(av);
1060 : }
1061 : void
1062 959 : forprime(GEN a, GEN b, GEN code) { return forprimestep(a,b,NULL,code); }
1063 :
1064 : int
1065 70 : forcomposite_init(forcomposite_t *C, GEN a, GEN b)
1066 : {
1067 70 : pari_sp av = avma;
1068 70 : a = gceil(a);
1069 70 : if (typ(a)!=t_INT) pari_err_TYPE("forcomposite",a);
1070 70 : if (b) {
1071 63 : if (typ(b) == t_INFINITY) b = NULL;
1072 : else
1073 : {
1074 56 : b = gfloor(b);
1075 56 : if (typ(b)!=t_INT) pari_err_TYPE("forcomposite",b);
1076 : }
1077 : }
1078 70 : if (signe(a) < 0) pari_err_DOMAIN("forcomposite", "a", "<", gen_0, a);
1079 70 : if (abscmpiu(a, 4) < 0) a = utoipos(4);
1080 70 : C->first = 1;
1081 70 : if (!forprime_init(&C->T, a,b) && cmpii(a,b) > 0)
1082 : {
1083 7 : C->n = gen_1; /* in case caller forgets to check the return value */
1084 7 : C->b = gen_0; return gc_bool(av,0);
1085 : }
1086 63 : C->n = setloop(a);
1087 63 : C->b = b;
1088 63 : C->p = NULL; return 1;
1089 : }
1090 :
1091 : GEN
1092 238 : forcomposite_next(forcomposite_t *C)
1093 : {
1094 238 : if (C->first) /* first call ever */
1095 : {
1096 63 : C->first = 0;
1097 63 : C->p = forprime_next(&C->T);
1098 : }
1099 : else
1100 175 : C->n = incloop(C->n);
1101 238 : if (C->p)
1102 : {
1103 161 : if (cmpii(C->n, C->p) < 0) return C->n;
1104 77 : C->n = incloop(C->n);
1105 : /* n = p+1 */
1106 77 : C->p = forprime_next(&C->T); /* nextprime(p) > n */
1107 77 : if (C->p) return C->n;
1108 : }
1109 105 : if (!C->b || cmpii(C->n, C->b) <= 0) return C->n;
1110 42 : return NULL;
1111 : }
1112 :
1113 : void
1114 70 : forcomposite(GEN a, GEN b, GEN code)
1115 : {
1116 70 : pari_sp av = avma;
1117 : forcomposite_t T;
1118 : GEN n;
1119 70 : if (!forcomposite_init(&T,a,b)) return;
1120 63 : push_lex(T.n,code);
1121 238 : while((n = forcomposite_next(&T)))
1122 : {
1123 196 : closure_evalvoid(code); if (loop_break()) break;
1124 : /* n changed in 'code', complain */
1125 182 : if (get_lex(-1) != n)
1126 7 : pari_err(e_MISC,"index read-only: was changed to %Ps", get_lex(-1));
1127 : }
1128 56 : pop_lex(1); set_avma(av);
1129 : }
|