Line data Source code
1 : /* Copyright (C) 2016 The PARI group.
2 :
3 : This file is part of the PARI/GP package.
4 :
5 : PARI/GP is free software; you can redistribute it and/or modify it under the
6 : terms of the GNU General Public License as published by the Free Software
7 : Foundation; either version 2 of the License, or (at your option) any later
8 : version. It is distributed in the hope that it will be useful, but WITHOUT
9 : ANY WARRANTY WHATSOEVER.
10 :
11 : Check the License for details. You should have received a copy of it, along
12 : with the package; see the file 'COPYING'. If not, write to the Free Software
13 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
14 :
15 : #include "pari.h"
16 : #include "paripriv.h"
17 :
18 : /**********************************************************************/
19 : /*** ***/
20 : /*** Public prime table ***/
21 : /*** ***/
22 : /**********************************************************************/
23 :
24 : static ulong _maxprime = 0;
25 : static ulong _maxprimelim = 0;
26 : static ulong diffptrlen;
27 : static GEN _prodprimes,_prodprimes_addr;
28 :
29 : /* Building/Rebuilding the diffptr table. The actual work is done by the
30 : * following two subroutines; the user entry point is the function
31 : * initprimes() below. initprimes1() is the old algorithm, called when
32 : * maxnum (size) is moderate. Must be called after pari_init_stack() )*/
33 : static void
34 1806 : initprimes1(ulong size, long *lenp, ulong *lastp, byteptr p1)
35 : {
36 1806 : pari_sp av = avma;
37 : long k;
38 1806 : byteptr q, r, s, p = (byteptr)stack_calloc(size+2), fin = p + size;
39 :
40 21672 : for (r=q=p,k=1; r<=fin; )
41 : {
42 32508 : do { r+=k; k+=2; r+=k; } while (*++q);
43 874104 : for (s=r; s<=fin; s+=k) *s = 1;
44 : }
45 1806 : r = p1; *r++ = 2; *r++ = 1; /* 2 and 3 */
46 1806 : for (s=q=p+1; ; s=q)
47 : {
48 924672 : do q++; while (*q);
49 308826 : if (q > fin) break;
50 307020 : *r++ = (unsigned char) ((q-s) << 1);
51 : }
52 1806 : *r++ = 0;
53 1806 : *lenp = r - p1;
54 1806 : *lastp = ((s - p) << 1) + 1;
55 1806 : set_avma(av);
56 1806 : }
57 :
58 : /* Timing in ms (Athlon/850; reports 512K of secondary cache; looks
59 : like there is 64K of quickier cache too).
60 :
61 : arena| 30m 100m 300m 1000m 2000m <-- primelimit
62 : =================================================
63 : 16K 1.1053 1.1407 1.2589 1.4368 1.6086
64 : 24K 1.0000 1.0625 1.1320 1.2443 1.3095
65 : 32K 1.0000 1.0469 1.0761 1.1336 1.1776
66 : 48K 1.0000 1.0000 1.0254 1.0445 1.0546
67 : 50K 1.0000 1.0000 1.0152 1.0345 1.0464
68 : 52K 1.0000 1.0000 1.0203 1.0273 1.0362
69 : 54K 1.0000 1.0000 1.0812 1.0216 1.0281
70 : 56K 1.0526 1.0000 1.0051 1.0144 1.0205
71 : 58K 1.0000 1.0000 1.0000 1.0086 1.0123
72 : 60K 0.9473 0.9844 1.0051 1.0014 1.0055
73 : 62K 1.0000 0.9844 0.9949 0.9971 0.9993
74 : 64K 1.0000 1.0000 1.0000 1.0000 1.0000
75 : 66K 1.2632 1.2187 1.2183 1.2055 1.1953
76 : 68K 1.4211 1.4844 1.4721 1.4425 1.4188
77 : 70K 1.7368 1.7188 1.7107 1.6767 1.6421
78 : 72K 1.9474 1.9531 1.9594 1.9023 1.8573
79 : 74K 2.2105 2.1875 2.1827 2.1207 2.0650
80 : 76K 2.4211 2.4219 2.4010 2.3305 2.2644
81 : 78K 2.5789 2.6250 2.6091 2.5330 2.4571
82 : 80K 2.8421 2.8125 2.8223 2.7213 2.6380
83 : 84K 3.1053 3.1875 3.1776 3.0819 2.9802
84 : 88K 3.5263 3.5312 3.5228 3.4124 3.2992
85 : 92K 3.7895 3.8438 3.8375 3.7213 3.5971
86 : 96K 4.0000 4.1093 4.1218 3.9986 3.9659
87 : 112K 4.3684 4.5781 4.5787 4.4583 4.6115
88 : 128K 4.7368 4.8750 4.9188 4.8075 4.8997
89 : 192K 5.5263 5.7188 5.8020 5.6911 5.7064
90 : 256K 6.0000 6.2187 6.3045 6.1954 6.1033
91 : 384K 6.7368 6.9531 7.0405 6.9181 6.7912
92 : 512K 7.3158 7.5156 7.6294 7.5000 7.4654
93 : 768K 9.1579 9.4531 9.6395 9.5014 9.1075
94 : 1024K 10.368 10.7497 10.9999 10.878 10.8201
95 : 1536K 12.579 13.3124 13.7660 13.747 13.4739
96 : 2048K 13.737 14.4839 15.0509 15.151 15.1282
97 : 3076K 14.789 15.5780 16.2993 16.513 16.3365
98 :
99 : Now the same number relative to the model
100 :
101 : (1 + 0.36*sqrt(primelimit)/arena) * (arena <= 64 ? 1.05 : (arena-64)**0.38)
102 :
103 : [SLOW2_IN_ROOTS = 0.36, ALPHA = 0.38]
104 :
105 : arena| 30m 100m 300m 1000m 2000m <-- primelimit
106 : =================================================
107 : 16K 1.014 0.9835 0.9942 0.9889 1.004
108 : 24K 0.9526 0.9758 0.9861 0.9942 0.981
109 : 32K 0.971 0.9939 0.9884 0.9849 0.9806
110 : 48K 0.9902 0.9825 0.996 0.9945 0.9885
111 : 50K 0.9917 0.9853 0.9906 0.9926 0.9907
112 : 52K 0.9932 0.9878 0.9999 0.9928 0.9903
113 : 54K 0.9945 0.9902 1.064 0.9939 0.9913
114 : 56K 1.048 0.9924 0.9925 0.993 0.9921
115 : 58K 0.9969 0.9945 0.9909 0.9932 0.9918
116 : 60K 0.9455 0.9809 0.9992 0.9915 0.9923
117 : 62K 0.9991 0.9827 0.9921 0.9924 0.9929
118 : 64K 1 1 1 1 1
119 : 66K 1.02 0.9849 0.9857 0.9772 0.9704
120 : 68K 0.8827 0.9232 0.9176 0.9025 0.8903
121 : 70K 0.9255 0.9177 0.9162 0.9029 0.8881
122 : 72K 0.9309 0.936 0.9429 0.9219 0.9052
123 : 74K 0.9715 0.9644 0.967 0.9477 0.9292
124 : 76K 0.9935 0.9975 0.9946 0.9751 0.9552
125 : 78K 0.9987 1.021 1.021 1.003 0.9819
126 : 80K 1.047 1.041 1.052 1.027 1.006
127 : 84K 1.052 1.086 1.092 1.075 1.053
128 : 88K 1.116 1.125 1.133 1.117 1.096
129 : 92K 1.132 1.156 1.167 1.155 1.134
130 : 96K 1.137 1.177 1.195 1.185 1.196
131 : 112K 1.067 1.13 1.148 1.15 1.217
132 : 128K 1.04 1.083 1.113 1.124 1.178
133 : 192K 0.9368 0.985 1.025 1.051 1.095
134 : 256K 0.8741 0.9224 0.9619 0.995 1.024
135 : 384K 0.8103 0.8533 0.8917 0.9282 0.9568
136 : 512K 0.7753 0.8135 0.8537 0.892 0.935
137 : 768K 0.8184 0.8638 0.9121 0.9586 0.9705
138 : 1024K 0.8241 0.8741 0.927 0.979 1.03
139 : 1536K 0.8505 0.9212 0.9882 1.056 1.096
140 : 2048K 0.8294 0.8954 0.9655 1.041 1.102
141 :
142 : */
143 :
144 : #ifndef SLOW2_IN_ROOTS
145 : /* SLOW2_IN_ROOTS below 3: some slowdown starts to be noticable
146 : * when things fit into the cache on Sparc.
147 : * The choice of 2.6 gives a slowdown of 1-2% on UltraSparcII,
148 : * but makes calculations for "maximum" of 436273009
149 : * fit into 256K cache (still common for some architectures).
150 : *
151 : * One may change it when small caches become uncommon, but the gain
152 : * is not going to be very noticable... */
153 : # ifdef i386 /* gcc defines this? */
154 : # define SLOW2_IN_ROOTS 0.36
155 : # else
156 : # define SLOW2_IN_ROOTS 2.6
157 : # endif
158 : #endif
159 : #ifndef CACHE_ARENA
160 : # ifdef i386 /* gcc defines this? */
161 : /* Due to smaller SLOW2_IN_ROOTS, smaller arena is OK; fit L1 cache */
162 : # define CACHE_ARENA (63 * 1024UL) /* No slowdown even with 64K L1 cache */
163 : # else
164 : # define CACHE_ARENA (200 * 1024UL) /* No slowdown even with 256K L2 cache */
165 : # endif
166 : #endif
167 :
168 : #define CACHE_ALPHA (0.38) /* Cache performance model parameter */
169 : #define CACHE_CUTOFF (0.018) /* Cache performance not smooth here */
170 :
171 : static double slow2_in_roots = SLOW2_IN_ROOTS;
172 :
173 : typedef struct {
174 : ulong arena;
175 : double power;
176 : double cutoff;
177 : } cache_model_t;
178 :
179 : static cache_model_t cache_model = { CACHE_ARENA, CACHE_ALPHA, CACHE_CUTOFF };
180 :
181 : /* Assume that some calculation requires a chunk of memory to be
182 : accessed often in more or less random fashion (as in sieving).
183 : Assume that the calculation can be done in steps by subdividing the
184 : chunk into smaller subchunks (arenas) and treating them
185 : separately. Assume that the overhead of subdivision is equivalent
186 : to the number of arenas.
187 :
188 : Find an optimal size of the arena taking into account the overhead
189 : of subdivision, and the overhead of arena not fitting into the
190 : cache. Assume that arenas of size slow2_in_roots slows down the
191 : calculation 2x (comparing to very big arenas; when cache hits do
192 : not matter). Since cache performance varies wildly with
193 : architecture, load, and wheather (especially with cache coloring
194 : enabled), use an idealized cache model based on benchmarks above.
195 :
196 : Assume that an independent region of FIXED_TO_CACHE bytes is accessed
197 : very often concurrently with the arena access.
198 : */
199 : static ulong
200 1806 : good_arena_size(ulong slow2_size, ulong total, ulong fixed_to_cache,
201 : cache_model_t *cache_model)
202 : {
203 1806 : ulong asize, cache_arena = cache_model->arena;
204 : double Xmin, Xmax, A, B, C1, C2, D, V;
205 1806 : double alpha = cache_model->power, cut_off = cache_model->cutoff;
206 :
207 : /* Estimated relative slowdown,
208 : with overhead = max((fixed_to_cache+arena)/cache_arena - 1, 0):
209 :
210 : 1 + slow2_size/arena due to initialization overhead;
211 :
212 : max(1, 4.63 * overhead^0.38 ) due to footprint > cache size.
213 :
214 : [The latter is hard to substantiate theoretically, but this
215 : function describes benchmarks pretty close; it does not hurt that
216 : one can minimize it explicitly too ;-). The switch between
217 : different choices of max() happens when overhead=0.018.]
218 :
219 : Thus the problem is minimizing (1 + slow2_size/arena)*overhead**0.29.
220 : This boils down to F=((X+A)/(X+B))X^alpha, X=overhead,
221 : B = (1 - fixed_to_cache/cache_arena), A = B + slow2_size/cache_arena,
222 : alpha = 0.38, and X>=0.018, X>-B.
223 :
224 : We need to find the rightmost root of (X+A)*(X+B) - alpha(A-B)X to the
225 : right of 0.018 (if such exists and is below Xmax). Then we manually
226 : check the remaining region [0, 0.018].
227 :
228 : Since we cannot trust the purely-experimental cache-hit slowdown
229 : function, as a sanity check always prefer fitting into the
230 : cache (or "almost fitting") if F-law predicts that the larger
231 : value of the arena provides less than 10% speedup.
232 : */
233 :
234 : /* The simplest case: we fit into cache */
235 1806 : asize = cache_arena - fixed_to_cache;
236 1806 : if (total <= asize) return total;
237 : /* The simple case: fitting into cache doesn't slow us down more than 10% */
238 1806 : if (asize > 10 * slow2_size) return asize;
239 : /* Slowdown of not fitting into cache is significant. Try to optimize.
240 : Do not be afraid to spend some time on optimization - in trivial
241 : cases we do not reach this point; any gain we get should
242 : compensate the time spent on optimization. */
243 :
244 0 : B = (1 - ((double)fixed_to_cache)/cache_arena);
245 0 : A = B + ((double)slow2_size)/cache_arena;
246 0 : C2 = A*B;
247 0 : C1 = (A + B - 1/alpha*(A - B))/2;
248 0 : D = C1*C1 - C2;
249 0 : if (D > 0)
250 0 : V = cut_off*cut_off + 2*C1*cut_off + C2; /* Value at CUT_OFF */
251 : else
252 0 : V = 0; /* Peacify the warning */
253 0 : Xmin = cut_off;
254 0 : Xmax = ((double)total - fixed_to_cache)/cache_arena; /* Two candidates */
255 :
256 0 : if ( D <= 0 || (V >= 0 && C1 + cut_off >= 0) ) /* slowdown increasing */
257 0 : Xmax = cut_off; /* Only one candidate */
258 0 : else if (V >= 0 && /* slowdown concave down */
259 0 : ((Xmax + C1) <= 0 || (Xmax*Xmax + 2*C1*Xmax + C2) <= 0))
260 : /* DO NOTHING */; /* Keep both candidates */
261 0 : else if (V <= 0 && (Xmax*Xmax + 2*C1*Xmax + C2) <= 0) /*slowdown decreasing*/
262 0 : Xmin = cut_off; /* Only one candidate */
263 : else /* Now we know: 2 roots, the largest is in CUT_OFF..Xmax */
264 0 : Xmax = sqrt(D) - C1;
265 0 : if (Xmax != Xmin) { /* Xmin == CUT_OFF; Check which one is better */
266 0 : double v1 = (cut_off + A)/(cut_off + B);
267 0 : double v2 = 2.33 * (Xmax + A)/(Xmax + B) * pow(Xmax, alpha);
268 :
269 0 : if (1.1 * v2 >= v1) /* Prefer fitting into the cache if slowdown < 10% */
270 0 : V = v1;
271 : else
272 0 : { Xmin = Xmax; V = v2; }
273 0 : } else if (B > 0) /* We need V */
274 0 : V = 2.33 * (Xmin + A)/(Xmin + B) * pow(Xmin, alpha);
275 0 : if (B > 0 && 1.1 * V > A/B) /* Now Xmin is the minumum. Compare with 0 */
276 0 : Xmin = 0;
277 :
278 0 : asize = (ulong)((1 + Xmin)*cache_arena - fixed_to_cache);
279 0 : if (asize > total) asize = total; /* May happen due to approximations */
280 0 : return asize;
281 : }
282 :
283 : /* Use as in
284 : install(set_optimize,lLDG) \\ Through some M too?
285 : set_optimize(2,1) \\ disable dependence on limit
286 : \\ 1: how much cache usable, 2: slowdown of setup, 3: alpha, 4: cutoff
287 : \\ 2,3,4 are in units of 0.001
288 :
289 : { time_primes_arena(ar,limit) = \\ ar = arena size in K
290 : set_optimize(1,floor(ar*1024));
291 : default(primelimit, 200 000); \\ 100000 results in *larger* malloc()!
292 : gettime;
293 : default(primelimit, floor(limit));
294 : if(ar >= 1, ar=floor(ar));
295 : print("arena "ar"K => "gettime"ms");
296 : }
297 : */
298 : long
299 0 : set_optimize(long what, GEN g)
300 : {
301 0 : long ret = 0;
302 :
303 0 : switch (what) {
304 0 : case 1:
305 0 : ret = (long)cache_model.arena;
306 0 : break;
307 0 : case 2:
308 0 : ret = (long)(slow2_in_roots * 1000);
309 0 : break;
310 0 : case 3:
311 0 : ret = (long)(cache_model.power * 1000);
312 0 : break;
313 0 : case 4:
314 0 : ret = (long)(cache_model.cutoff * 1000);
315 0 : break;
316 0 : default:
317 0 : pari_err_BUG("set_optimize");
318 0 : break;
319 : }
320 0 : if (g != NULL) {
321 0 : ulong val = itou(g);
322 :
323 0 : switch (what) {
324 0 : case 1: cache_model.arena = val; break;
325 0 : case 2: slow2_in_roots = (double)val / 1000.; break;
326 0 : case 3: cache_model.power = (double)val / 1000.; break;
327 0 : case 4: cache_model.cutoff = (double)val / 1000.; break;
328 : }
329 0 : }
330 0 : return ret;
331 : }
332 :
333 : /* s is odd; prime differences (starting from 5-3=2) start at known_primes[2],
334 : terminated by a 0 byte. Checks n odd numbers starting at 'start', setting
335 : bytes starting at data to 0 (composite) or 1 (prime) */
336 : static void
337 6918 : sieve_chunk(byteptr known_primes, ulong s, byteptr data, ulong n)
338 : {
339 6918 : ulong p, cnt = n-1, start = s, delta = 1;
340 : byteptr q;
341 :
342 6918 : memset(data, 0, n);
343 6918 : start >>= 1; /* (start - 1)/2 */
344 6918 : start += n; /* Corresponds to the end */
345 : /* data corresponds to start, q runs over primediffs */
346 993864 : for (q = known_primes + 1, p = 3; delta; delta = *++q, p += delta)
347 : { /* first odd number >= start > p and divisible by p
348 : = last odd number <= start + 2p - 2 and 0 (mod p)
349 : = p + last number <= start + p - 2 and 0 (mod 2p)
350 : = p + start+p-2 - (start+p-2) % 2p
351 : = start + 2(p - 1 - ((start-1)/2 + (p-1)/2) % p). */
352 986946 : long off = cnt - ((start+(p>>1)) % p);
353 1576540030 : while (off >= 0) { data[off] = 1; off -= p; }
354 : }
355 6918 : }
356 :
357 : static void
358 1806 : set_prodprimes(void)
359 : {
360 1806 : pari_sp ltop = avma, av;
361 1806 : long m = expu(_maxprime) + 1 - 7;
362 1806 : GEN W, w, v = primes_interval_zv(3, _maxprime);
363 1806 : long s, j, jold, lv = lg(v), u = 1;
364 1806 : ulong b = 1UL << 8;
365 :
366 1806 : W = cgetg(m+1, t_VEC);
367 148137150 : for (jold = j = 1; j < lv; j++)
368 148135344 : if ((ulong)v[j] >= b)
369 : {
370 23478 : long lw = (j == lv-1? lv:j) - jold + 1;
371 23478 : w = v+jold-1; w[0] = evaltyp(t_VECSMALL) | _evallg(lw);
372 23478 : gel(W,u++) = zv_prod_Z(w); /* p_jold ... p_{j-1} */
373 23478 : jold = j; b *= 2;
374 23478 : if (b > _maxprime) b = _maxprime; /* truncate last run */
375 : }
376 1806 : m = u - 1; setlg(W, u);
377 23478 : for (j = 2; j <= m; j++) gel(W,j) = mulii(gel(W,j-1), gel(W,j));
378 1806 : s = gsizeword(W);
379 1806 : w = (GEN)pari_malloc(s*sizeof(long));
380 1806 : av = (pari_sp)(w + s);
381 1806 : _prodprimes_addr = w;
382 1806 : _prodprimes = gcopy_avma(W, &av);
383 1806 : set_avma(ltop);
384 1806 : }
385 :
386 : /* assume maxnum <= 436273289 < 2^29 */
387 : static void
388 1806 : initprimes0(ulong maxnum, long *lenp, ulong *lastp, byteptr p1)
389 : {
390 1806 : pari_sp av = avma, bot = pari_mainstack->bot;
391 : long alloced, psize;
392 : byteptr q, end, p, end1, plast, curdiff;
393 : ulong last, remains, curlow, rootnum, asize;
394 : ulong prime_above;
395 : byteptr p_prime_above;
396 :
397 1806 : maxnum |= 1; /* make it odd. */
398 : /* base case */
399 1806 : if (maxnum < 1ul<<17) { initprimes1(maxnum>>1, lenp, lastp, p1); return; }
400 :
401 : /* Checked to be enough up to 40e6, attained at 155893 */
402 1806 : rootnum = usqrt(maxnum) | 1;
403 1806 : initprimes1(rootnum>>1, &psize, &last, p1);
404 1806 : end1 = p1 + psize - 1;
405 1806 : remains = (maxnum - last) >> 1; /* number of odd numbers to check */
406 :
407 : /* we access primes array of psize too; but we access it consecutively,
408 : * thus we do not include it in fixed_to_cache */
409 1806 : asize = good_arena_size((ulong)(rootnum * slow2_in_roots), remains+1, 0,
410 : &cache_model) - 1;
411 : /* enough room on the stack ? */
412 1806 : alloced = (((byteptr)avma) <= ((byteptr)bot) + asize);
413 1806 : if (alloced)
414 0 : p = (byteptr)pari_malloc(asize+1);
415 : else
416 1806 : p = (byteptr)stack_malloc(asize+1);
417 1806 : end = p + asize; /* the 0 sentinel goes at end. */
418 1806 : curlow = last + 2; /* First candidate: know primes up to last (odd). */
419 1806 : curdiff = end1;
420 :
421 : /* During each iteration p..end-1 represents a range of odd
422 : numbers. plast is a pointer which represents the last prime seen,
423 : it may point before p..end-1. */
424 1806 : plast = p - 1;
425 1806 : p_prime_above = p1 + 2;
426 1806 : prime_above = 3;
427 8724 : while (remains)
428 : { /* cycle over arenas; performance not crucial */
429 : unsigned char was_delta;
430 6918 : if (asize > remains) { asize = remains; end = p + asize; }
431 : /* Fake the upper limit appropriate for the given arena */
432 315744 : while (prime_above*prime_above <= curlow + (asize << 1) && *p_prime_above)
433 308826 : prime_above += *p_prime_above++;
434 6918 : was_delta = *p_prime_above;
435 6918 : *p_prime_above = 0; /* sentinel for sieve_chunk */
436 6918 : sieve_chunk(p1, curlow, p, asize);
437 6918 : *p_prime_above = was_delta; /* restore */
438 :
439 6918 : p[asize] = 0; /* sentinel */
440 6918 : for (q = p; ; plast = q++)
441 : { /* q runs over addresses corresponding to primes */
442 945949986 : while (*q) q++; /* use sentinel at end */
443 147833436 : if (q >= end) break;
444 147826518 : *curdiff++ = (unsigned char)(q-plast) << 1; /* < 255 for q < 436273291 */
445 : }
446 6918 : plast -= asize;
447 6918 : remains -= asize;
448 6918 : curlow += (asize<<1);
449 : }
450 1806 : last = curlow - ((p - plast) << 1);
451 1806 : *curdiff++ = 0; /* sentinel */
452 1806 : *lenp = curdiff - p1;
453 1806 : *lastp = last;
454 1806 : if (alloced) pari_free(p); else set_avma(av);
455 : }
456 :
457 : ulong
458 48196957 : maxprime(void) { return diffptr ? _maxprime : 0; }
459 : ulong
460 0 : maxprimelim(void) { return diffptr ? _maxprimelim : 0; }
461 : ulong
462 434 : maxprimeN(void) { return diffptr ? diffptrlen-1: 0; }
463 : GEN
464 14300704 : prodprimes(void) { return diffptr ? _prodprimes: NULL; }
465 :
466 : void
467 0 : maxprime_check(ulong c) { if (_maxprime < c) pari_err_MAXPRIME(c); }
468 :
469 : /* We ensure 65302 <= maxnum <= 436273289: the LHS ensures modular function
470 : * have enough fast primes to work, the RHS ensures that p_{n+1} - p_n < 255
471 : * (N.B. RHS would be incorrect since initprimes0 would make it odd, thereby
472 : * increasing it by 1) */
473 : byteptr
474 1806 : initprimes(ulong maxnum, long *lenp, ulong *lastp)
475 : {
476 : byteptr t;
477 1806 : if (maxnum < 65537)
478 0 : maxnum = 65537;
479 1806 : else if (maxnum > 436273289)
480 0 : maxnum = 436273289;
481 1806 : t = (byteptr)pari_malloc((size_t) (1.09 * maxnum/log((double)maxnum)) + 146);
482 1806 : initprimes0(maxnum, lenp, lastp, t);
483 1806 : _maxprimelim = maxnum;
484 1806 : return (byteptr)pari_realloc(t, *lenp);
485 : }
486 :
487 : void
488 1806 : initprimetable(ulong maxnum)
489 : {
490 : long len;
491 : ulong last;
492 1806 : byteptr p = initprimes(maxnum, &len, &last), old = diffptr;
493 1806 : diffptrlen = minss(diffptrlen, len);
494 1806 : _maxprime = minss(_maxprime,last); /*Protect against ^C*/
495 1806 : diffptr = p; diffptrlen = len; _maxprime = last;
496 1806 : set_prodprimes();
497 1806 : if (old) free(old);
498 1806 : }
499 :
500 : /* all init_primepointer_xx routines set *ptr to the corresponding place
501 : * in prime table */
502 : /* smallest p >= a */
503 : ulong
504 0 : init_primepointer_geq(ulong a, byteptr *pd)
505 : {
506 : ulong n, p;
507 0 : prime_table_next_p(a, pd, &p, &n);
508 0 : return p;
509 : }
510 : /* largest p < a */
511 : ulong
512 17381474 : init_primepointer_lt(ulong a, byteptr *pd)
513 : {
514 : ulong n, p;
515 17381474 : prime_table_next_p(a, pd, &p, &n);
516 17380740 : PREC_PRIME_VIADIFF(p, *pd);
517 17380740 : return p;
518 : }
519 : /* largest p <= a */
520 : ulong
521 0 : init_primepointer_leq(ulong a, byteptr *pd)
522 : {
523 : ulong n, p;
524 0 : prime_table_next_p(a, pd, &p, &n);
525 0 : if (p != a) PREC_PRIME_VIADIFF(p, *pd);
526 0 : return p;
527 : }
528 : /* smallest p > a */
529 : ulong
530 0 : init_primepointer_gt(ulong a, byteptr *pd)
531 : {
532 : ulong n, p;
533 0 : prime_table_next_p(a, pd, &p, &n);
534 0 : if (p == a) NEXT_PRIME_VIADIFF(p, *pd);
535 0 : return p;
536 : }
537 :
538 : /**********************************************************************/
539 : /*** ***/
540 : /*** forprime ***/
541 : /*** ***/
542 : /**********************************************************************/
543 :
544 : /* return good chunk size for sieve, 16 | chunk + 2 */
545 : static ulong
546 5397563 : optimize_chunk(ulong a, ulong b)
547 : {
548 : /* TODO: Optimize size (surely < 512k to stay in L2 cache, but not so large
549 : * as to force recalculating too often). */
550 5397563 : ulong chunk = 0x80000UL;
551 5397563 : ulong tmp = (b - a) / chunk + 1;
552 :
553 5397563 : if (tmp == 1)
554 0 : chunk = b - a + 16;
555 : else
556 5397563 : chunk = (b - a) / tmp + 15;
557 : /* ensure 16 | chunk + 2 */
558 5397563 : return (((chunk + 2)>>4)<<4) - 2;
559 : }
560 : static void
561 5397584 : sieve_init(forprime_t *T, ulong a, ulong b)
562 : {
563 5397584 : T->sieveb = b;
564 5397584 : T->chunk = optimize_chunk(a, b);
565 : /* >> 1 [only odds] + 3 [convert from bits to bytes] */
566 5397603 : T->isieve = (unsigned char*)stack_malloc(((T->chunk+2) >> 4) + 1);
567 5397603 : T->cache[0] = 0;
568 5397603 : T->a = a;
569 5397603 : T->end = minuu(a + T->chunk, b);
570 5397596 : T->pos = T->maxpos = 0;
571 5397596 : }
572 :
573 : enum {PRST_none, PRST_diffptr, PRST_sieve, PRST_unextprime, PRST_nextprime};
574 :
575 : static void
576 18874049 : u_forprime_set_prime_table(forprime_t *T, ulong a)
577 : {
578 18874049 : T->strategy = PRST_diffptr;
579 18874049 : if (a < 3)
580 : {
581 1494331 : T->p = 0;
582 1494331 : T->d = diffptr;
583 : }
584 : else
585 17379718 : T->p = init_primepointer_lt(a, &T->d);
586 18875681 : }
587 :
588 : /* Set p so that p + q the smallest integer = c (mod q) and > original p.
589 : * Assume 0 < c < q. Set p = 0 on overflow */
590 : static void
591 101667 : arith_set(forprime_t *T)
592 : {
593 101667 : ulong r = T->p % T->q; /* 0 <= r <= min(p, q-1) */
594 101667 : pari_sp av = avma;
595 101667 : GEN d = adduu(T->p - r, T->c);
596 101667 : if (T->c > r) d = subiu(d, T->q);
597 : /* d = c mod q, d = c > r? p-r+c-q: p-r+c, so that
598 : * d <= p and d+q = c>r? p-r+c : p-r+c+q > p */
599 101667 : if (signe(d) <= 0) d = addiu(d, T->q); /* and now d > 0 */
600 101667 : T->p = itou_or_0(d); set_avma(av);
601 101667 : }
602 :
603 : /* run through primes in arithmetic progression = c (mod q) */
604 : static int
605 30600911 : u_forprime_sieve_arith_init(forprime_t *T, struct pari_sieve *psieve,
606 : ulong a, ulong b, ulong c, ulong q)
607 : {
608 : ulong maxp, maxp2;
609 30600911 : if (!odd(b) && b > 2) b--;
610 30600949 : if (a > b || b < 2)
611 : {
612 872040 : T->strategy = PRST_diffptr; /* paranoia */
613 872040 : T->p = 0; /* empty */
614 872040 : T->b = 0; /* empty */
615 872040 : T->d = diffptr;
616 872040 : return 0;
617 : }
618 29728909 : maxp = maxprime();
619 29728260 : if (q != 1)
620 : {
621 604913 : c %= q;
622 604913 : if (ugcd(c,q) != 1) { a = maxuu(a,c); b = minuu(b,c); }
623 604910 : if (odd(q) && (a > 2 || c != 2))
624 : { /* only *odd* primes. If a <= c = 2, then p = 2 must be included :-( */
625 528775 : if (!odd(c)) c += q;
626 528551 : q <<= 1;
627 : }
628 : }
629 29728023 : T->q = q;
630 29728023 : T->c = c;
631 29728023 : T->strategy = PRST_none; /* unknown */
632 29728023 : T->psieve = psieve; /* unused for now */
633 29728023 : T->isieve = NULL; /* unused for now */
634 29728023 : T->b = b;
635 29728023 : if (maxp >= b) { /* [a,b] \subset prime table */
636 16308127 : u_forprime_set_prime_table(T, a);
637 16308742 : return 1;
638 : }
639 : /* b > maxp */
640 13419896 : if (a >= maxp)
641 : {
642 10853746 : T->p = a - 1;
643 10853746 : if (T->q > 1) arith_set(T);
644 : }
645 : else
646 2566150 : u_forprime_set_prime_table(T, a);
647 :
648 13419958 : maxp2 = (maxp & HIGHMASK)? 0 : maxp*maxp;
649 : /* FIXME: should sieve as well if q != 1, adapt sieve code */
650 13419958 : if (q != 1 || (maxp2 && maxp2 <= a)
651 5586961 : || T->b - maxuu(a,maxp) < maxp / expu(b))
652 8022415 : { if (T->strategy==PRST_none) T->strategy = PRST_unextprime; }
653 : else
654 : { /* worth sieving */
655 : #ifdef LONG_IS_64BIT
656 3589650 : const ulong UPRIME_MAX = 18446744073709551557UL;
657 : #else
658 1807941 : const ulong UPRIME_MAX = 4294967291UL;
659 : #endif
660 : ulong sieveb;
661 5397591 : if (b > UPRIME_MAX) b = UPRIME_MAX;
662 5397591 : sieveb = b;
663 5397591 : if (maxp2 && maxp2 < b) sieveb = maxp2;
664 5397591 : if (T->strategy==PRST_none) T->strategy = PRST_sieve;
665 5397591 : sieve_init(T, maxuu(maxp+2, a), sieveb);
666 : }
667 13419960 : return 1;
668 : }
669 :
670 : int
671 27076407 : u_forprime_arith_init(forprime_t *T, ulong a, ulong b, ulong c, ulong q)
672 27076407 : { return u_forprime_sieve_arith_init(T, NULL, a, b, c, q); }
673 :
674 : /* will run through primes in [a,b] */
675 : int
676 26467240 : u_forprime_init(forprime_t *T, ulong a, ulong b)
677 26467240 : { return u_forprime_arith_init(T, a,b, 0,1); }
678 :
679 : /* will run through primes in [a,b] */
680 : static int
681 3518463 : u_forprime_sieve_init(forprime_t *T, struct pari_sieve *s, ulong b)
682 3518463 : { return u_forprime_sieve_arith_init(T, s, s->start, b, s->c, s->q); }
683 :
684 : /* now only run through primes <= c; assume c <= b above */
685 : void
686 63 : u_forprime_restrict(forprime_t *T, ulong c) { T->b = c; }
687 :
688 : /* b = NULL: loop forever */
689 : int
690 2168 : forprimestep_init(forprime_t *T, GEN a, GEN b, GEN q)
691 : {
692 : long lb;
693 2168 : a = gceil(a); if (typ(a) != t_INT) pari_err_TYPE("forprime_init",a);
694 2168 : if (signe(a) <= 0) a = gen_1;
695 2168 : if (b && typ(b) != t_INFINITY)
696 : {
697 789 : b = gfloor(b);
698 789 : if (typ(b) != t_INT) pari_err_TYPE("forprime_init",b);
699 789 : if (signe(b) < 0 || cmpii(a,b) > 0)
700 : {
701 7 : T->strategy = PRST_nextprime; /* paranoia */
702 7 : T->bb = T->pp = gen_0; return 0;
703 : }
704 782 : lb = lgefint(b);
705 782 : T->bb = b;
706 : }
707 1379 : else if (!b || inf_get_sign(b) > 0)
708 : {
709 1379 : lb = lgefint(a) + 4;
710 1379 : T->bb = NULL;
711 : }
712 : else /* b == -oo */
713 : {
714 0 : T->strategy = PRST_nextprime; /* paranoia */
715 0 : T->bb = T->pp = gen_0; return 0;
716 : }
717 2161 : T->pp = cgeti(lb);
718 2161 : T->c = 0;
719 2161 : T->q = 1;
720 : /* a, b are positive integers, a <= b */
721 2161 : if (q)
722 : {
723 98 : switch(typ(q))
724 : {
725 28 : case t_INT: break;
726 70 : case t_INTMOD: a = addii(a, modii(subii(gel(q,2),a), gel(q,1)));
727 70 : q = gel(q,1); break;
728 0 : default: pari_err_TYPE("forprimestep_init",q);
729 : }
730 98 : if (signe(q) <= 0) pari_err_TYPE("forprimestep_init (q <= 0)",q);
731 98 : if (equali1(q)) q = NULL;
732 : else
733 : {
734 98 : T->q = itou(q);
735 98 : T->c = umodiu(a, T->q);
736 : }
737 : }
738 2161 : if (lgefint(a) == 3) /* lb == 3 implies b != NULL */
739 2026 : return u_forprime_arith_init(T, uel(a,2), lb == 3? uel(b,2): ULONG_MAX,
740 : T->c, T->q);
741 135 : T->strategy = PRST_nextprime;
742 135 : affii(subiu(a,T->q), T->pp);
743 135 : return 1;
744 : }
745 : int
746 1336 : forprime_init(forprime_t *T, GEN a, GEN b)
747 1336 : { return forprimestep_init(T,a,b,NULL); }
748 :
749 : /* assume a <= b <= maxprime()^2, a,b odd, sieve[n] corresponds to
750 : * a+16*n, a+16*n+2, ..., a+16*n+14 (bits 0 to 7)
751 : * maxpos = index of last sieve cell.
752 : * b-a+2 must be divisible by 16 for use by u_forprime_next */
753 : static void
754 6538 : sieve_block(ulong a, ulong b, ulong maxpos, unsigned char* sieve)
755 : {
756 6538 : ulong p = 2, lim = usqrt(b), sz = (b-a) >> 1;
757 6538 : byteptr d = diffptr+1;
758 6538 : (void)memset(sieve, 0, maxpos+1);
759 : for (;;)
760 18551491 : { /* p is odd */
761 : ulong k, r;
762 18558029 : NEXT_PRIME_VIADIFF(p, d); /* starts at p = 3 */
763 18558029 : if (p > lim) break;
764 :
765 : /* solve a + 2k = 0 (mod p) */
766 18551491 : r = a % p;
767 18551491 : if (r == 0)
768 11354 : k = 0;
769 : else
770 : {
771 18540137 : k = p - r;
772 18540137 : if (odd(k)) k += p;
773 18540137 : k >>= 1;
774 : }
775 : /* m = a + 2k is the smallest odd m >= a, p | m */
776 : /* position n (corresponds to a+2n) is sieve[n>>3], bit n&7 */
777 4387591644 : while (k <= sz) { sieve[k>>3] |= 1 << (k&7); k += p; /* 2k += 2p */ }
778 : }
779 6538 : }
780 :
781 : static void
782 1806 : pari_sieve_init(struct pari_sieve *s, ulong a, ulong b)
783 : {
784 1806 : ulong maxpos= (b - a) >> 4;
785 1806 : s->start = a; s->end = b;
786 1806 : s->sieve = (unsigned char*) pari_malloc(maxpos+1);
787 1806 : s->c = 0; s->q = 1;
788 1806 : sieve_block(a, b, maxpos, s->sieve);
789 1806 : s->maxpos = maxpos; /* must be last in case of SIGINT */
790 1806 : }
791 :
792 : static struct pari_sieve pari_sieve_modular;
793 :
794 : #ifdef LONG_IS_64BIT
795 : #define PARI_MODULAR_BASE ((1UL<<((BITS_IN_LONG-2)>>1))+1)
796 : #else
797 : #define PARI_MODULAR_BASE ((1UL<<(BITS_IN_LONG-1))+1)
798 : #endif
799 :
800 : void
801 1806 : pari_init_primes(ulong maxprime)
802 : {
803 1806 : ulong a = PARI_MODULAR_BASE, b = a + (1UL<<20)-2;
804 1806 : initprimetable(maxprime);
805 1806 : pari_sieve_init(&pari_sieve_modular, a, b);
806 1806 : }
807 :
808 : void
809 1806 : pari_close_primes(void)
810 : {
811 1806 : if (diffptr)
812 : {
813 1806 : pari_free(diffptr);
814 1806 : pari_free(_prodprimes_addr);
815 : }
816 1806 : pari_free(pari_sieve_modular.sieve);
817 1806 : }
818 :
819 : void
820 2693765 : init_modular_small(forprime_t *S)
821 : {
822 : #ifdef LONG_IS_64BIT
823 2308898 : u_forprime_sieve_init(S, &pari_sieve_modular, ULONG_MAX);
824 : #else
825 384867 : ulong a = (1UL<<((BITS_IN_LONG-2)>>1))+1;
826 384867 : u_forprime_init(S, a, ULONG_MAX);
827 : #endif
828 2693756 : }
829 :
830 : void
831 8437595 : init_modular_big(forprime_t *S)
832 : {
833 : #ifdef LONG_IS_64BIT
834 7228029 : u_forprime_init(S, HIGHBIT + 1, ULONG_MAX);
835 : #else
836 1209566 : u_forprime_sieve_init(S, &pari_sieve_modular, ULONG_MAX);
837 : #endif
838 8437592 : }
839 :
840 : /* T->cache is a 0-terminated list of primes, return the first one and
841 : * remove it from list. Most of the time the list contains a single prime */
842 : static ulong
843 120001668 : shift_cache(forprime_t *T)
844 : {
845 : long i;
846 120001668 : T->p = T->cache[0];
847 160640122 : for (i = 1;; i++) /* remove one prime from cache */
848 160640122 : if (! (T->cache[i-1] = T->cache[i]) ) break;
849 120001668 : return T->p;
850 : }
851 :
852 : ulong
853 333849571 : u_forprime_next(forprime_t *T)
854 : {
855 333849571 : if (T->strategy == PRST_diffptr)
856 : {
857 : for(;;)
858 : {
859 359821441 : if (!*(T->d))
860 : {
861 2002 : T->strategy = T->isieve? PRST_sieve: PRST_unextprime;
862 2002 : if (T->q != 1) { arith_set(T); if (!T->p) return 0; }
863 : /* T->p possibly not a prime if q != 1 */
864 2002 : break;
865 : }
866 : else
867 : {
868 359819439 : NEXT_PRIME_VIADIFF(T->p, T->d);
869 359819439 : if (T->p > T->b) return 0;
870 359687391 : if (T->q == 1 || T->p % T->q == T->c) return T->p;
871 : }
872 : }
873 : }
874 132912923 : if (T->strategy == PRST_sieve)
875 : {
876 : ulong n;
877 120001879 : if (T->cache[0]) return shift_cache(T);
878 85594600 : NEXT_CHUNK:
879 85599332 : if (T->psieve)
880 : {
881 3518446 : T->sieve = T->psieve->sieve;
882 3518446 : T->end = T->psieve->end;
883 3518446 : if (T->end > T->sieveb) T->end = T->sieveb;
884 3518446 : T->maxpos = T->psieve->maxpos;
885 3518446 : T->pos = 0;
886 3518446 : T->psieve = NULL;
887 : }
888 130958245 : for (n = T->pos; n < T->maxpos; n++)
889 130950838 : if (T->sieve[n] != 0xFF)
890 : {
891 85591925 : unsigned char mask = T->sieve[n];
892 85591925 : ulong p = T->a + (n<<4);
893 85591925 : long i = 0;
894 85591925 : T->pos = n;
895 85591925 : if (!(mask & 1)) T->cache[i++] = p;
896 85591925 : if (!(mask & 2)) T->cache[i++] = p+2;
897 85591925 : if (!(mask & 4)) T->cache[i++] = p+4;
898 85591925 : if (!(mask & 8)) T->cache[i++] = p+6;
899 85591925 : if (!(mask & 16)) T->cache[i++] = p+8;
900 85591925 : if (!(mask & 32)) T->cache[i++] = p+10;
901 85591925 : if (!(mask & 64)) T->cache[i++] = p+12;
902 85591925 : if (!(mask &128)) T->cache[i++] = p+14;
903 85591925 : T->cache[i] = 0;
904 85591925 : T->pos = n+1;
905 85591925 : return shift_cache(T);
906 : }
907 : /* n = T->maxpos, last cell: check p <= b */
908 7407 : if (T->maxpos && n == T->maxpos && T->sieve[n] != 0xFF)
909 : {
910 2611 : unsigned char mask = T->sieve[n];
911 2611 : ulong p = T->a + (n<<4);
912 2611 : long i = 0;
913 2611 : T->pos = n;
914 2611 : if (!(mask & 1) && p <= T->sieveb) T->cache[i++] = p;
915 2611 : if (!(mask & 2) && p <= T->sieveb-2) T->cache[i++] = p+2;
916 2611 : if (!(mask & 4) && p <= T->sieveb-4) T->cache[i++] = p+4;
917 2611 : if (!(mask & 8) && p <= T->sieveb-6) T->cache[i++] = p+6;
918 2611 : if (!(mask & 16) && p <= T->sieveb-8) T->cache[i++] = p+8;
919 2611 : if (!(mask & 32) && p <= T->sieveb-10) T->cache[i++] = p+10;
920 2611 : if (!(mask & 64) && p <= T->sieveb-12) T->cache[i++] = p+12;
921 2611 : if (!(mask &128) && p <= T->sieveb-14) T->cache[i++] = p+14;
922 2611 : if (i)
923 : {
924 2457 : T->cache[i] = 0;
925 2457 : T->pos = n+1;
926 2457 : return shift_cache(T);
927 : }
928 : }
929 :
930 4950 : if (T->maxpos && T->end >= T->sieveb) /* done with sieves ? */
931 : {
932 217 : if (T->sieveb == T->b && T->b != ULONG_MAX) return 0;
933 1 : T->strategy = PRST_unextprime;
934 : }
935 : else
936 : { /* initialize next chunk */
937 4733 : T->sieve = T->isieve;
938 4733 : if (T->maxpos == 0)
939 1015 : T->a |= 1; /* first time; ensure odd */
940 : else
941 3718 : T->a = (T->end + 2) | 1;
942 4733 : T->end = T->a + T->chunk; /* may overflow */
943 4733 : if (T->end < T->a || T->end > T->sieveb) T->end = T->sieveb;
944 : /* end and a are odd; sieve[k] contains the a + 8*2k + (0,2,...,14).
945 : * The largest k is (end-a) >> 4 */
946 4733 : T->pos = 0;
947 4733 : T->maxpos = (T->end - T->a) >> 4;
948 4733 : sieve_block(T->a, T->end, T->maxpos, T->sieve);
949 4732 : goto NEXT_CHUNK;
950 : }
951 : }
952 12911045 : if (T->strategy == PRST_unextprime)
953 : {
954 12911104 : if (T->q == 1)
955 : {
956 : #ifdef LONG_IS_64BIT
957 12757007 : switch(T->p)
958 : {
959 : #define retp(x) return T->p = (HIGHBIT+x <= T->b)? HIGHBIT+x: 0
960 7227977 : case HIGHBIT: retp(29);
961 3161268 : case HIGHBIT + 29: retp(99);
962 336411 : case HIGHBIT + 99: retp(123);
963 190662 : case HIGHBIT +123: retp(131);
964 132156 : case HIGHBIT +131: retp(155);
965 111525 : case HIGHBIT +155: retp(255);
966 89676 : case HIGHBIT +255: retp(269);
967 80198 : case HIGHBIT +269: retp(359);
968 65357 : case HIGHBIT +359: retp(435);
969 57600 : case HIGHBIT +435: retp(449);
970 50720 : case HIGHBIT +449: retp(453);
971 46755 : case HIGHBIT +453: retp(485);
972 40422 : case HIGHBIT +485: retp(491);
973 37083 : case HIGHBIT +491: retp(543);
974 34077 : case HIGHBIT +543: retp(585);
975 31471 : case HIGHBIT +585: retp(599);
976 27510 : case HIGHBIT +599: retp(753);
977 26742 : case HIGHBIT +753: retp(849);
978 25794 : case HIGHBIT +849: retp(879);
979 24211 : case HIGHBIT +879: retp(885);
980 23500 : case HIGHBIT +885: retp(903);
981 23002 : case HIGHBIT +903: retp(995);
982 : #undef retp
983 : }
984 : #endif
985 913944 : T->p = unextprime(T->p + 1);
986 : }
987 : else do {
988 2798520 : T->p += T->q;
989 2798520 : if (T->p < T->q || T->p > T->b) { T->p = 0; break; } /* overflow */
990 2798494 : } while (!uisprime(T->p));
991 1067019 : if (T->p && T->p <= T->b) return T->p;
992 : /* overflow ulong, switch to GEN */
993 6634 : T->strategy = PRST_nextprime;
994 : }
995 6575 : return 0; /* overflow */
996 : }
997 :
998 : GEN
999 45025303 : forprime_next(forprime_t *T)
1000 : {
1001 : pari_sp av;
1002 : GEN p;
1003 45025303 : if (T->strategy != PRST_nextprime)
1004 : {
1005 45017501 : ulong u = u_forprime_next(T);
1006 45017501 : if (u) { affui(u, T->pp); return T->pp; }
1007 : /* failure */
1008 625 : if (T->strategy != PRST_nextprime) return NULL; /* we're done */
1009 : /* overflow ulong, switch to GEN */
1010 54 : u = ULONG_MAX;
1011 54 : if (T->q > 1) u -= (ULONG_MAX-T->c) % T->q;
1012 54 : affui(u, T->pp);
1013 : }
1014 7856 : av = avma; p = T->pp;
1015 7856 : if (T->q == 1)
1016 : {
1017 7755 : p = nextprime(addiu(p, 1));
1018 7755 : if (T->bb && abscmpii(p, T->bb) > 0) return gc_NULL(av);
1019 : } else do {
1020 3062 : p = addiu(p, T->q);
1021 3062 : if (T->bb && abscmpii(p, T->bb) > 0) return gc_NULL(av);
1022 3034 : } while (!BPSW_psp(p));
1023 7695 : affii(p, T->pp); return gc_const(av, T->pp);
1024 : }
1025 :
1026 : void
1027 812 : forprimestep(GEN a, GEN b, GEN q, GEN code)
1028 : {
1029 812 : pari_sp av = avma;
1030 : forprime_t T;
1031 :
1032 812 : if (!forprimestep_init(&T, a,b,q)) { set_avma(av); return; }
1033 :
1034 805 : push_lex(T.pp,code);
1035 37478 : while(forprime_next(&T))
1036 : {
1037 37086 : closure_evalvoid(code); if (loop_break()) break;
1038 : /* p changed in 'code', complain */
1039 36680 : if (get_lex(-1) != T.pp)
1040 7 : pari_err(e_MISC,"prime index read-only: was changed to %Ps", get_lex(-1));
1041 : }
1042 798 : pop_lex(1); set_avma(av);
1043 : }
1044 : void
1045 721 : forprime(GEN a, GEN b, GEN code) { return forprimestep(a,b,NULL,code); }
1046 :
1047 : int
1048 70 : forcomposite_init(forcomposite_t *C, GEN a, GEN b)
1049 : {
1050 70 : pari_sp av = avma;
1051 70 : a = gceil(a);
1052 70 : if (typ(a)!=t_INT) pari_err_TYPE("forcomposite",a);
1053 70 : if (b) {
1054 63 : if (typ(b) == t_INFINITY) b = NULL;
1055 : else
1056 : {
1057 56 : b = gfloor(b);
1058 56 : if (typ(b)!=t_INT) pari_err_TYPE("forcomposite",b);
1059 : }
1060 : }
1061 70 : if (signe(a) < 0) pari_err_DOMAIN("forcomposite", "a", "<", gen_0, a);
1062 70 : if (abscmpiu(a, 4) < 0) a = utoipos(4);
1063 70 : C->first = 1;
1064 70 : if (!forprime_init(&C->T, a,b) && cmpii(a,b) > 0)
1065 : {
1066 7 : C->n = gen_1; /* in case caller forgets to check the return value */
1067 7 : C->b = gen_0; return gc_bool(av,0);
1068 : }
1069 63 : C->n = setloop(a);
1070 63 : C->b = b;
1071 63 : C->p = NULL; return 1;
1072 : }
1073 :
1074 : GEN
1075 238 : forcomposite_next(forcomposite_t *C)
1076 : {
1077 238 : if (C->first) /* first call ever */
1078 : {
1079 63 : C->first = 0;
1080 63 : C->p = forprime_next(&C->T);
1081 : }
1082 : else
1083 175 : C->n = incloop(C->n);
1084 238 : if (C->p)
1085 : {
1086 161 : if (cmpii(C->n, C->p) < 0) return C->n;
1087 77 : C->n = incloop(C->n);
1088 : /* n = p+1 */
1089 77 : C->p = forprime_next(&C->T); /* nextprime(p) > n */
1090 77 : if (C->p) return C->n;
1091 : }
1092 105 : if (!C->b || cmpii(C->n, C->b) <= 0) return C->n;
1093 42 : return NULL;
1094 : }
1095 :
1096 : void
1097 70 : forcomposite(GEN a, GEN b, GEN code)
1098 : {
1099 70 : pari_sp av = avma;
1100 : forcomposite_t T;
1101 : GEN n;
1102 70 : if (!forcomposite_init(&T,a,b)) return;
1103 63 : push_lex(T.n,code);
1104 238 : while((n = forcomposite_next(&T)))
1105 : {
1106 196 : closure_evalvoid(code); if (loop_break()) break;
1107 : /* n changed in 'code', complain */
1108 182 : if (get_lex(-1) != n)
1109 7 : pari_err(e_MISC,"index read-only: was changed to %Ps", get_lex(-1));
1110 : }
1111 56 : pop_lex(1); set_avma(av);
1112 : }
|