Line data Source code
1 : /* Copyright (C) 2000 The PARI group.
2 :
3 : This file is part of the PARI/GP package.
4 :
5 : PARI/GP is free software; you can redistribute it and/or modify it under the
6 : terms of the GNU General Public License as published by the Free Software
7 : Foundation; either version 2 of the License, or (at your option) any later
8 : version. It is distributed in the hope that it will be useful, but WITHOUT
9 : ANY WARRANTY WHATSOEVER.
10 :
11 : Check the License for details. You should have received a copy of it, along
12 : with the package; see the file 'COPYING'. If not, write to the Free Software
13 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
14 : #include "pari.h"
15 : #include "paripriv.h"
16 :
17 : #define DEBUGLEVEL DEBUGLEVEL_factorint
18 :
19 : /***********************************************************************/
20 : /** PRIMES IN SUCCESSION **/
21 : /***********************************************************************/
22 :
23 : /* map from prime residue classes mod 210 to their numbers in {0...47}.
24 : * Subscripts into this array take the form ((k-1)%210)/2, ranging from
25 : * 0 to 104. Unused entries are */
26 : #define NPRC 128 /* nonprime residue class */
27 :
28 : static unsigned char prc210_no[] = {
29 : 0, NPRC, NPRC, NPRC, NPRC, 1, 2, NPRC, 3, 4, NPRC, /* 21 */
30 : 5, NPRC, NPRC, 6, 7, NPRC, NPRC, 8, NPRC, 9, /* 41 */
31 : 10, NPRC, 11, NPRC, NPRC, 12, NPRC, NPRC, 13, 14, NPRC, /* 63 */
32 : NPRC, 15, NPRC, 16, 17, NPRC, NPRC, 18, NPRC, 19, /* 83 */
33 : NPRC, NPRC, 20, NPRC, NPRC, NPRC, 21, NPRC, 22, 23, NPRC, /* 105 */
34 : 24, 25, NPRC, 26, NPRC, NPRC, NPRC, 27, NPRC, NPRC, /* 125 */
35 : 28, NPRC, 29, NPRC, NPRC, 30, 31, NPRC, 32, NPRC, NPRC, /* 147 */
36 : 33, 34, NPRC, NPRC, 35, NPRC, NPRC, 36, NPRC, 37, /* 167 */
37 : 38, NPRC, 39, NPRC, NPRC, 40, 41, NPRC, NPRC, 42, NPRC, /* 189 */
38 : 43, 44, NPRC, 45, 46, NPRC, NPRC, NPRC, NPRC, 47, /* 209 */
39 : };
40 :
41 : /* first differences of the preceding */
42 : static unsigned char prc210_d1[] = {
43 : 10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6,
44 : 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6,
45 : 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2, 10, 2,
46 : };
47 :
48 : static int
49 919287 : unextprime_overflow(ulong n)
50 : {
51 : #ifdef LONG_IS_64BIT
52 899897 : return (n > (ulong)-59);
53 : #else
54 19390 : return (n > (ulong)-5);
55 : #endif
56 : }
57 :
58 : /* return 0 for overflow */
59 : ulong
60 931273 : unextprime(ulong n)
61 : {
62 : long rc, rc0, rcn;
63 :
64 931273 : switch(n) {
65 6718 : case 0: case 1: case 2: return 2;
66 2434 : case 3: return 3;
67 1668 : case 4: case 5: return 5;
68 1162 : case 6: case 7: return 7;
69 : }
70 919291 : if (unextprime_overflow(n)) return 0;
71 : /* here n > 7 */
72 919255 : n |= 1; /* make it odd */
73 919255 : rc = rc0 = n % 210;
74 : /* find next prime residue class mod 210 */
75 : for(;;)
76 : {
77 1979797 : rcn = (long)(prc210_no[rc>>1]);
78 1979797 : if (rcn != NPRC) break;
79 1060542 : rc += 2; /* cannot wrap since 209 is coprime and rc odd */
80 : }
81 919255 : if (rc > rc0) n += rc - rc0;
82 : /* now find an actual (pseudo)prime */
83 : for(;;)
84 : {
85 9496958 : if (uisprime(n)) break;
86 8577703 : n += prc210_d1[rcn];
87 8577703 : if (++rcn > 47) rcn = 0;
88 : }
89 919272 : return n;
90 : }
91 :
92 : GEN
93 126839 : nextprime(GEN n)
94 : {
95 : long rc, rc0, rcn;
96 126839 : pari_sp av = avma;
97 :
98 126839 : if (typ(n) != t_INT)
99 : {
100 14 : n = gceil(n);
101 14 : if (typ(n) != t_INT) pari_err_TYPE("nextprime",n);
102 : }
103 126832 : if (signe(n) <= 0) { set_avma(av); return gen_2; }
104 126832 : if (lgefint(n) == 3)
105 : {
106 118696 : ulong k = unextprime(uel(n,2));
107 118696 : set_avma(av);
108 118696 : if (k) return utoipos(k);
109 : #ifdef LONG_IS_64BIT
110 6 : return uutoi(1,13);
111 : #else
112 1 : return uutoi(1,15);
113 : #endif
114 : }
115 : /* here n > 7 */
116 8136 : if (!mod2(n)) n = addui(1,n);
117 8136 : rc = rc0 = umodiu(n, 210);
118 : /* find next prime residue class mod 210 */
119 : for(;;)
120 : {
121 17707 : rcn = (long)(prc210_no[rc>>1]);
122 17707 : if (rcn != NPRC) break;
123 9571 : rc += 2; /* cannot wrap since 209 is coprime and rc odd */
124 : }
125 8136 : if (rc > rc0) n = addui(rc - rc0, n);
126 : /* now find an actual (pseudo)prime */
127 : for(;;)
128 : {
129 84490 : if (BPSW_psp(n)) break;
130 76354 : n = addui(prc210_d1[rcn], n);
131 76354 : if (++rcn > 47) rcn = 0;
132 : }
133 8136 : if (avma == av) return icopy(n);
134 8136 : return gerepileuptoint(av, n);
135 : }
136 :
137 : ulong
138 32 : uprecprime(ulong n)
139 : {
140 : long rc, rc0, rcn;
141 : { /* check if n <= 10 */
142 32 : if (n <= 1) return 0;
143 25 : if (n == 2) return 2;
144 18 : if (n <= 4) return 3;
145 18 : if (n <= 6) return 5;
146 18 : if (n <= 10) return 7;
147 : }
148 : /* here n >= 11 */
149 18 : if (!(n % 2)) n--;
150 18 : rc = rc0 = n % 210;
151 : /* find previous prime residue class mod 210 */
152 : for(;;)
153 : {
154 36 : rcn = (long)(prc210_no[rc>>1]);
155 36 : if (rcn != NPRC) break;
156 18 : rc -= 2; /* cannot wrap since 1 is coprime and rc odd */
157 : }
158 18 : if (rc < rc0) n += rc - rc0;
159 : /* now find an actual (pseudo)prime */
160 : for(;;)
161 : {
162 36 : if (uisprime(n)) break;
163 18 : if (--rcn < 0) rcn = 47;
164 18 : n -= prc210_d1[rcn];
165 : }
166 18 : return n;
167 : }
168 :
169 : GEN
170 49 : precprime(GEN n)
171 : {
172 : long rc, rc0, rcn;
173 49 : pari_sp av = avma;
174 :
175 49 : if (typ(n) != t_INT)
176 : {
177 14 : n = gfloor(n);
178 14 : if (typ(n) != t_INT) pari_err_TYPE("nextprime",n);
179 : }
180 42 : if (signe(n) <= 0) { set_avma(av); return gen_0; }
181 42 : if (lgefint(n) <= 3)
182 : {
183 32 : ulong k = uel(n,2);
184 32 : return gc_utoi(av, uprecprime(k));
185 : }
186 10 : if (!mod2(n)) n = subiu(n,1);
187 10 : rc = rc0 = umodiu(n, 210);
188 : /* find previous prime residue class mod 210 */
189 : for(;;)
190 : {
191 20 : rcn = (long)(prc210_no[rc>>1]);
192 20 : if (rcn != NPRC) break;
193 10 : rc -= 2; /* cannot wrap since 1 is coprime and rc odd */
194 : }
195 10 : if (rc0 > rc) n = subiu(n, rc0 - rc);
196 : /* now find an actual (pseudo)prime */
197 : for(;;)
198 : {
199 48 : if (BPSW_psp(n)) break;
200 38 : if (--rcn < 0) rcn = 47;
201 38 : n = subiu(n, prc210_d1[rcn]);
202 : }
203 10 : if (avma == av) return icopy(n);
204 10 : return gerepileuptoint(av, n);
205 : }
206 :
207 : /* Find next single-word prime strictly larger than p.
208 : * If **d is non-NULL (somewhere in a diffptr), this is p + *(*d)++;
209 : * otherwise imitate nextprime().
210 : * *rcn = NPRC or the correct residue class for the current p; we'll use this
211 : * to track the current prime residue class mod 210 once we're out of range of
212 : * the diffptr table, and we'll update it before that if it isn't NPRC.
213 : *
214 : * *q is incremented whenever q!=NULL and we wrap from 209 mod 210 to
215 : * 1 mod 210 */
216 : ulong
217 4634168 : snextpr(ulong p, byteptr *d, long *rcn, long *q, int (*ispsp)(ulong))
218 : {
219 4634168 : if (**d)
220 : {
221 4634168 : byteptr dd = *d;
222 4634168 : long d1 = 0;
223 :
224 4634168 : NEXT_PRIME_VIADIFF(d1,dd);
225 : /* d1 = nextprime(p+1) - p */
226 4634168 : if (*rcn != NPRC)
227 : {
228 16214716 : while (d1 > 0)
229 : {
230 11597848 : d1 -= prc210_d1[*rcn];
231 11597848 : if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
232 : }
233 : /* assert(d1 == 0) */
234 : }
235 4634168 : NEXT_PRIME_VIADIFF(p,*d);
236 4634168 : return p;
237 : }
238 0 : if (unextprime_overflow(p)) pari_err_OVERFLOW("snextpr");
239 : /* we are beyond the diffptr table, initialize if needed */
240 0 : if (*rcn == NPRC) *rcn = prc210_no[(p % 210) >> 1]; /* != NPRC */
241 : /* look for the next one */
242 : do {
243 0 : p += prc210_d1[*rcn];
244 0 : if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
245 0 : } while (!ispsp(p));
246 0 : return p;
247 : }
248 :
249 : /********************************************************************/
250 : /** **/
251 : /** INTEGER FACTORIZATION **/
252 : /** **/
253 : /********************************************************************/
254 : int factor_add_primes = 0, factor_proven = 0;
255 :
256 : /***********************************************************************/
257 : /** **/
258 : /** FACTORIZATION (ECM) -- GN Jul-Aug 1998 **/
259 : /** Integer factorization using the elliptic curves method (ECM). **/
260 : /** ellfacteur() returns a non trivial factor of N, assuming N>0, **/
261 : /** is composite, and has no prime divisor below 2^14 or so. **/
262 : /** Thanks to Paul Zimmermann for much helpful advice and to **/
263 : /** Guillaume Hanrot and Igor Schein for intensive testing **/
264 : /** **/
265 : /***********************************************************************/
266 : #define nbcmax 64 /* max number of simultaneous curves */
267 :
268 : static const ulong TB1[] = {
269 : 142,172,208,252,305,370,450,545,661,801,972,1180,1430,
270 : 1735,2100,2550,3090,3745,4540,5505,6675,8090,9810,11900,
271 : 14420,17490,21200,25700,31160,37780UL,45810UL,55550UL,67350UL,
272 : 81660UL,99010UL,120050UL,145550UL,176475UL,213970UL,259430UL,
273 : 314550UL,381380UL,462415UL,560660UL,679780UL,824220UL,999340UL,
274 : 1211670UL,1469110UL,1781250UL,2159700UL,2618600UL,3175000UL,
275 : 3849600UL,4667500UL,5659200UL,6861600UL,8319500UL,10087100UL,
276 : 12230300UL,14828900UL,17979600UL,21799700UL,26431500UL,
277 : 32047300UL,38856400UL, /* 110 times that still fits into 32bits */
278 : #ifdef LONG_IS_64BIT
279 : 47112200UL,57122100UL,69258800UL,83974200UL,101816200UL,
280 : 123449000UL,149678200UL,181480300UL,220039400UL,266791100UL,
281 : 323476100UL,392204900UL,475536500UL,576573500UL,699077800UL,
282 : 847610500UL,1027701900UL,1246057200UL,1510806400UL,1831806700UL,
283 : 2221009800UL,2692906700UL,3265067200UL,3958794400UL,4799917500UL
284 : #endif
285 : };
286 : static const ulong TB1_for_stage[] = {
287 : /* Start below the optimal B1 for finding factors which would just have been
288 : * missed by pollardbrent(), and escalate, changing curves to give good
289 : * coverage of the small factor ranges. Entries grow faster than what would
290 : * be optimal but a table instead of a 2D array keeps the code simple */
291 : 500,520,560,620,700,800,900,1000,1150,1300,1450,1600,1800,2000,
292 : 2200,2450,2700,2950,3250,3600,4000,4400,4850,5300,5800,6400,
293 : 7100,7850,8700,9600,10600,11700,12900,14200,15700,17300,
294 : 19000,21000,23200,25500,28000,31000,34500UL,38500UL,43000UL,
295 : 48000UL,53800UL,60400UL,67750UL,76000UL,85300UL,95700UL,
296 : 107400UL,120500UL,135400UL,152000UL,170800UL,191800UL,215400UL,
297 : 241800UL,271400UL,304500UL,341500UL,383100UL,429700UL,481900UL,
298 : 540400UL,606000UL,679500UL,761800UL,854100UL,957500UL,1073500UL
299 : };
300 :
301 : /* addition/doubling/multiplication of a point on an 'elliptic curve mod N'
302 : * may result in one of three things:
303 : * - a new bona fide point
304 : * - a point at infinity (denominator divisible by N)
305 : * - a point at infinity mod some p | N but finite mod q | N betraying itself
306 : * by a denominator which has nontrivial gcd with N.
307 : *
308 : * In the second case, addition/doubling aborts, copying one of the summands
309 : * to the destination array of points unless they coincide.
310 : * Multiplication will stop at some unpredictable intermediate stage: The
311 : * destination will contain _some_ multiple of the input point, but not
312 : * necessarily the desired one, which doesn't matter. As long as we're
313 : * multiplying (B1 phase) we simply carry on with the next multiplier.
314 : * During the B2 phase, the only additions are the giant steps, and the
315 : * worst that can happen here is that we lose one residue class mod 210
316 : * of prime multipliers on 4 of the curves, so again, we ignore the problem
317 : * and just carry on.)
318 : *
319 : * Idea: select nbc curves mod N and one point P on each of them. For each
320 : * such P, compute [M]P = Q where M is the product of all powers <= B2 of
321 : * primes <= nextprime(B1). Then check whether [p]Q for p < nextprime(B2)
322 : * betrays a factor. This second stage looks separately at the primes in
323 : * each residue class mod 210, four curves at a time, and steps additively
324 : * to ever larger multipliers, by comparing X coordinates of points which we
325 : * would need to add in order to reach another prime multiplier in the same
326 : * residue class. 'Comparing' means that we accumulate a product of
327 : * differences of X coordinates, and from time to time take a gcd of this
328 : * product with N. Montgomery's multi-inverse trick is used heavily. */
329 :
330 : /* *** auxiliary functions for ellfacteur: *** */
331 : /* (Rx,Ry) <- (Px,Py)+(Qx,Qy) over Z/NZ, z=1/(Px-Qx). If Ry = NULL, don't set */
332 : static void
333 8397084 : FpE_add_i(GEN N, GEN z, GEN Px, GEN Py, GEN Qx, GEN Qy, GEN *Rx, GEN *Ry)
334 : {
335 8397084 : GEN slope = modii(mulii(subii(Py, Qy), z), N);
336 8397084 : GEN t = subii(sqri(slope), addii(Qx, Px));
337 8397084 : affii(modii(t, N), *Rx);
338 8397084 : if (Ry) {
339 8322620 : t = subii(mulii(slope, subii(Px, *Rx)), Py);
340 8322620 : affii(modii(t, N), *Ry);
341 : }
342 8397084 : }
343 : /* X -> Z; cannot add on one of the curves: make sure Z contains
344 : * something useful before letting caller proceed */
345 : static void
346 28044 : ZV_aff(long n, GEN *X, GEN *Z)
347 : {
348 28044 : if (X != Z) {
349 : long k;
350 1601656 : for (k = n; k--; ) affii(X[k],Z[k]);
351 : }
352 28044 : }
353 :
354 : /* Parallel addition on nbc curves, assigning the result to locations at and
355 : * following *X3, *Y3. (If Y-coords of result not desired, set Y=NULL.)
356 : * Safe even if (X3,Y3) = (X2,Y2), _not_ if (X1,Y1). It is also safe to
357 : * overwrite Y2 with X3. If nbc1 < nbc, the first summand is
358 : * assumed to hold only nbc1 distinct points, repeated as often as we need
359 : * them (to add one point on each of a few curves to several other points on
360 : * the same curves): only used with nbc1 = nbc or nbc1 = 4 | nbc.
361 : *
362 : * Return 0 [SUCCESS], 1 [N | den], 2 [gcd(den, N) is a factor of N, preserved
363 : * in gl.
364 : * Stack space is bounded by a constant multiple of lgefint(N)*nbc:
365 : * - Phase 2 creates 12 items on the stack per iteration, of which 4 are twice
366 : * as long and 1 is thrice as long as N, i.e. 18 units per iteration.
367 : * - Phase 1 creates 4 units.
368 : * Total can be as large as 4*nbcmax + 18*8 units; ecm_elladd2() is
369 : * just as bad, and elldouble() comes to 3*nbcmax + 29*8 units. */
370 : static int
371 252408 : ecm_elladd0(GEN N, GEN *gl, long nbc, long nbc1,
372 : GEN *X1, GEN *Y1, GEN *X2, GEN *Y2, GEN *X3, GEN *Y3)
373 : {
374 252408 : const ulong mask = (nbc1 == 4)? 3: ~0UL; /*nbc1 = 4 or nbc*/
375 252408 : GEN W[2*nbcmax], *A = W+nbc; /* W[0],A[0] unused */
376 : long i;
377 252408 : pari_sp av = avma;
378 :
379 252408 : W[1] = subii(X1[0], X2[0]);
380 7932408 : for (i=1; i<nbc; i++)
381 : { /*prepare for multi-inverse*/
382 7680000 : A[i] = subii(X1[i&mask], X2[i]); /* don't waste time reducing mod N */
383 7680000 : W[i+1] = modii(mulii(A[i], W[i]), N);
384 : }
385 252408 : if (!invmod(W[nbc], N, gl))
386 : {
387 368 : if (!equalii(N,*gl)) return 2;
388 343 : ZV_aff(nbc, X2,X3);
389 343 : if (Y3) ZV_aff(nbc, Y2,Y3);
390 343 : return gc_int(av,1);
391 : }
392 :
393 7926252 : while (i--) /* nbc times */
394 : {
395 7926252 : pari_sp av2 = avma;
396 7926252 : GEN Px = X1[i&mask], Py = Y1[i&mask], Qx = X2[i], Qy = Y2[i];
397 7926252 : GEN z = i? mulii(*gl,W[i]): *gl; /*1/(Px-Qx)*/
398 7926252 : FpE_add_i(N,z, Px,Py,Qx,Qy, X3+i, Y3? Y3+i: NULL);
399 7926252 : if (!i) break;
400 7674212 : set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
401 : }
402 252040 : return gc_int(av,0);
403 : }
404 :
405 : /* Shortcut, for use in cases where Y coordinates follow their corresponding
406 : * X coordinates, and first summand doesn't need to be repeated */
407 : static int
408 246226 : ecm_elladd(GEN N, GEN *gl, long nbc, GEN *X1, GEN *X2, GEN *X3) {
409 246226 : return ecm_elladd0(N, gl, nbc, nbc, X1, X1+nbc, X2, X2+nbc, X3, X3+nbc);
410 : }
411 :
412 : /* As ecm_elladd except it does twice as many additions (and hides even more
413 : * of the cost of the modular inverse); the net effect is the same as
414 : * ecm_elladd(nbc,X1,X2,X3) && ecm_elladd(nbc,X4,X5,X6). Safe to
415 : * have X2=X3, X5=X6, or X1,X2 coincide with X4,X5 in any order. */
416 : static int
417 7474 : ecm_elladd2(GEN N, GEN *gl, long nbc,
418 : GEN *X1, GEN *X2, GEN *X3, GEN *X4, GEN *X5, GEN *X6)
419 : {
420 7474 : GEN *Y1 = X1+nbc, *Y2 = X2+nbc, *Y3 = X3+nbc;
421 7474 : GEN *Y4 = X4+nbc, *Y5 = X5+nbc, *Y6 = X6+nbc;
422 7474 : GEN W[4*nbcmax], *A = W+2*nbc; /* W[0],A[0] unused */
423 : long i, j;
424 7474 : pari_sp av = avma;
425 :
426 7474 : W[1] = subii(X1[0], X2[0]);
427 235472 : for (i=1; i<nbc; i++)
428 : {
429 227998 : A[i] = subii(X1[i], X2[i]); /* don't waste time reducing mod N here */
430 227998 : W[i+1] = modii(mulii(A[i], W[i]), N);
431 : }
432 242946 : for (j=0; j<nbc; i++,j++)
433 : {
434 235472 : A[i] = subii(X4[j], X5[j]);
435 235472 : W[i+1] = modii(mulii(A[i], W[i]), N);
436 : }
437 7474 : if (!invmod(W[2*nbc], N, gl))
438 : {
439 7 : if (!equalii(N,*gl)) return 2;
440 7 : ZV_aff(2*nbc, X2,X3); /* hack: 2*nbc => copy Y2->Y3 */
441 7 : ZV_aff(2*nbc, X5,X6); /* also copy Y5->Y6 */
442 7 : return gc_int(av,1);
443 : }
444 :
445 242883 : while (j--) /* nbc times */
446 : {
447 235416 : pari_sp av2 = avma;
448 235416 : GEN Px = X4[j], Py = Y4[j], Qx = X5[j], Qy = Y5[j];
449 235416 : GEN z = mulii(*gl,W[--i]); /*1/(Px-Qx)*/
450 235416 : FpE_add_i(N,z, Px,Py, Qx,Qy, X6+j,Y6+j);
451 235416 : set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
452 : }
453 235416 : while (i--) /* nbc times */
454 : {
455 235416 : pari_sp av2 = avma;
456 235416 : GEN Px = X1[i], Py = Y1[i], Qx = X2[i], Qy = Y2[i];
457 235416 : GEN z = i? mulii(*gl, W[i]): *gl; /*1/(Px-Qx)*/
458 235416 : FpE_add_i(N,z, Px,Py, Qx,Qy, X3+i,Y3+i);
459 235416 : if (!i) break;
460 227949 : set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
461 : }
462 7467 : return gc_int(av,0);
463 : }
464 :
465 : /* Parallel doubling on nbc curves, assigning the result to locations at
466 : * and following *X2. Safe to be called with X2 equal to X1. Return
467 : * value as for ecm_elladd. If we find a point at infinity mod N,
468 : * and if X1 != X2, we copy the points at X1 to X2. */
469 : static int
470 42656 : elldouble(GEN N, GEN *gl, long nbc, GEN *X1, GEN *X2)
471 : {
472 42656 : GEN *Y1 = X1+nbc, *Y2 = X2+nbc;
473 : GEN W[nbcmax+1]; /* W[0] unused */
474 : long i;
475 42656 : pari_sp av = avma;
476 42656 : /*W[0] = gen_1;*/ W[1] = Y1[0];
477 1254208 : for (i=1; i<nbc; i++) W[i+1] = modii(mulii(Y1[i], W[i]), N);
478 42656 : if (!invmod(W[nbc], N, gl))
479 : {
480 0 : if (!equalii(N,*gl)) return 2;
481 0 : ZV_aff(2*nbc,X1,X2); /* also copies Y1->Y2 */
482 0 : return gc_int(av,1);
483 : }
484 1296864 : while (i--) /* nbc times */
485 : {
486 : pari_sp av2;
487 1254208 : GEN v, w, L, z = i? mulii(*gl,W[i]): *gl;
488 1254208 : if (i) *gl = modii(mulii(*gl, Y1[i]), N);
489 1254208 : av2 = avma;
490 1254208 : L = modii(mulii(addui(1, mului(3, Fp_sqr(X1[i],N))), z), N);
491 1254208 : if (signe(L)) /* half of zero is still zero */
492 1254208 : L = shifti(mod2(L)? addii(L, N): L, -1);
493 1254208 : v = modii(subii(sqri(L), shifti(X1[i],1)), N);
494 1254208 : w = modii(subii(mulii(L, subii(X1[i], v)), Y1[i]), N);
495 1254208 : affii(v, X2[i]);
496 1254208 : affii(w, Y2[i]);
497 1254208 : set_avma(av2);
498 : }
499 42656 : return gc_int(av,0);
500 : }
501 :
502 : /* Parallel multiplication by an odd prime k on nbc curves, storing the
503 : * result to locations at and following *X2. Safe to be called with X2 = X1.
504 : * Return values as ecm_elladd. Uses (a simplified variant of) Montgomery's
505 : * PRAC algorithm; see ftp://ftp.cwi.nl/pub/pmontgom/Lucas.ps.gz .
506 : * With thanks to Paul Zimmermann for the reference. --GN1998Aug13 */
507 : static int
508 218303 : get_rule(ulong d, ulong e)
509 : {
510 218303 : if (d <= e + (e>>2)) /* floor(1.25*e) */
511 : {
512 17351 : if ((d+e)%3 == 0) return 0; /* rule 1 */
513 10383 : if ((d-e)%6 == 0) return 1; /* rule 2 */
514 : }
515 : /* d <= 4*e but no ofl */
516 211281 : if ((d+3)>>2 <= e) return 2; /* rule 3, common case */
517 12638 : if ((d&1)==(e&1)) return 1; /* rule 4 = rule 2 */
518 6561 : if (!(d&1)) return 3; /* rule 5 */
519 1839 : if (d%3 == 0) return 4; /* rule 6 */
520 424 : if ((d+e)%3 == 0) return 5; /* rule 7 */
521 0 : if ((d-e)%3 == 0) return 6; /* rule 8 */
522 : /* when we get here, e is even, otherwise one of rules 4,5 would apply */
523 0 : return 7; /* rule 9 */
524 : }
525 :
526 : /* PRAC implementation notes - main changes against the paper version:
527 : * (1) The general function [m+n]P = f([m]P,[n]P,[m-n]P) collapses (for m!=n)
528 : * to an ecm_elladd() which does not depend on the third argument; thus
529 : * references to the third variable (C in the paper) can be eliminated.
530 : * (2) Since our multipliers are prime, the outer loop of the paper
531 : * version executes only once, and thus is invisible above.
532 : * (3) The first step in the inner loop of the paper version will always be
533 : * rule 3, but the addition requested by this rule amounts to a doubling, and
534 : * will always be followed by a swap, so we have unrolled this first iteration.
535 : * (4) Simplifications in rules 6 and 7 are possible given the above, and we
536 : * save one addition in each of the two cases. NB none of the other
537 : * ecm_elladd()s in the loop can ever degenerate into an elldouble.
538 : * (5) I tried to optimize for rule 3, which is used more frequently than all
539 : * others together, but it didn't improve things, so I removed the nested
540 : * tight loop again. --GN */
541 : /* The main loop body of ellfacteur() runs _slower_ under PRAC than under a
542 : * straightforward left-shift binary multiplication when N has <30 digits and
543 : * B1 is small; PRAC wins when N and B1 get larger. Weird. --GN */
544 : /* k>2 assumed prime, XAUX = scratchpad */
545 : static int
546 27351 : ellmult(GEN N, GEN *gl, long nbc, ulong k, GEN *X1, GEN *X2, GEN *XAUX)
547 : {
548 : ulong r, d, e, e1;
549 : int res;
550 27351 : GEN *A = X2, *B = XAUX, *T = XAUX + 2*nbc;
551 :
552 27351 : ZV_aff(2*nbc,X1,XAUX);
553 : /* first doubling picks up X1; after this we'll be working in XAUX and
554 : * X2 only, mostly via A and B and T */
555 27351 : if ((res = elldouble(N, gl, nbc, X1, X2)) != 0) return res;
556 :
557 : /* split the work at the golden ratio */
558 27351 : r = (ulong)(k*0.61803398875 + .5);
559 27351 : d = k - r;
560 27351 : e = r - d; /* d+e == r, so no danger of ofl below */
561 245507 : while (d != e)
562 : { /* apply one of the nine transformations from PM's Table 4. */
563 218303 : switch(get_rule(d,e))
564 : {
565 6968 : case 0: /* rule 1 */
566 6968 : if ( (res = ecm_elladd(N, gl, nbc, A, B, T)) ) return res;
567 6961 : if ( (res = ecm_elladd2(N, gl, nbc, T, A, A, T, B, B)) != 0) return res;
568 6954 : e1 = d - e; d = (d + e1)/3; e = (e - e1)/3; break;
569 6131 : case 1: /* rules 2 and 4 */
570 6131 : if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
571 6117 : if ( (res = elldouble(N, gl, nbc, A, A)) ) return res;
572 6117 : d = (d-e)>>1; break;
573 4722 : case 3: /* rule 5 */
574 4722 : if ( (res = elldouble(N, gl, nbc, A, A)) ) return res;
575 4722 : d >>= 1; break;
576 1415 : case 4: /* rule 6 */
577 1415 : if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
578 1415 : if ( (res = ecm_elladd(N, gl, nbc, T, A, A)) ) return res;
579 1415 : if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
580 1415 : d = d/3 - e; break;
581 198643 : case 2: /* rule 3 */
582 198643 : if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
583 198524 : d -= e; break;
584 424 : case 5: /* rule 7 */
585 424 : if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
586 424 : if ( (res = ecm_elladd2(N, gl, nbc, T, A, A, T, B, B)) != 0) return res;
587 424 : d = (d - 2*e)/3; break;
588 0 : case 6: /* rule 8 */
589 0 : if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
590 0 : if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
591 0 : if ( (res = ecm_elladd(N, gl, nbc, T, A, A)) ) return res;
592 0 : d = (d - e)/3; break;
593 0 : case 7: /* rule 9 */
594 0 : if ( (res = elldouble(N, gl, nbc, B, B)) ) return res;
595 0 : e >>= 1; break;
596 : }
597 : /* swap d <-> e and A <-> B if necessary */
598 218156 : if (d < e) { lswap(d,e); pswap(A,B); }
599 : }
600 27204 : return ecm_elladd(N, gl, nbc, XAUX, X2, X2);
601 : }
602 :
603 : struct ECM {
604 : pari_timer T;
605 : long nbc, nbc2, seed;
606 : GEN *X, *XAUX, *XT, *XD, *XB, *XB2, *XH, *Xh, *Yh;
607 : };
608 :
609 : /* memory layout in ellfacteur(): a large array of GEN pointers, and one
610 : * huge chunk of memory containing all the actual GEN (t_INT) objects.
611 : * nbc is constant throughout the invocation:
612 : * - The B1 stage of each iteration through the main loop needs little
613 : * space: enough for the X and Y coordinates of the current points,
614 : * and twice as much again as scratchpad for ellmult().
615 : * - The B2 stage, starting from some current set of points Q, needs, in
616 : * succession:
617 : * + space for [2]Q, [4]Q, ..., [10]Q, and [p]Q for building the helix;
618 : * + space for 48*nbc X and Y coordinates to hold the helix. This could
619 : * re-use [2]Q,...,[8]Q, but only with difficulty, since we don't
620 : * know in advance which residue class mod 210 our p is going to be in.
621 : * It can and should re-use [p]Q, though;
622 : * + space for (temporarily [30]Q and then) [210]Q, [420]Q, and several
623 : * further doublings until the giant step multiplier is reached. This
624 : * can re-use the remaining cells from above. The computation of [210]Q
625 : * will have been the last call to ellmult() within this iteration of the
626 : * main loop, so the scratchpad is now also free to be re-used. We also
627 : * compute [630]Q by a parallel addition; we'll need it later to get the
628 : * baby-step table bootstrapped a little faster.
629 : * + Finally, for no more than 4 curves at a time, room for up to 1024 X
630 : * coordinates only: the Y coordinates needed whilst setting up this baby
631 : * step table are temporarily stored in the upper half, and overwritten
632 : * during the last series of additions.
633 : *
634 : * Graphically: after end of B1 stage (X,Y are the coords of Q):
635 : * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
636 : * | X Y | scratch | [2]Q| [4]Q| [6]Q| [8]Q|[10]Q| ... | ...
637 : * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
638 : * *X *XAUX *XT *XD *XB
639 : *
640 : * [30]Q is computed from [10]Q. [210]Q can go into XY, etc:
641 : * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
642 : * |[210]|[420]|[630]|[840]|[1680,3360,6720,...,2048*210] |bstp table...
643 : * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
644 : * *X *XAUX *XT *XD [*XG, somewhere here] *XB .... *XH
645 : *
646 : * So we need (13 + 48) * 2 * nbc slots here + 4096 slots for the baby step
647 : * table (not all of which will be used when we start with a small B1, but
648 : * better to allocate and initialize ahead of time all the slots that might
649 : * be needed later).
650 : *
651 : * Note on memory locality: During the B2 phase, accesses to the helix
652 : * (once it is set up) will be clustered by curves (4 out of nbc at a time).
653 : * Accesses to the baby steps table will wander from one end of the array to
654 : * the other and back, one such cycle per giant step, and during a full cycle
655 : * we would expect on the order of 2E4 accesses when using the largest giant
656 : * step size. Thus we shouldn't be doing too bad with respect to thrashing
657 : * a 512KBy L2 cache. However, we don't want the baby step table to grow
658 : * larger than this, even if it would reduce the number of EC operations by a
659 : * few more per cent for very large B2, lest cache thrashing slow down
660 : * everything disproportionally. --GN */
661 : /* Auxiliary routines need < (3*nbc+240)*lN words on the PARI stack, in
662 : * addition to the spc*(lN+1) words occupied by our main table. */
663 : static void
664 71 : ECM_alloc(struct ECM *E, long lN)
665 : {
666 71 : const long bstpmax = 1024; /* max number of baby step table entries */
667 71 : long spc = (13 + 48) * E->nbc2 + bstpmax * 4;
668 71 : long len = spc + 385 + spc*lN;
669 71 : long i, tw = evallg(lN) | evaltyp(t_INT);
670 71 : GEN w, *X = (GEN*)new_chunk(len);
671 : /* hack for X[i] = cgeti(lN). X = current point in B1 phase */
672 71 : w = (GEN)(X + spc + 385);
673 448023 : for (i = spc-1; i >= 0; i--) { X[i] = w; *w = tw; w += lN; }
674 71 : E->X = X;
675 71 : E->XAUX = E->X + E->nbc2; /* scratchpad for ellmult() */
676 71 : E->XT = E->XAUX + E->nbc2; /* ditto, will later hold [3*210]Q */
677 71 : E->XD = E->XT + E->nbc2; /* room for various multiples */
678 71 : E->XB = E->XD + 10*E->nbc2; /* start of baby steps table */
679 71 : E->XB2 = E->XB + 2 * bstpmax; /* middle of baby steps table */
680 71 : E->XH = E->XB2 + 2 * bstpmax; /* end of bstps table, start of helix */
681 71 : E->Xh = E->XH + 48*E->nbc2; /* little helix, X coords */
682 71 : E->Yh = E->XH + 192; /* ditto, Y coords */
683 : /* XG,YG set inside the main loop, since they depend on B2 */
684 : /* E.Xh range of 384 pointers not set; these will later duplicate the pointers
685 : * in the E.XH range, 4 curves at a time. Some of the cells reserved here for
686 : * the E.XB range will never be used, instead, we'll warp the pointers to
687 : * connect to (read-only) GENs in the X/E.XD range */
688 71 : }
689 : /* N.B. E->seed is not initialized here */
690 : static void
691 71 : ECM_init(struct ECM *E, GEN N, long nbc)
692 : {
693 71 : if (nbc < 0)
694 : { /* choose a sensible default */
695 64 : const long size = expi(N) + 1;
696 64 : nbc = ((size >> 3) << 2) - 80;
697 64 : if (nbc < 8) nbc = 8;
698 : }
699 71 : if (nbc > nbcmax) nbc = nbcmax;
700 71 : E->nbc = nbc;
701 71 : E->nbc2 = nbc << 1;
702 71 : ECM_alloc(E, lgefint(N));
703 71 : }
704 :
705 : static GEN
706 114 : ECM_loop(struct ECM *E, GEN N, ulong B1)
707 : {
708 114 : const ulong B2 = 110 * B1, B2_rt = usqrt(B2);
709 114 : const ulong nbc = E->nbc, nbc2 = E->nbc2;
710 : pari_sp av1, avtmp;
711 114 : byteptr d0, d = diffptr;
712 : long i, gse, gss, bstp, bstp0, rcn0, rcn;
713 : ulong B2_p, m, p, p0;
714 : GEN g, *XG, *YG;
715 114 : GEN *X = E->X, *XAUX = E->XAUX, *XT = E->XT, *XD = E->XD;
716 114 : GEN *XB = E->XB, *XB2 = E->XB2, *XH = E->XH, *Xh = E->Xh, *Yh = E->Yh;
717 : /* pick curves */
718 4818 : for (i = nbc2; i--; ) affui(E->seed++, X[i]);
719 : /* pick giant step exponent and size */
720 114 : gse = B1 < 656
721 : ? (B1 < 200? 5: 6)
722 114 : : (B1 < 10500
723 : ? (B1 < 2625? 7: 8)
724 : : (B1 < 42000? 9: 10));
725 114 : gss = 1UL << gse;
726 : /* With 32 baby steps, a giant step corresponds to 32*420 = 13440,
727 : * appropriate for the smallest B2s. With 1024, a giant step will be 430080;
728 : * appropriate for B1 >~ 42000, where 512 baby steps would imply roughly
729 : * the same number of curve additions. */
730 114 : XG = XT + gse*nbc2; /* will later hold [2^(gse+1)*210]Q */
731 114 : YG = XG + nbc;
732 :
733 114 : if (DEBUGLEVEL >= 4) {
734 0 : err_printf("ECM: time = %6ld ms\nECM: B1 = %4lu,", timer_delay(&E->T), B1);
735 0 : err_printf("\tB2 = %6lu,\tgss = %4ld*420\n", B2, gss);
736 : }
737 114 : p = 0;
738 114 : NEXT_PRIME_VIADIFF(p,d);
739 :
740 : /* ---B1 PHASE--- */
741 : /* treat p=2 separately */
742 114 : B2_p = B2 >> 1;
743 1911 : for (m=1; m<=B2_p; m<<=1)
744 : {
745 1797 : int fl = elldouble(N, &g, nbc, X, X);
746 1797 : if (fl > 1) return g; else if (fl) break;
747 : }
748 114 : rcn = NPRC; /* multipliers begin at the beginning */
749 : /* p=3,...,nextprime(B1) */
750 7133 : while (p < B1 && p <= B2_rt)
751 : {
752 7026 : pari_sp av2 = avma;
753 7026 : p = snextpr(p, &d, &rcn, NULL, uisprime);
754 7026 : B2_p = B2/p; /* beware integer overflow on 32-bit CPUs */
755 23792 : for (m=1; m<=B2_p; m*=p)
756 : {
757 16899 : int fl = ellmult(N, &g, nbc, p, X, X, XAUX);
758 16899 : if (fl > 1) return g; else if (fl) break;
759 16766 : set_avma(av2);
760 : }
761 7019 : set_avma(av2);
762 : }
763 : /* primes p larger than sqrt(B2) appear only to the 1st power */
764 10274 : while (p < B1)
765 : {
766 10185 : pari_sp av2 = avma;
767 10185 : p = snextpr(p, &d, &rcn, NULL, uisprime);
768 10185 : if (ellmult(N, &g, nbc, p, X, X, XAUX) > 1) return g;
769 10167 : set_avma(av2);
770 : }
771 89 : if (DEBUGLEVEL >= 4) {
772 0 : err_printf("ECM: time = %6ld ms, B1 phase done, ", timer_delay(&E->T));
773 0 : err_printf("p = %lu, setting up for B2\n", p);
774 : }
775 :
776 : /* ---B2 PHASE--- */
777 : /* compute [2]Q,...,[10]Q, needed to build the helix */
778 89 : if (elldouble(N, &g, nbc, X, XD) > 1) return g; /*[2]Q*/
779 89 : if (elldouble(N, &g, nbc, XD, XD + nbc2) > 1) return g; /*[4]Q*/
780 89 : if (ecm_elladd(N, &g, nbc,
781 89 : XD, XD + nbc2, XD + (nbc<<2)) > 1) return g; /* [6]Q */
782 89 : if (ecm_elladd2(N, &g, nbc,
783 : XD, XD + (nbc<<2), XT + (nbc<<3),
784 89 : XD + nbc2, XD + (nbc<<2), XD + (nbc<<3)) > 1)
785 0 : return g; /* [8]Q and [10]Q */
786 89 : if (DEBUGLEVEL >= 7) err_printf("\t(got [2]Q...[10]Q)\n");
787 :
788 : /* get next prime (still using the foolproof test) */
789 89 : p = snextpr(p, &d, &rcn, NULL, uisprime);
790 : /* make sure we have the residue class number (mod 210) */
791 89 : if (rcn == NPRC)
792 : {
793 89 : rcn = prc210_no[(p % 210) >> 1];
794 89 : if (rcn == NPRC)
795 : {
796 0 : err_printf("ECM: %lu should have been prime but isn\'t\n", p);
797 0 : pari_err_BUG("ellfacteur");
798 : }
799 : }
800 :
801 : /* compute [p]Q and put it into its place in the helix */
802 89 : if (ellmult(N, &g, nbc, p, X, XH + rcn*nbc2, XAUX) > 1)
803 0 : return g;
804 89 : if (DEBUGLEVEL >= 7)
805 0 : err_printf("\t(got [p]Q, p = %lu = prc210_rp[%ld] mod 210)\n", p, rcn);
806 :
807 : /* save current p, d, and rcn; we'll need them more than once below */
808 89 : p0 = p;
809 89 : d0 = d;
810 89 : rcn0 = rcn; /* remember where the helix wraps */
811 89 : bstp0 = 0; /* p is at baby-step offset 0 from itself */
812 :
813 : /* fill up the helix, stepping forward through the prime residue classes
814 : * mod 210 until we're back at the r'class of p0. Keep updating p so
815 : * that we can print meaningful diagnostics if a factor shows up; don't
816 : * bother checking which of these p's are in fact prime */
817 4272 : for (i = 47; i; i--) /* 47 iterations */
818 : {
819 4183 : ulong dp = (ulong)prc210_d1[rcn];
820 4183 : p += dp;
821 4183 : if (rcn == 47)
822 : { /* wrap mod 210 */
823 89 : if (ecm_elladd(N, &g, nbc, XT+dp*nbc, XH+rcn*nbc2, XH) > 1) return g;
824 89 : rcn = 0; continue;
825 : }
826 4094 : if (ecm_elladd(N, &g, nbc, XT+dp*nbc, XH+rcn*nbc2, XH+rcn*nbc2+nbc2) > 1)
827 0 : return g;
828 4094 : rcn++;
829 : }
830 89 : if (DEBUGLEVEL >= 7) err_printf("\t(got initial helix)\n");
831 : /* compute [210]Q etc, needed for the baby step table */
832 89 : if (ellmult(N, &g, nbc, 3, XD + (nbc<<3), X, XAUX) > 1) return g;
833 89 : if (ellmult(N, &g, nbc, 7, X, X, XAUX) > 1) return g; /* [210]Q */
834 : /* this was the last call to ellmult() in the main loop body; may now
835 : * overwrite XAUX and slots XD and following */
836 89 : if (elldouble(N, &g, nbc, X, XAUX) > 1) return g; /* [420]Q */
837 89 : if (ecm_elladd(N, &g, nbc, X, XAUX, XT) > 1) return g;/*[630]Q*/
838 89 : if (ecm_elladd(N, &g, nbc, X, XT, XD) > 1) return g; /*[840]Q*/
839 652 : for (i=1; i <= gse; i++)
840 563 : if (elldouble(N, &g, nbc, XT + i*nbc2, XD + i*nbc2) > 1) return g;
841 : /* (the last iteration has initialized XG to [210*2^(gse+1)]Q) */
842 :
843 89 : if (DEBUGLEVEL >= 4)
844 0 : err_printf("ECM: time = %6ld ms, entering B2 phase, p = %lu\n",
845 : timer_delay(&E->T), p);
846 :
847 433 : for (i = nbc - 4; i >= 0; i -= 4)
848 : { /* loop over small sets of 4 curves at a time */
849 : GEN *Xb;
850 : long j, k;
851 351 : if (DEBUGLEVEL >= 6)
852 0 : err_printf("ECM: finishing curves %ld...%ld\n", i, i+3);
853 : /* Copy relevant pointers from XH to Xh. Memory layout in XH:
854 : * nbc X coordinates, nbc Y coordinates for residue class
855 : * 1 mod 210, then the same for r.c. 11 mod 210, etc. Memory layout for
856 : * Xh is: four X coords for 1 mod 210, four for 11 mod 210, ..., four
857 : * for 209 mod 210, then the corresponding Y coordinates in the same
858 : * order. This allows a giant step on Xh using just three calls to
859 : * ecm_elladd0() each acting on 64 points in parallel */
860 17199 : for (j = 48; j--; )
861 : {
862 16848 : k = nbc2*j + i;
863 16848 : m = j << 2; /* X coordinates */
864 16848 : Xh[m] = XH[k]; Xh[m+1] = XH[k+1];
865 16848 : Xh[m+2] = XH[k+2]; Xh[m+3] = XH[k+3];
866 16848 : k += nbc; /* Y coordinates */
867 16848 : Yh[m] = XH[k]; Yh[m+1] = XH[k+1];
868 16848 : Yh[m+2] = XH[k+2]; Yh[m+3] = XH[k+3];
869 : }
870 : /* Build baby step table of X coords of multiples of [210]Q. XB[4*j]
871 : * will point at X coords on four curves from [(j+1)*210]Q. Until
872 : * we're done, we need some Y coords as well, which we keep in the
873 : * second half of the table, overwriting them at the end when gse=10.
874 : * Multiples which we already have (by 1,2,3,4,8,16,...,2^gse) are
875 : * entered simply by copying the pointers, ignoring the few slots in w
876 : * that were initially reserved for them. Here are the initial entries */
877 1053 : for (Xb=XB,k=2,j=i; k--; Xb=XB2,j+=nbc) /* first X, then Y coords */
878 : {
879 702 : Xb[0] = X[j]; Xb[1] = X[j+1]; /* [210]Q */
880 702 : Xb[2] = X[j+2]; Xb[3] = X[j+3];
881 702 : Xb[4] = XAUX[j]; Xb[5] = XAUX[j+1]; /* [420]Q */
882 702 : Xb[6] = XAUX[j+2]; Xb[7] = XAUX[j+3];
883 702 : Xb[8] = XT[j]; Xb[9] = XT[j+1]; /* [630]Q */
884 702 : Xb[10] = XT[j+2]; Xb[11] = XT[j+3];
885 702 : Xb += 4; /* points at [420]Q */
886 : /* ... entries at powers of 2 times 210 .... */
887 4337 : for (m = 2; m < (ulong)gse+k; m++) /* omit Y coords of [2^gse*210]Q */
888 : {
889 3635 : long m2 = m*nbc2 + j;
890 3635 : Xb += (2UL<<m); /* points at [2^m*210]Q */
891 3635 : Xb[0] = XAUX[m2]; Xb[1] = XAUX[m2+1];
892 3635 : Xb[2] = XAUX[m2+2]; Xb[3] = XAUX[m2+3];
893 : }
894 : }
895 351 : if (DEBUGLEVEL >= 7)
896 0 : err_printf("\t(extracted precomputed helix / baby step entries)\n");
897 : /* ... glue in between, up to 16*210 ... */
898 351 : if (ecm_elladd0(N, &g, 12, 4, /* 12 pts + (4 pts replicated thrice) */
899 : XB + 12, XB2 + 12,
900 : XB, XB2,
901 0 : XB + 16, XB2 + 16) > 1) return g; /*4+{1,2,3} = {5,6,7}*/
902 351 : if (ecm_elladd0(N, &g, 28, 4, /* 28 pts + (4 pts replicated 7fold) */
903 : XB + 28, XB2 + 28,
904 : XB, XB2,
905 0 : XB + 32, XB2 + 32) > 1) return g;/*8+{1...7} = {9...15}*/
906 : /* ... and the remainder of the lot */
907 1291 : for (m = 5; m <= (ulong)gse; m++)
908 : { /* fill in from 2^(m-1)+1 to 2^m-1 in chunks of 64 and 60 points */
909 940 : ulong m2 = 2UL << m; /* will point at 2^(m-1)+1 */
910 2033 : for (j = 0; (ulong)j < m2-64; j+=64) /* executed 0 times when m = 5 */
911 : {
912 1934 : if (ecm_elladd0(N, &g, 64, 4,
913 1093 : XB + m2-4, XB2 + m2-4,
914 1093 : XB + j, XB2 + j,
915 1934 : XB + m2+j, (m<(ulong)gse? XB2+m2+j: NULL)) > 1)
916 0 : return g;
917 : } /* j = m2-64 here, 60 points left */
918 1291 : if (ecm_elladd0(N, &g, 60, 4,
919 940 : XB + m2-4, XB2 + m2-4,
920 940 : XB + j, XB2 + j,
921 1291 : XB + m2+j, (m<(ulong)gse? XB2+m2+j: NULL)) > 1)
922 0 : return g;
923 : /* when m=gse, drop Y coords of result, and when both equal 1024,
924 : * overwrite Y coords of second argument with X coords of result */
925 : }
926 351 : if (DEBUGLEVEL >= 7) err_printf("\t(baby step table complete)\n");
927 : /* initialize a few other things */
928 351 : bstp = bstp0;
929 351 : p = p0; d = d0; rcn = rcn0;
930 351 : g = gen_1; av1 = avma;
931 : /* scratchspace for prod (x_i-x_j) */
932 351 : avtmp = (pari_sp)new_chunk(8 * lgefint(N));
933 : /* The correct entry in XB to use depends on bstp and on where we are
934 : * on the helix. As we skip from prime to prime, bstp is incremented
935 : * by snextpr each time we wrap around through residue class number 0
936 : * (1 mod 210), but the baby step should not be taken until rcn>=rcn0,
937 : * i.e. until we pass again the residue class of p0.
938 : *
939 : * The correct signed multiplier is thus k = bstp - (rcn < rcn0),
940 : * and the offset from XB is four times (|k| - 1). When k=0, we ignore
941 : * the current prime: if it had led to a factorization, this
942 : * would have been noted during the last giant step, or -- when we
943 : * first get here -- whilst initializing the helix. When k > gss,
944 : * we must do a giant step and bump bstp back by -2*gss.
945 : *
946 : * The gcd of the product of X coord differences against N is taken just
947 : * before we do a giant step. */
948 4617212 : while (p < B2)
949 : {/* loop over probable primes p0 < p <= nextprime(B2), inserting giant
950 : * steps as necessary */
951 4616868 : p = snextpr(p, &d, &rcn, &bstp, uis2psp); /* next probable prime */
952 : /* work out the corresponding baby-step multiplier */
953 4616868 : k = bstp - (rcn < rcn0 ? 1 : 0);
954 4616868 : if (k > gss)
955 : { /* giant-step time, take gcd */
956 1156 : g = gcdii(g, N);
957 1156 : if (!is_pm1(g) && !equalii(g, N)) return g;
958 1149 : g = gen_1; set_avma(av1);
959 2298 : while (k > gss)
960 : { /* giant step */
961 1149 : if (DEBUGLEVEL >= 7) err_printf("\t(giant step at p = %lu)\n", p);
962 1149 : if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
963 0 : Xh, Yh, Xh, Yh) > 1) return g;
964 1149 : if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
965 : Xh + 64, Yh + 64, Xh + 64, Yh + 64) > 1)
966 0 : return g;
967 1149 : if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
968 : Xh + 128, Yh + 128, Xh + 128, Yh + 128) > 1)
969 0 : return g;
970 1149 : bstp -= (gss << 1);
971 1149 : k = bstp - (rcn < rcn0? 1: 0); /* recompute multiplier */
972 : }
973 : }
974 4616861 : if (!k) continue; /* point of interest is already in Xh */
975 4590320 : if (k < 0) k = -k;
976 4590320 : m = ((ulong)k - 1) << 2;
977 : /* accumulate product of differences of X coordinates */
978 4590320 : j = rcn<<2;
979 4590320 : avma = avtmp; /* go to garbage zone; don't use set_avma */
980 4590320 : g = modii(mulii(g, subii(XB[m], Xh[j])), N);
981 4590320 : g = modii(mulii(g, subii(XB[m+1], Xh[j+1])), N);
982 4590320 : g = modii(mulii(g, subii(XB[m+2], Xh[j+2])), N);
983 4590320 : g = mulii(g, subii(XB[m+3], Xh[j+3]));
984 4590320 : set_avma(av1);
985 4590320 : g = modii(g, N);
986 : }
987 344 : set_avma(av1);
988 : }
989 82 : return NULL;
990 : }
991 :
992 : /* ellfacteur() tuned to be useful as a first stage before MPQS, especially for
993 : * large arguments, when 'insist' is false, and now also for the case when
994 : * 'insist' is true, vaguely following suggestions by Paul Zimmermann
995 : * (http://www.loria.fr/~zimmerma/records/ecmnet.html). --GN 1998Jul,Aug */
996 : static GEN
997 1084 : ellfacteur(GEN N, int insist)
998 : {
999 1084 : const long size = expi(N) + 1;
1000 1084 : pari_sp av = avma;
1001 : struct ECM E;
1002 1084 : long nbc, dsn, dsnmax, rep = 0;
1003 1084 : if (insist)
1004 : {
1005 7 : const long DSNMAX = numberof(TB1)-1;
1006 7 : dsnmax = (size >> 2) - 10;
1007 7 : if (dsnmax < 0) dsnmax = 0;
1008 0 : else if (dsnmax > DSNMAX) dsnmax = DSNMAX;
1009 7 : E.seed = 1 + (nbcmax<<7)*(size&0xffff); /* seed for choice of curves */
1010 :
1011 7 : dsn = (size >> 3) - 5;
1012 7 : if (dsn < 0) dsn = 0; else if (dsn > 47) dsn = 47;
1013 : /* pick up the torch where noninsistent stage would have given up */
1014 7 : nbc = dsn + (dsn >> 2) + 9; /* 8 or more curves in parallel */
1015 7 : nbc &= ~3; /* 4 | nbc */
1016 : }
1017 : else
1018 : {
1019 1077 : dsn = (size - 140) >> 3;
1020 1077 : if (dsn < 0)
1021 : {
1022 : #ifndef __EMX__ /* unless DOS/EMX: MPQS's disk access is abysmally slow */
1023 1013 : if (DEBUGLEVEL >= 4)
1024 0 : err_printf("ECM: number too small to justify this stage\n");
1025 1013 : return NULL; /* too small, decline the task */
1026 : #endif
1027 : dsn = 0;
1028 64 : } else if (dsn > 12) dsn = 12;
1029 64 : rep = (size <= 248 ?
1030 64 : (size <= 176 ? (size - 124) >> 4 : (size - 148) >> 3) :
1031 18 : (size - 224) >> 1);
1032 : #ifdef __EMX__ /* DOS/EMX: extra rounds (shun MPQS) */
1033 : rep += 20;
1034 : #endif
1035 64 : dsnmax = 72;
1036 : /* Use disjoint sets of curves for non-insist and insist phases; moreover,
1037 : * repeated calls acting on factors of the same original number should try
1038 : * to use fresh curves. The following achieves this */
1039 64 : E.seed = 1 + (nbcmax<<3)*(size & 0xf);
1040 64 : nbc = -1;
1041 : }
1042 71 : ECM_init(&E, N, nbc);
1043 71 : if (DEBUGLEVEL >= 4)
1044 : {
1045 0 : timer_start(&E.T);
1046 0 : err_printf("ECM: working on %ld curves at a time; initializing", E.nbc);
1047 0 : if (!insist)
1048 : {
1049 0 : if (rep == 1) err_printf(" for one round");
1050 0 : else err_printf(" for up to %ld rounds", rep);
1051 : }
1052 0 : err_printf("...\n");
1053 : }
1054 71 : if (dsn > dsnmax) dsn = dsnmax;
1055 : for(;;)
1056 43 : {
1057 114 : ulong B1 = insist? TB1[dsn]: TB1_for_stage[dsn];
1058 114 : GEN g = ECM_loop(&E, N, B1);
1059 114 : if (g)
1060 : {
1061 32 : if (DEBUGLEVEL >= 4)
1062 0 : err_printf("ECM: time = %6ld ms\n\tfound factor = %Ps\n",
1063 : timer_delay(&E.T), g);
1064 32 : return gerepilecopy(av, g);
1065 : }
1066 82 : if (dsn < dsnmax)
1067 : {
1068 75 : if (insist) dsn++;
1069 75 : else { dsn += 2; if (dsn > dsnmax) dsn = dsnmax; }
1070 : }
1071 82 : if (!insist && !--rep)
1072 : {
1073 39 : if (DEBUGLEVEL >= 4)
1074 0 : err_printf("ECM: time = %6ld ms,\tellfacteur giving up.\n",
1075 : timer_delay(&E.T));
1076 39 : return gc_NULL(av);
1077 : }
1078 : }
1079 : }
1080 : /* assume rounds >= 1, seed >= 1, B1 <= ULONG_MAX / 110 */
1081 : GEN
1082 0 : Z_ECM(GEN N, long rounds, long seed, ulong B1)
1083 : {
1084 0 : pari_sp av = avma;
1085 : struct ECM E;
1086 : long i;
1087 0 : E.seed = seed;
1088 0 : ECM_init(&E, N, -1);
1089 0 : if (DEBUGLEVEL >= 4) timer_start(&E.T);
1090 0 : for (i = rounds; i--; )
1091 : {
1092 0 : GEN g = ECM_loop(&E, N, B1);
1093 0 : if (g) return gerepilecopy(av, g);
1094 : }
1095 0 : return gc_NULL(av);
1096 : }
1097 :
1098 : /***********************************************************************/
1099 : /** **/
1100 : /** FACTORIZATION (Pollard-Brent rho) --GN1998Jun18-26 **/
1101 : /** pollardbrent() returns a nontrivial factor of n, assuming n is **/
1102 : /** composite and has no small prime divisor, or NULL if going on **/
1103 : /** would take more time than we want to spend. Sometimes it finds **/
1104 : /** more than one factor, and returns a structure suitable for **/
1105 : /** interpretation by ifac_crack. (Cf Algo 8.5.2 in ACiCNT) **/
1106 : /** **/
1107 : /***********************************************************************/
1108 : #define VALUE(x) gel(x,0)
1109 : #define EXPON(x) gel(x,1)
1110 : #define CLASS(x) gel(x,2)
1111 :
1112 : INLINE void
1113 57987 : INIT(GEN x, GEN v, GEN e, GEN c) {
1114 57987 : VALUE(x) = v;
1115 57987 : EXPON(x) = e;
1116 57987 : CLASS(x) = c;
1117 57987 : }
1118 : static void
1119 51259 : ifac_delete(GEN x) { INIT(x,NULL,NULL,NULL); }
1120 :
1121 : static void
1122 0 : rho_dbg(pari_timer *T, long c, long msg_mask)
1123 : {
1124 0 : if (c & msg_mask) return;
1125 0 : err_printf("Rho: time = %6ld ms,\t%3ld round%s\n",
1126 : timer_delay(T), c, (c==1?"":"s"));
1127 : }
1128 :
1129 : static void
1130 33343888 : one_iter(GEN *x, GEN *P, GEN x1, GEN n, long delta)
1131 : {
1132 33343888 : *x = addis(remii(sqri(*x), n), delta);
1133 33298978 : *P = modii(mulii(*P, subii(x1, *x)), n);
1134 33349279 : }
1135 : /* Return NULL when we run out of time, or a single t_INT containing a
1136 : * nontrivial factor of n, or a vector of t_INTs, each triple of successive
1137 : * entries containing a factor, an exponent (equal to one), and a factor
1138 : * class (NULL for unknown or zero for known composite), matching the
1139 : * internal representation used by the ifac_*() routines below. Repeated
1140 : * factors may arise; the caller will sort the factors anyway. Result
1141 : * is not gerepile-able (contains NULL) */
1142 : static GEN
1143 5605 : pollardbrent_i(GEN n, long size, long c0, long retries)
1144 : {
1145 5605 : long tf = lgefint(n), delta, msg_mask, c, k, k1, l;
1146 : pari_sp av;
1147 : GEN x, x1, y, P, g, g1, res;
1148 : pari_timer T;
1149 :
1150 5605 : if (DEBUGLEVEL >= 4) timer_start(&T);
1151 5605 : c = c0 << 5; /* 2^5 iterations per round */
1152 11210 : msg_mask = (size >= 448? 0x1fff:
1153 5605 : (size >= 192? (256L<<((size-128)>>6))-1: 0xff));
1154 5605 : y = cgeti(tf);
1155 5605 : x1= cgeti(tf);
1156 5605 : av = avma;
1157 :
1158 5605 : PB_RETRY:
1159 : /* trick to make a 'random' choice determined by n. Don't use x^2+0 or
1160 : * x^2-2, ever. Don't use x^2-3 or x^2-7 with a starting value of 2.
1161 : * x^2+4, x^2+9 are affine conjugate to x^2+1, so don't use them either.
1162 : *
1163 : * (the point being that when we get called again on a composite cofactor
1164 : * of something we've already seen, we had better avoid the same delta) */
1165 5605 : switch ((size + retries) & 7)
1166 : {
1167 1119 : case 0: delta= 1; break;
1168 685 : case 1: delta= -1; break;
1169 669 : case 2: delta= 3; break;
1170 797 : case 3: delta= 5; break;
1171 547 : case 4: delta= -5; break;
1172 642 : case 5: delta= 7; break;
1173 615 : case 6: delta= 11; break;
1174 : /* case 7: */
1175 531 : default: delta=-11; break;
1176 : }
1177 5605 : if (DEBUGLEVEL >= 4)
1178 : {
1179 0 : if (!retries)
1180 0 : err_printf("Rho: searching small factor of %ld-bit integer\n", size);
1181 : else
1182 0 : err_printf("Rho: restarting for remaining rounds...\n");
1183 0 : err_printf("Rho: using X^2%+1ld for up to %ld rounds of 32 iterations\n",
1184 : delta, c >> 5);
1185 : }
1186 5605 : x = gen_2; P = gen_1; g1 = NULL; k = 1; l = 1;
1187 5605 : affui(2, y);
1188 5605 : affui(2, x1);
1189 : for (;;) /* terminated under the control of c */
1190 : { /* use the polynomial x^2 + delta */
1191 15433085 : one_iter(&x, &P, x1, n, delta);
1192 :
1193 15435012 : if ((--c & 0x1f)==0)
1194 : { /* one round complete */
1195 477439 : g = gcdii(n, P); if (!is_pm1(g)) goto fin;
1196 475255 : if (c <= 0)
1197 : { /* getting bored */
1198 2278 : if (DEBUGLEVEL >= 4)
1199 0 : err_printf("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1200 : timer_delay(&T));
1201 2278 : return NULL;
1202 : }
1203 472977 : P = gen_1;
1204 472977 : if (DEBUGLEVEL >= 4) rho_dbg(&T, c0-(c>>5), msg_mask);
1205 472977 : affii(x,y); x = y; set_avma(av);
1206 : }
1207 :
1208 15428750 : if (--k) continue; /* normal end of loop body */
1209 :
1210 51022 : if (c & 0x1f) /* otherwise, we already checked */
1211 : {
1212 33630 : g = gcdii(n, P); if (!is_pm1(g)) goto fin;
1213 33609 : P = gen_1;
1214 : }
1215 :
1216 : /* Fast forward phase, doing l inner iterations without computing gcds.
1217 : * Check first whether it would take us beyond the alloted time.
1218 : * Fast forward rounds count only half (although they're taking
1219 : * more like 2/3 the time of normal rounds). This to counteract the
1220 : * nuisance that all c0 between 4096 and 6144 would act exactly as
1221 : * 4096; with the halving trick only the range 4096..5120 collapses
1222 : * (similarly for all other powers of two) */
1223 51001 : if ((c -= (l>>1)) <= 0)
1224 : { /* got bored */
1225 1149 : if (DEBUGLEVEL >= 4)
1226 0 : err_printf("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1227 : timer_delay(&T));
1228 1149 : return NULL;
1229 : }
1230 49852 : c &= ~0x1f; /* keep it on multiples of 32 */
1231 :
1232 : /* Fast forward loop */
1233 49852 : affii(x, x1); set_avma(av); x = x1;
1234 49852 : k = l; l <<= 1;
1235 : /* don't show this for the first several (short) fast forward phases. */
1236 49852 : if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1237 0 : err_printf("Rho: fast forward phase (%ld rounds of 64)...\n", l>>7);
1238 17985968 : for (k1=k; k1; k1--)
1239 : {
1240 17936225 : one_iter(&x, &P, x1, n, delta);
1241 17935843 : if ((k1 & 0x1f) == 0) gerepileall(av, 2, &x, &P);
1242 : }
1243 49743 : if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1244 0 : err_printf("Rho: time = %6ld ms,\t%3ld rounds, back to normal mode\n",
1245 0 : timer_delay(&T), c0-(c>>5));
1246 49743 : affii(x,y); P = gerepileuptoint(av, P); x = y;
1247 : } /* forever */
1248 :
1249 2178 : fin:
1250 : /* An accumulated gcd was > 1 */
1251 2178 : if (!equalii(g,n))
1252 : { /* if it isn't n, and looks prime, return it */
1253 2052 : if (MR_Jaeschke(g))
1254 : {
1255 2052 : if (DEBUGLEVEL >= 4)
1256 : {
1257 0 : rho_dbg(&T, c0-(c>>5), 0);
1258 0 : err_printf("\tfound factor = %Ps\n",g);
1259 : }
1260 2052 : return g;
1261 : }
1262 0 : set_avma(av); g1 = icopy(g); /* known composite, keep it safe */
1263 0 : av = avma;
1264 : }
1265 126 : else g1 = n; /* and work modulo g1 for backtracking */
1266 :
1267 : /* Here g1 is known composite */
1268 126 : if (DEBUGLEVEL >= 4 && size > 192)
1269 0 : err_printf("Rho: hang on a second, we got something here...\n");
1270 126 : x = y;
1271 : for(;;)
1272 : { /* backtrack until period recovered. Must terminate */
1273 9450 : x = addis(remii(sqri(x), g1), delta);
1274 9450 : g = gcdii(subii(x1, x), g1); if (!is_pm1(g)) break;
1275 :
1276 9324 : if (DEBUGLEVEL >= 4 && (--c & 0x1f) == 0) rho_dbg(&T, c0-(c>>5), msg_mask);
1277 : }
1278 :
1279 126 : if (g1 == n || equalii(g,g1))
1280 : {
1281 126 : if (g1 == n && equalii(g,g1))
1282 : { /* out of luck */
1283 0 : if (DEBUGLEVEL >= 4)
1284 : {
1285 0 : rho_dbg(&T, c0-(c>>5), 0);
1286 0 : err_printf("\tPollard-Brent failed.\n");
1287 : }
1288 0 : if (++retries >= 4) pari_err_BUG("");
1289 0 : goto PB_RETRY;
1290 : }
1291 : /* half lucky: we've split n, but g1 equals either g or n */
1292 126 : if (DEBUGLEVEL >= 4)
1293 : {
1294 0 : rho_dbg(&T, c0-(c>>5), 0);
1295 0 : err_printf("\tfound %sfactor = %Ps\n", (g1!=n ? "composite " : ""), g);
1296 : }
1297 126 : res = cgetg(7, t_VEC);
1298 : /* g^1: known composite when g1!=n */
1299 126 : INIT(res+1, g, gen_1, (g1!=n? gen_0: NULL));
1300 : /* cofactor^1: status unknown */
1301 126 : INIT(res+4, diviiexact(n,g), gen_1, NULL);
1302 126 : return res;
1303 : }
1304 : /* g < g1 < n : our lucky day -- we've split g1, too */
1305 0 : res = cgetg(10, t_VEC);
1306 : /* unknown status for all three factors */
1307 0 : INIT(res+1, g, gen_1, NULL);
1308 0 : INIT(res+4, diviiexact(g1,g), gen_1, NULL);
1309 0 : INIT(res+7, diviiexact(n,g1), gen_1, NULL);
1310 0 : if (DEBUGLEVEL >= 4)
1311 : {
1312 0 : rho_dbg(&T, c0-(c>>5), 0);
1313 0 : err_printf("\tfound factors = %Ps, %Ps,\n\tand %Ps\n",
1314 0 : gel(res,1), gel(res,4), gel(res,7));
1315 : }
1316 0 : return res;
1317 : }
1318 : /* Tuning parameter: for input up to 64 bits long, we must not spend more
1319 : * than a very short time, for fear of slowing things down on average.
1320 : * With the current tuning formula, increase our efforts somewhat at 49 bit
1321 : * input (an extra round for each bit at first), and go up more and more
1322 : * rapidly after we pass 80 bits.-- Changed this to adjust for the presence of
1323 : * squfof, which will finish input up to 59 bits quickly. */
1324 : static GEN
1325 5605 : pollardbrent(GEN n)
1326 : {
1327 5605 : const long tune_pb_min = 14; /* even 15 seems too much. */
1328 5605 : long c0, size = expi(n) + 1;
1329 5605 : if (size <= 28)
1330 0 : c0 = 32;/* amounts very nearly to 'insist'. Now that we have squfof(), we
1331 : * don't insist any more when input is 2^29 ... 2^32 */
1332 5605 : else if (size <= 42)
1333 1763 : c0 = tune_pb_min;
1334 3842 : else if (size <= 59) /* match squfof() cutoff point */
1335 2159 : c0 = tune_pb_min + ((size - 42)<<1);
1336 1683 : else if (size <= 72)
1337 803 : c0 = tune_pb_min + size - 24;
1338 880 : else if (size <= 301)
1339 : /* nonlinear increase in effort, kicking in around 80 bits */
1340 : /* 301 gives 48121 + tune_pb_min */
1341 873 : c0 = tune_pb_min + size - 60 +
1342 873 : ((size-73)>>1)*((size-70)>>3)*((size-56)>>4);
1343 : else
1344 7 : c0 = 49152; /* ECM is faster when it'd take longer */
1345 5605 : return pollardbrent_i(n, size, c0, 0);
1346 : }
1347 : GEN
1348 0 : Z_pollardbrent(GEN n, long rounds, long seed)
1349 : {
1350 0 : pari_sp av = avma;
1351 0 : GEN v = pollardbrent_i(n, expi(n)+1, rounds, seed);
1352 0 : if (!v) return NULL;
1353 0 : if (typ(v) == t_INT) v = mkvec2(v, diviiexact(n,v));
1354 0 : else if (lg(v) == 7) v = mkvec2(gel(v,1), gel(v,4));
1355 0 : else v = mkvec3(gel(v,1), gel(v,4), gel(v,7));
1356 0 : return gerepilecopy(av, v);
1357 : }
1358 :
1359 : /***********************************************************************/
1360 : /** FACTORIZATION (Shanks' SQUFOF) --GN2000Sep30-Oct01 **/
1361 : /** squfof() returns a nontrivial factor of n, assuming n is odd, **/
1362 : /** composite, not a pure square, and has no small prime divisor, **/
1363 : /** or NULL if it fails to find one. It works on two discriminants **/
1364 : /** simultaneously (n and 5n for n=1(4), 3n and 4n for n=3(4)). **/
1365 : /** Present implementation is limited to input <2^59, and works most **/
1366 : /** of the time in signed arithmetic on integers <2^31 in absolute **/
1367 : /** size. (Cf. Algo 8.7.2 in ACiCNT) **/
1368 : /***********************************************************************/
1369 :
1370 : /* The following is invoked to walk back along the ambiguous cycle* until we
1371 : * hit an ambiguous form and thus the desired factor, which it returns. If it
1372 : * fails for any reason, it returns 0. It doesn't interfere with timing and
1373 : * diagnostics, which it leaves to squfof().
1374 : *
1375 : * Before we invoke this, we've found a form (A, B, -C) with A = a^2, where a
1376 : * isn't blacklisted and where gcd(a, B) = 1. According to ACiCANT, we should
1377 : * now proceed reducing the form (a, -B, -aC), but it is easy to show that the
1378 : * first reduction step always sends this to (-aC, B, a), and the next one,
1379 : * with q computed as usual from B and a (occupying the c position), gives a
1380 : * reduced form, whose third member is easiest to recover by going back to D.
1381 : * From this point onwards, we're once again working with single-word numbers.
1382 : * No need to track signs, just work with the abs values of the coefficients. */
1383 : static long
1384 2980 : squfof_ambig(long a, long B, long dd, GEN D)
1385 : {
1386 : long b, c, q, qa, qc, qcb, a0, b0, b1, c0;
1387 2980 : long cnt = 0; /* count reduction steps on the cycle */
1388 :
1389 2980 : q = (dd + (B>>1)) / a;
1390 2980 : qa = q * a;
1391 2980 : b = (qa - B) + qa; /* avoid overflow */
1392 : {
1393 2980 : pari_sp av = avma;
1394 2980 : c = itos(divis(shifti(subii(D, sqrs(b)), -2), a));
1395 2980 : set_avma(av);
1396 : }
1397 : #ifdef DEBUG_SQUFOF
1398 : err_printf("SQUFOF: ambigous cycle of discriminant %Ps\n", D);
1399 : err_printf("SQUFOF: Form on ambigous cycle (%ld, %ld, %ld)\n", a, b, c);
1400 : #endif
1401 :
1402 2980 : a0 = a; b0 = b1 = b; /* end of loop detection and safeguard */
1403 :
1404 : for (;;) /* reduced cycles are finite */
1405 : { /* reduction step */
1406 5214853 : c0 = c;
1407 5214853 : if (c0 > dd)
1408 1457199 : q = 1;
1409 : else
1410 3757654 : q = (dd + (b>>1)) / c0;
1411 5214853 : if (q == 1)
1412 : {
1413 2168704 : qcb = c0 - b; b = c0 + qcb; c = a - qcb;
1414 : }
1415 : else
1416 : {
1417 3046149 : qc = q*c0; qcb = qc - b; b = qc + qcb; c = a - q*qcb;
1418 : }
1419 5214853 : a = c0;
1420 :
1421 5214853 : cnt++; if (b == b1) break;
1422 :
1423 : /* safeguard against infinite loop: recognize when we've walked the entire
1424 : * cycle in vain. (I don't think this can actually happen -- exercise.) */
1425 5211873 : if (b == b0 && a == a0) return 0;
1426 :
1427 5211873 : b1 = b;
1428 : }
1429 2980 : q = a&1 ? a : a>>1;
1430 2980 : if (DEBUGLEVEL >= 4)
1431 : {
1432 0 : if (q > 1)
1433 0 : err_printf("SQUFOF: found factor %ld from ambiguous form\n"
1434 : "\tafter %ld steps on the ambiguous cycle\n",
1435 0 : q / ugcd(q,15), cnt);
1436 : else
1437 0 : err_printf("SQUFOF: ...found nothing on the ambiguous cycle\n"
1438 : "\tafter %ld steps there\n", cnt);
1439 0 : if (DEBUGLEVEL >= 6) err_printf("SQUFOF: squfof_ambig returned %ld\n", q);
1440 : }
1441 2980 : return q;
1442 : }
1443 :
1444 : #define SQUFOF_BLACKLIST_SZ 64
1445 :
1446 : /* assume 2,3,5 do not divide n */
1447 : static GEN
1448 3427 : squfof(GEN n)
1449 : {
1450 : ulong d1, d2;
1451 3427 : long tf = lgefint(n), nm4, cnt = 0;
1452 : long a1, b1, c1, dd1, L1, a2, b2, c2, dd2, L2, a, q, c, qc, qcb;
1453 : GEN D1, D2;
1454 3427 : pari_sp av = avma;
1455 : long blacklist1[SQUFOF_BLACKLIST_SZ], blacklist2[SQUFOF_BLACKLIST_SZ];
1456 3427 : long blp1 = 0, blp2 = 0;
1457 3427 : int act1 = 1, act2 = 1;
1458 :
1459 : #ifdef LONG_IS_64BIT
1460 2970 : if (tf > 3 || (tf == 3 && uel(n,2) >= (1UL << (BITS_IN_LONG-5))))
1461 : #else /* 32 bits */
1462 457 : if (tf > 4 || (tf == 4 && (ulong)(*int_MSW(n)) >= (1UL << (BITS_IN_LONG-5))))
1463 : #endif
1464 1077 : return NULL; /* n too large */
1465 :
1466 : /* now we have 5 < n < 2^59 */
1467 2350 : nm4 = mod4(n);
1468 2350 : if (nm4 == 1)
1469 : { /* n = 1 (mod4): run one iteration on D1 = n, another on D2 = 5n */
1470 1237 : D1 = n;
1471 1237 : D2 = mului(5,n); d2 = itou(sqrti(D2)); dd2 = (long)((d2>>1) + (d2&1));
1472 1237 : b2 = (long)((d2-1) | 1); /* b1, b2 will always stay odd */
1473 : }
1474 : else
1475 : { /* n = 3 (mod4): run one iteration on D1 = 3n, another on D2 = 4n */
1476 1113 : D1 = mului(3,n);
1477 1113 : D2 = shifti(n,2); dd2 = itou(sqrti(n)); d2 = dd2 << 1;
1478 1113 : b2 = (long)(d2 & (~1UL)); /* largest even below d2, will stay even */
1479 : }
1480 2350 : d1 = itou(sqrti(D1));
1481 2350 : b1 = (long)((d1-1) | 1); /* largest odd number not exceeding d1 */
1482 2350 : c1 = itos(shifti(subii(D1, sqru((ulong)b1)), -2));
1483 2350 : if (!c1) pari_err_BUG("squfof [caller of] (n or 3n is a square)");
1484 2350 : c2 = itos(shifti(subii(D2, sqru((ulong)b2)), -2));
1485 2350 : if (!c2) pari_err_BUG("squfof [caller of] (5n is a square)");
1486 2350 : L1 = (long)usqrt(d1);
1487 2350 : L2 = (long)usqrt(d2);
1488 : /* dd1 used to compute floor((d1+b1)/2) as dd1+floor(b1/2), without
1489 : * overflowing the 31bit signed integer size limit. Same for dd2. */
1490 2350 : dd1 = (long) ((d1>>1) + (d1&1));
1491 2350 : a1 = a2 = 1;
1492 :
1493 : /* The two (identity) forms (a1,b1,-c1) and (a2,b2,-c2) are now set up.
1494 : *
1495 : * a1 and c1 represent the absolute values of the a,c coefficients; we keep
1496 : * track of the sign separately, via the iteration counter cnt: when cnt is
1497 : * even, c is understood to be negative, else c is positive and a < 0.
1498 : *
1499 : * L1, L2 are the limits for blacklisting small leading coefficients
1500 : * on the principal cycle, to guarantee that when we find a square form,
1501 : * its square root will belong to an ambiguous cycle (i.e. won't be an
1502 : * earlier form on the principal cycle).
1503 : *
1504 : * When n = 3(mod 4), D2 = 12(mod 16), and b^2 is always 0 or 4 mod 16.
1505 : * It follows that 4*a*c must be 4 or 8 mod 16, respectively, so at most
1506 : * one of a,c can be divisible by 2 at most to the first power. This fact
1507 : * is used a couple of times below.
1508 : *
1509 : * The flags act1, act2 remain true while the respective cycle is still
1510 : * active; we drop them to false when we return to the identity form with-
1511 : * out having found a square form (or when the blacklist overflows, which
1512 : * shouldn't happen). */
1513 2350 : if (DEBUGLEVEL >= 4)
1514 0 : err_printf("SQUFOF: entering main loop with forms\n"
1515 : "\t(1, %ld, %ld) and (1, %ld, %ld)\n\tof discriminants\n"
1516 : "\t%Ps and %Ps, respectively\n", b1, -c1, b2, -c2, D1, D2);
1517 :
1518 : /* MAIN LOOP: walk around the principal cycle looking for a square form.
1519 : * Blacklist small leading coefficients.
1520 : *
1521 : * The reduction operator can be computed entirely in 32-bit arithmetic:
1522 : * Let q = floor(floor((d1+b1)/2)/c1) (when c1>dd1, q=1, which happens
1523 : * often enough to special-case it). Then the new b1 = (q*c1-b1) + q*c1,
1524 : * which does not overflow, and the new c1 = a1 - q*(q*c1-b1), which is
1525 : * bounded by d1 in abs size since both the old and the new a1 are positive
1526 : * and bounded by d1. */
1527 7782929 : while (act1 || act2)
1528 : {
1529 7782922 : if (act1)
1530 : { /* send first form through reduction operator if active */
1531 7782838 : c = c1;
1532 7782838 : q = (c > dd1)? 1: (dd1 + (b1>>1)) / c;
1533 7782838 : if (q == 1)
1534 3221654 : { qcb = c - b1; b1 = c + qcb; c1 = a1 - qcb; }
1535 : else
1536 4561184 : { qc = q*c; qcb = qc - b1; b1 = qc + qcb; c1 = a1 - q*qcb; }
1537 7782838 : a1 = c;
1538 :
1539 7782838 : if (a1 <= L1)
1540 : { /* blacklist this */
1541 2086 : if (blp1 >= SQUFOF_BLACKLIST_SZ) /* overflows: shouldn't happen */
1542 0 : act1 = 0; /* silently */
1543 : else
1544 : {
1545 2086 : if (DEBUGLEVEL >= 6)
1546 0 : err_printf("SQUFOF: blacklisting a = %ld on first cycle\n", a1);
1547 2086 : blacklist1[blp1++] = a1;
1548 : }
1549 : }
1550 : }
1551 7782922 : if (act2)
1552 : { /* send second form through reduction operator if active */
1553 7781872 : c = c2;
1554 7781872 : q = (c > dd2)? 1: (dd2 + (b2>>1)) / c;
1555 7781872 : if (q == 1)
1556 3231752 : { qcb = c - b2; b2 = c + qcb; c2 = a2 - qcb; }
1557 : else
1558 4550120 : { qc = q*c; qcb = qc - b2; b2 = qc + qcb; c2 = a2 - q*qcb; }
1559 7781872 : a2 = c;
1560 :
1561 7781872 : if (a2 <= L2)
1562 : { /* blacklist this */
1563 1737 : if (blp2 >= SQUFOF_BLACKLIST_SZ) /* overflows: shouldn't happen */
1564 0 : act2 = 0; /* silently */
1565 : else
1566 : {
1567 1737 : if (DEBUGLEVEL >= 6)
1568 0 : err_printf("SQUFOF: blacklisting a = %ld on second cycle\n", a2);
1569 1737 : blacklist2[blp2++] = a2;
1570 : }
1571 : }
1572 : }
1573 :
1574 : /* bump counter, loop if this is an odd iteration (i.e. if the real
1575 : * leading coefficients are negative) */
1576 7782922 : if (++cnt & 1) continue;
1577 :
1578 : /* second half of main loop entered only when the leading coefficients
1579 : * are positive (i.e., during even-numbered iterations) */
1580 :
1581 : /* examine first form if active */
1582 3891461 : if (act1 && a1 == 1) /* back to identity */
1583 : { /* drop this discriminant */
1584 7 : act1 = 0;
1585 7 : if (DEBUGLEVEL >= 4)
1586 0 : err_printf("SQUFOF: first cycle exhausted after %ld iterations,\n"
1587 : "\tdropping it\n", cnt);
1588 : }
1589 3891461 : if (act1)
1590 : {
1591 3891412 : if (uissquareall((ulong)a1, (ulong*)&a))
1592 : { /* square form */
1593 2295 : if (DEBUGLEVEL >= 4)
1594 0 : err_printf("SQUFOF: square form (%ld^2, %ld, %ld) on first cycle\n"
1595 : "\tafter %ld iterations\n", a, b1, -c1, cnt);
1596 2295 : if (a <= L1)
1597 : { /* blacklisted? */
1598 : long j;
1599 4521 : for (j = 0; j < blp1; j++)
1600 3164 : if (a == blacklist1[j]) { a = 0; break; }
1601 : }
1602 2295 : if (a > 0)
1603 : { /* not blacklisted */
1604 1357 : q = ugcd(a, b1); /* imprimitive form? */
1605 1357 : if (q > 1)
1606 : { /* q^2 divides D1 hence n [ assuming n % 3 != 0 ] */
1607 0 : set_avma(av);
1608 0 : if (DEBUGLEVEL >= 4) err_printf("SQUFOF: found factor %ld^2\n", q);
1609 0 : return mkvec3(utoipos(q), gen_2, NULL);/* exponent 2, unknown status */
1610 : }
1611 : /* chase the inverse root form back along the ambiguous cycle */
1612 1357 : q = squfof_ambig(a, b1, dd1, D1);
1613 1357 : if (nm4 == 3 && q % 3 == 0) q /= 3;
1614 1357 : if (q > 1) return gc_utoipos(av, q); /* SUCCESS! */
1615 : }
1616 938 : else if (DEBUGLEVEL >= 4) /* blacklisted */
1617 0 : err_printf("SQUFOF: ...but the root form seems to be on the "
1618 : "principal cycle\n");
1619 : }
1620 : }
1621 :
1622 : /* examine second form if active */
1623 3890349 : if (act2 && a2 == 1) /* back to identity form */
1624 : { /* drop this discriminant */
1625 14 : act2 = 0;
1626 14 : if (DEBUGLEVEL >= 4)
1627 0 : err_printf("SQUFOF: second cycle exhausted after %ld iterations,\n"
1628 : "\tdropping it\n", cnt);
1629 : }
1630 3890349 : if (act2)
1631 : {
1632 3889817 : if (uissquareall((ulong)a2, (ulong*)&a))
1633 : { /* square form */
1634 2099 : if (DEBUGLEVEL >= 4)
1635 0 : err_printf("SQUFOF: square form (%ld^2, %ld, %ld) on second cycle\n"
1636 : "\tafter %ld iterations\n", a, b2, -c2, cnt);
1637 2099 : if (a <= L2)
1638 : { /* blacklisted? */
1639 : long j;
1640 3717 : for (j = 0; j < blp2; j++)
1641 2094 : if (a == blacklist2[j]) { a = 0; break; }
1642 : }
1643 2099 : if (a > 0)
1644 : { /* not blacklisted */
1645 1623 : q = ugcd(a, b2); /* imprimitive form? */
1646 : /* NB if b2 is even, a is odd, so the gcd is always odd */
1647 1623 : if (q > 1)
1648 : { /* q^2 divides D2 hence n [ assuming n % 5 != 0 ] */
1649 0 : set_avma(av);
1650 0 : if (DEBUGLEVEL >= 4) err_printf("SQUFOF: found factor %ld^2\n", q);
1651 0 : return mkvec3(utoipos(q), gen_2, NULL);/* exponent 2, unknown status */
1652 : }
1653 : /* chase the inverse root form along the ambiguous cycle */
1654 1623 : q = squfof_ambig(a, b2, dd2, D2);
1655 1623 : if (nm4 == 1 && q % 5 == 0) q /= 5;
1656 1623 : if (q > 1) return gc_utoipos(av, q); /* SUCCESS! */
1657 : }
1658 476 : else if (DEBUGLEVEL >= 4) /* blacklisted */
1659 0 : err_printf("SQUFOF: ...but the root form seems to be on the "
1660 : "principal cycle\n");
1661 : }
1662 : }
1663 : } /* end main loop */
1664 :
1665 : /* both discriminants turned out to be useless. */
1666 7 : if (DEBUGLEVEL>=4) err_printf("SQUFOF: giving up\n");
1667 7 : return gc_NULL(av);
1668 : }
1669 :
1670 : /***********************************************************************/
1671 : /* DETECTING ODD POWERS --GN1998Jun28 */
1672 : /* Factoring engines like MPQS which ultimately rely on computing */
1673 : /* gcd(N, x^2-y^2) to find a nontrivial factor of N can't split */
1674 : /* N = p^k for an odd prime p, since (Z/p^k)^* is then cyclic. Here */
1675 : /* is an analogue of Z_issquareall() for 3rd, 5th and 7th powers. */
1676 : /* The general case is handled by is_kth_power */
1677 : /***********************************************************************/
1678 :
1679 : /* Multistage sieve. First stages work mod 211, 209, 61, 203 in this order
1680 : * (first reduce mod the product of these and then take the remainder apart).
1681 : * Second stages use 117, 31, 43, 71. Moduli which are no longer interesting
1682 : * are skipped. Everything is encoded in a table of 106 24-bit masks. We only
1683 : * need the first half of the residues. Three bits per modulus indicate which
1684 : * residues are 7th (bit 2), 5th (bit 1) or 3rd (bit 0) powers; the eight
1685 : * moduli above are assigned right-to-left. The table was generated using: */
1686 :
1687 : #if 0
1688 : L = [71, 43, 31, [O(3^2),O(13)], [O(7),O(29)], 61, [O(11),O(19)], 211];
1689 : ispow(x, N, k)=
1690 : {
1691 : if (type(N) == "t_INT", return (ispower(Mod(x,N), k)));
1692 : for (i = 1, #N, if (!ispower(x + N[i], k), return (0))); 1
1693 : }
1694 : check(r) =
1695 : {
1696 : print1(" 0");
1697 : for (i=1,#L,
1698 : N = 0;
1699 : if (ispow(r, L[i], 3), N += 1);
1700 : if (ispow(r, L[i], 5), N += 2);
1701 : if (ispow(r, L[i], 7), N += 4);
1702 : print1(N);
1703 : ); print("ul, /* ", r, " */")
1704 : }
1705 : for (r = 0, 105, check(r))
1706 : #endif
1707 : static ulong powersmod[106] = {
1708 : 077777777ul, /* 0 */
1709 : 077777777ul, /* 1 */
1710 : 013562440ul, /* 2 */
1711 : 012402540ul, /* 3 */
1712 : 013562440ul, /* 4 */
1713 : 052662441ul, /* 5 */
1714 : 016603440ul, /* 6 */
1715 : 016463450ul, /* 7 */
1716 : 013573551ul, /* 8 */
1717 : 012462540ul, /* 9 */
1718 : 012462464ul, /* 10 */
1719 : 013462771ul, /* 11 */
1720 : 012406473ul, /* 12 */
1721 : 012463641ul, /* 13 */
1722 : 052463646ul, /* 14 */
1723 : 012503446ul, /* 15 */
1724 : 013562440ul, /* 16 */
1725 : 052466440ul, /* 17 */
1726 : 012472451ul, /* 18 */
1727 : 012462454ul, /* 19 */
1728 : 032463550ul, /* 20 */
1729 : 013403664ul, /* 21 */
1730 : 013463460ul, /* 22 */
1731 : 032562565ul, /* 23 */
1732 : 012402540ul, /* 24 */
1733 : 052662441ul, /* 25 */
1734 : 032672452ul, /* 26 */
1735 : 013573551ul, /* 27 */
1736 : 012467541ul, /* 28 */
1737 : 012567640ul, /* 29 */
1738 : 032706450ul, /* 30 */
1739 : 012762452ul, /* 31 */
1740 : 033762662ul, /* 32 */
1741 : 012502562ul, /* 33 */
1742 : 032463562ul, /* 34 */
1743 : 013563440ul, /* 35 */
1744 : 016663440ul, /* 36 */
1745 : 036662550ul, /* 37 */
1746 : 012462552ul, /* 38 */
1747 : 033502450ul, /* 39 */
1748 : 012462643ul, /* 40 */
1749 : 033467540ul, /* 41 */
1750 : 017403441ul, /* 42 */
1751 : 017463462ul, /* 43 */
1752 : 017472460ul, /* 44 */
1753 : 033462470ul, /* 45 */
1754 : 052566450ul, /* 46 */
1755 : 013562640ul, /* 47 */
1756 : 032403640ul, /* 48 */
1757 : 016463450ul, /* 49 */
1758 : 016463752ul, /* 50 */
1759 : 033402440ul, /* 51 */
1760 : 012462540ul, /* 52 */
1761 : 012472540ul, /* 53 */
1762 : 053562462ul, /* 54 */
1763 : 012463465ul, /* 55 */
1764 : 012663470ul, /* 56 */
1765 : 052607450ul, /* 57 */
1766 : 012566553ul, /* 58 */
1767 : 013466440ul, /* 59 */
1768 : 012502741ul, /* 60 */
1769 : 012762744ul, /* 61 */
1770 : 012763740ul, /* 62 */
1771 : 012763443ul, /* 63 */
1772 : 013573551ul, /* 64 */
1773 : 013462471ul, /* 65 */
1774 : 052502460ul, /* 66 */
1775 : 012662463ul, /* 67 */
1776 : 012662451ul, /* 68 */
1777 : 012403550ul, /* 69 */
1778 : 073567540ul, /* 70 */
1779 : 072463445ul, /* 71 */
1780 : 072462740ul, /* 72 */
1781 : 012472442ul, /* 73 */
1782 : 012462644ul, /* 74 */
1783 : 013406650ul, /* 75 */
1784 : 052463471ul, /* 76 */
1785 : 012563474ul, /* 77 */
1786 : 013503460ul, /* 78 */
1787 : 016462441ul, /* 79 */
1788 : 016462440ul, /* 80 */
1789 : 012462540ul, /* 81 */
1790 : 013462641ul, /* 82 */
1791 : 012463454ul, /* 83 */
1792 : 013403550ul, /* 84 */
1793 : 057563540ul, /* 85 */
1794 : 017466441ul, /* 86 */
1795 : 017606471ul, /* 87 */
1796 : 053666573ul, /* 88 */
1797 : 012562561ul, /* 89 */
1798 : 013473641ul, /* 90 */
1799 : 032573440ul, /* 91 */
1800 : 016763440ul, /* 92 */
1801 : 016702640ul, /* 93 */
1802 : 033762552ul, /* 94 */
1803 : 012562550ul, /* 95 */
1804 : 052402451ul, /* 96 */
1805 : 033563441ul, /* 97 */
1806 : 012663561ul, /* 98 */
1807 : 012677560ul, /* 99 */
1808 : 012462464ul, /* 100 */
1809 : 032562642ul, /* 101 */
1810 : 013402551ul, /* 102 */
1811 : 032462450ul, /* 103 */
1812 : 012467445ul, /* 104 */
1813 : 032403440ul, /* 105 */
1814 : };
1815 :
1816 : static int
1817 4032811 : check_res(ulong x, ulong N, int shift, ulong *mask)
1818 : {
1819 4032811 : long r = x%N; if ((ulong)r> (N>>1)) r = N - r;
1820 4032811 : *mask &= (powersmod[r] >> shift);
1821 4032811 : return *mask;
1822 : }
1823 :
1824 : /* is x mod 211*209*61*203*117*31*43*71 a 3rd, 5th or 7th power ? */
1825 : int
1826 2499295 : uis_357_powermod(ulong x, ulong *mask)
1827 : {
1828 2499295 : if ( !check_res(x, 211UL, 0, mask)) return 0;
1829 1000911 : if (*mask & 3 && !check_res(x, 209UL, 3, mask)) return 0;
1830 405698 : if (*mask & 3 && !check_res(x, 61UL, 6, mask)) return 0;
1831 222781 : if (*mask & 5 && !check_res(x, 203UL, 9, mask)) return 0;
1832 57223 : if (*mask & 1 && !check_res(x, 117UL,12, mask)) return 0;
1833 32125 : if (*mask & 3 && !check_res(x, 31UL,15, mask)) return 0;
1834 23990 : if (*mask & 5 && !check_res(x, 43UL,18, mask)) return 0;
1835 6992 : if (*mask & 6 && !check_res(x, 71UL,21, mask)) return 0;
1836 3572 : return 1;
1837 : }
1838 : /* asume x > 0 and pt != NULL */
1839 : int
1840 2456679 : uis_357_power(ulong x, ulong *pt, ulong *mask)
1841 : {
1842 : double logx;
1843 2456679 : if (!odd(x))
1844 : {
1845 9081 : long v = vals(x);
1846 9081 : if (v % 7) *mask &= ~4;
1847 9081 : if (v % 5) *mask &= ~2;
1848 9081 : if (v % 3) *mask &= ~1;
1849 9081 : if (!*mask) return 0;
1850 : }
1851 2449093 : if (!uis_357_powermod(x, mask)) return 0;
1852 2983 : logx = log((double)x);
1853 3877 : while (*mask)
1854 : {
1855 : long e, b;
1856 : ulong y, ye;
1857 2983 : if (*mask & 1) { b = 1; e = 3; }
1858 879 : else if (*mask & 2) { b = 2; e = 5; }
1859 355 : else { b = 4; e = 7; }
1860 2983 : y = (ulong)(exp(logx / e) + 0.5);
1861 2983 : ye = upowuu(y,e);
1862 2983 : if (ye == x) { *pt = y; return e; }
1863 : #ifdef LONG_IS_64BIT
1864 768 : if (ye > x) y--; else y++;
1865 768 : ye = upowuu(y,e);
1866 768 : if (ye == x) { *pt = y; return e; }
1867 : #endif
1868 894 : *mask &= ~b; /* turn the bit off */
1869 : }
1870 894 : return 0;
1871 : }
1872 :
1873 : #ifndef LONG_IS_64BIT
1874 : /* as above, split in two functions */
1875 : /* is x mod 211*209*61*203 a 3rd, 5th or 7th power ? */
1876 : static int
1877 12165 : uis_357_powermod_32bit_1(ulong x, ulong *mask)
1878 : {
1879 12165 : if ( !check_res(x, 211UL, 0, mask)) return 0;
1880 6676 : if (*mask & 3 && !check_res(x, 209UL, 3, mask)) return 0;
1881 3459 : if (*mask & 3 && !check_res(x, 61UL, 6, mask)) return 0;
1882 2423 : if (*mask & 5 && !check_res(x, 203UL, 9, mask)) return 0;
1883 658 : return 1;
1884 : }
1885 : /* is x mod 117*31*43*71 a 3rd, 5th or 7th power ? */
1886 : static int
1887 658 : uis_357_powermod_32bit_2(ulong x, ulong *mask)
1888 : {
1889 658 : if (*mask & 1 && !check_res(x, 117UL,12, mask)) return 0;
1890 515 : if (*mask & 3 && !check_res(x, 31UL,15, mask)) return 0;
1891 410 : if (*mask & 5 && !check_res(x, 43UL,18, mask)) return 0;
1892 187 : if (*mask & 6 && !check_res(x, 71UL,21, mask)) return 0;
1893 138 : return 1;
1894 : }
1895 : #endif
1896 :
1897 : /* Returns 3, 5, or 7 if x is a cube (but not a 5th or 7th power), a 5th
1898 : * power (but not a 7th), or a 7th power, and in this case creates the
1899 : * base on the stack and assigns its address to *pt. Otherwise returns 0.
1900 : * x must be of type t_INT and positive; this is not checked. The *mask
1901 : * argument tells us which things to check -- bit 0: 3rd, bit 1: 5th,
1902 : * bit 2: 7th pwr; set a bit to have the corresponding power examined --
1903 : * and is updated appropriately for a possible follow-up call */
1904 : int
1905 2780354 : is_357_power(GEN x, GEN *pt, ulong *mask)
1906 : {
1907 2780354 : long lx = lgefint(x);
1908 : ulong r;
1909 : pari_sp av;
1910 : GEN y;
1911 :
1912 2780354 : if (!*mask) return 0; /* useful when running in a loop */
1913 2410418 : if (DEBUGLEVEL>4)
1914 0 : err_printf("OddPwrs: examining %ld-bit integer\n", expi(x)+1);
1915 2410418 : if (lgefint(x) == 3) {
1916 : ulong t;
1917 2348051 : long e = uis_357_power(x[2], &t, mask);
1918 2348051 : if (e)
1919 : {
1920 2069 : if (pt) *pt = utoi(t);
1921 2069 : return e;
1922 : }
1923 2345982 : return 0;
1924 : }
1925 : #ifdef LONG_IS_64BIT
1926 50202 : r = (lx == 3)? uel(x,2): umodiu(x, 6046846918939827UL);
1927 50202 : if (!uis_357_powermod(r, mask)) return 0;
1928 : #else
1929 12165 : r = (lx == 3)? uel(x,2): umodiu(x, 211*209*61*203);
1930 12165 : if (!uis_357_powermod_32bit_1(r, mask)) return 0;
1931 658 : r = (lx == 3)? uel(x,2): umodiu(x, 117*31*43*71);
1932 658 : if (!uis_357_powermod_32bit_2(r, mask)) return 0;
1933 : #endif
1934 727 : av = avma;
1935 788 : while (*mask)
1936 : {
1937 : long e, b;
1938 : /* priority to higher powers: if we have a 21st, it is easier to rediscover
1939 : * that its 7th root is a cube than that its cube root is a 7th power */
1940 727 : if (*mask & 4) { b = 4; e = 7; }
1941 625 : else if (*mask & 2) { b = 2; e = 5; }
1942 273 : else { b = 1; e = 3; }
1943 727 : y = mpround( sqrtnr(itor(x, nbits2prec(64 + bit_accuracy(lx) / e)), e) );
1944 727 : if (equalii(powiu(y,e), x))
1945 : {
1946 666 : if (!pt) return gc_int(av,e);
1947 666 : set_avma((pari_sp)y); *pt = gerepileuptoint(av, y);
1948 666 : return e;
1949 : }
1950 61 : *mask &= ~b; /* turn the bit off */
1951 61 : set_avma(av);
1952 : }
1953 61 : return 0;
1954 : }
1955 :
1956 : /* Is x a n-th power ?
1957 : * if d = NULL, n not necessarily prime, otherwise, n prime and d the
1958 : * corresponding diffptr to go on looping over primes.
1959 : * If pt != NULL, it receives the n-th root */
1960 : ulong
1961 331682 : is_kth_power(GEN x, ulong n, GEN *pt)
1962 : {
1963 : forprime_t T;
1964 : long j;
1965 : ulong q, residue;
1966 : GEN y;
1967 331682 : pari_sp av = avma;
1968 :
1969 331682 : (void)u_forprime_arith_init(&T, odd(n)? 2*n+1: n+1, ULONG_MAX, 1,n);
1970 : /* we'll start at q, smallest prime >= n */
1971 :
1972 : /* Modular checks, use small primes q congruent 1 mod n */
1973 : /* A non n-th power nevertheless passes the test with proba n^(-#checks),
1974 : * We'd like this < 1e-6 but let j = floor(log(1e-6) / log(n)) which
1975 : * ensures much less. */
1976 331692 : if (n < 16)
1977 89544 : j = 5;
1978 242148 : else if (n < 32)
1979 85375 : j = 4;
1980 156773 : else if (n < 101)
1981 134510 : j = 3;
1982 22263 : else if (n < 1001)
1983 5924 : j = 2;
1984 16339 : else if (n < 17886697) /* smallest such that smallest suitable q is > 2^32 */
1985 16303 : j = 1;
1986 : else
1987 36 : j = 0;
1988 360910 : for (; j > 0; j--)
1989 : {
1990 360032 : if (!(q = u_forprime_next(&T))) break;
1991 : /* q a prime = 1 mod n */
1992 360022 : residue = umodiu(x, q);
1993 360037 : if (residue == 0)
1994 : {
1995 161 : if (Z_lval(x,q) % n) return gc_ulong(av,0);
1996 49 : continue;
1997 : }
1998 : /* n-th power mod q ? */
1999 359876 : if (Fl_powu(residue, (q-1)/n, q) != 1) return gc_ulong(av,0);
2000 : }
2001 878 : set_avma(av);
2002 :
2003 874 : if (DEBUGLEVEL>4) err_printf("\nOddPwrs: [%lu] passed modular checks\n",n);
2004 : /* go to the horse's mouth... */
2005 874 : y = roundr( sqrtnr(itor(x, nbits2prec(expi(x)/n + 16)), n) );
2006 874 : if (!equalii(powiu(y, n), x)) {
2007 90 : if (DEBUGLEVEL>4) err_printf("\tBut it wasn't a pure power.\n");
2008 90 : return gc_ulong(av,0);
2009 : }
2010 784 : if (!pt) set_avma(av); else { set_avma((pari_sp)y); *pt = gerepileuptoint(av,y); }
2011 784 : return 1;
2012 : }
2013 :
2014 : /* is x a p^i-th power, p >= 11 prime ? Similar to is_357_power(), but instead
2015 : * of the mask, we keep the current test exponent around. Cut off when
2016 : * log_2 x^(1/k) < cutoffbits since we would have found it by trial division.
2017 : * Everything needed here (primitive roots etc.) is computed from scratch on
2018 : * the fly; compared to the size of numbers under consideration, these
2019 : * word-sized computations take negligible time.
2020 : * Any cutoffbits > 0 is safe, but direct root extraction attempts are faster
2021 : * when trial division has been used to discover very small bases. We become
2022 : * competitive at cutoffbits ~ 10 */
2023 : int
2024 55855 : is_pth_power(GEN x, GEN *pt, forprime_t *T, ulong cutoffbits)
2025 : {
2026 55855 : long cnt=0, size = expi(x) /* not +1 */;
2027 : ulong p;
2028 55855 : pari_sp av = avma;
2029 334367 : while ((p = u_forprime_next(T)) && size/p >= cutoffbits) {
2030 278556 : long v = 1;
2031 278556 : if (DEBUGLEVEL>5 && cnt++==2000)
2032 0 : { cnt=0; err_printf("%lu%% ", 100*p*cutoffbits/size); }
2033 278612 : while (is_kth_power(x, p, pt)) {
2034 56 : v *= p; x = *pt;
2035 56 : size = expi(x);
2036 : }
2037 278554 : if (v > 1)
2038 : {
2039 42 : if (DEBUGLEVEL>5) err_printf("\nOddPwrs: is a %ld power\n",v);
2040 42 : return v;
2041 : }
2042 : }
2043 55808 : if (DEBUGLEVEL>5) err_printf("\nOddPwrs: not a power\n",p);
2044 55808 : return gc_int(av,0); /* give up */
2045 : }
2046 :
2047 : /***********************************************************************/
2048 : /** FACTORIZATION (master iteration) **/
2049 : /** Driver for the various methods of finding large factors **/
2050 : /** (after trial division has cast out the very small ones). **/
2051 : /** GN1998Jun24--30 **/
2052 : /***********************************************************************/
2053 :
2054 : /* Direct use:
2055 : * ifac_start_hint(n,moebius,hint) registers with the iterative factorizer
2056 : * - an integer n (without prime factors < tridiv_bound(n))
2057 : * - registers whether or not we should terminate early if we find a square
2058 : * factor,
2059 : * - a hint about which method(s) to use.
2060 : * This must always be called first. If input is not composite, oo loop.
2061 : * The routine decomposes n nontrivially into a product of two factors except
2062 : * in squarefreeness ('Moebius') mode.
2063 : *
2064 : * ifac_start(n,moebius) same using default hint.
2065 : *
2066 : * ifac_primary_factor() returns a prime divisor (not necessarily the
2067 : * smallest) and the corresponding exponent.
2068 : *
2069 : * Encapsulated user interface: Many arithmetic functions have a 'contributor'
2070 : * ifac_xxx, to be called on any large composite cofactor left over after trial
2071 : * division by small primes: xxx is one of moebius, issquarefree, totient, etc.
2072 : *
2073 : * We never test whether the input number is prime or composite, since
2074 : * presumably it will have come out of the small factors finder stage
2075 : * (which doesn't really exist yet but which will test the left-over
2076 : * cofactor for primality once it does). */
2077 :
2078 : /* The data structure in which we preserve whatever we know about our number N
2079 : * is kept on the PARI stack, and updated as needed.
2080 : * This makes the machinery re-entrant, and avoids memory leaks when a lengthy
2081 : * factorization is interrupted. We try to keep the whole affair connected,
2082 : * and the parent object is always older than its children. This may in
2083 : * rare cases lead to some extra copying around, and knowing what is garbage
2084 : * at any given time is not trivial. See below for examples how to do it right.
2085 : * (Connectedness is destroyed if callers of ifac_main() create stuff on the
2086 : * stack in between calls. This is harmless as long as ifac_realloc() is used
2087 : * to re-create a connected object at the head of the stack just before
2088 : * collecting garbage.)
2089 : * A t_INT may well have > 10^6 distinct prime factors larger than 2^16. Since
2090 : * we need not find factors in order of increasing size, we must be prepared to
2091 : * drag a very large amount of data around. We start with a small structure
2092 : * and extend it when necessary. */
2093 :
2094 : /* The idea of the algorithm is:
2095 : * Let N0 be whatever is currently left of N after dividing off all the
2096 : * prime powers we have already returned to the caller. Then we maintain
2097 : * N0 as a product
2098 : * (1) N0 = \prod_i P_i^{e_i} * \prod_j Q_j^{f_j} * \prod_k C_k^{g_k}
2099 : * where the P_i and Q_j are distinct primes, each C_k is known composite,
2100 : * none of the P_i divides any C_k, and we also know the total ordering
2101 : * of all the P_i, Q_j and C_k; in particular, we will never try to divide
2102 : * a C_k by a larger Q_j. Some of the C_k may have common factors.
2103 : *
2104 : * Caveat implementor: Taking gcds among C_k's is very likely to cost at
2105 : * least as much time as dividing off any primes as we find them, and book-
2106 : * keeping would be tough (since D=gcd(C_1,C_2) can still have common factors
2107 : * with both C_1/D and C_2/D, and so on...).
2108 : *
2109 : * At startup, we just initialize the structure to
2110 : * (2) N = C_1^1 (composite).
2111 : *
2112 : * Whenever ifac_primary_factor() or one of the arithmetic user interface
2113 : * routines needs a primary factor, and the smallest thing in our list is P_1,
2114 : * we return that and its exponent, and remove it from our list. (When nothing
2115 : * is left, we return a sentinel value -- gen_1. And in Moebius mode, when we
2116 : * see something with exponent > 1, whether prime or composite, we return gen_0
2117 : * or 0, depending on the function). In all other cases, ifac_main() iterates
2118 : * the following steps until we have a P_1 in the smallest position.
2119 : *
2120 : * When the smallest item is C_1, as it is initially:
2121 : * (3.1) Crack C_1 into a nontrivial product U_1 * U_2 by whatever method
2122 : * comes to mind for this size. (U for 'unknown'.) Cracking will detect
2123 : * perfect powers, so we may instead see a power of some U_1 here, or even
2124 : * something of the form U_1^k*U_2^k; of course the exponent already attached
2125 : * to C_1 is taken into account in the following.
2126 : * (3.2) If we have U_1*U_2, sort the two factors (distinct: squares are caught
2127 : * in stage 3.1). N.B. U_1 and U_2 are smaller than anything else in our list.
2128 : * (3.3) Check U_1 and U_2 for primality, and flag them accordingly.
2129 : * (3.4) Iterate.
2130 : *
2131 : * When the smallest item is Q_1:
2132 : * This is the unpleasant case. We go through the entire list and try to
2133 : * divide Q_1 off each of the current C_k's, which usually fails, but may
2134 : * succeed several times. When a division was successful, the corresponding
2135 : * C_k is removed from our list, and the cofactor becomes a U_l for the moment
2136 : * unless it is 1 (which happens when C_k was a power of Q_1). When we're
2137 : * through we upgrade Q_1 to P_1 status, then do a primality check on each U_l
2138 : * and sort it back into the list either as a Q_j or as a C_k. If during the
2139 : * insertion sort we discover that some U_l equals some P_i or Q_j or C_k we
2140 : * already have, we just add U_l's exponent to that of its twin. (The sorting
2141 : * therefore happens before the primality test). Since this may produce one or
2142 : * more elements smaller than the P_1 we just confirmed, we may have to repeat
2143 : * the iteration.
2144 : * A trick avoids some Q_1 instances: just after the sweep classifying
2145 : * all current unknowns as either composites or primes, we do another downward
2146 : * sweep beginning with the largest current factor and stopping just above the
2147 : * largest current composite. Every Q_j we pass is turned into a P_i.
2148 : * (Different primes are automatically coprime among each other, and primes do
2149 : * not divide smaller composites.)
2150 : * NB: We have no use for comparing the square of a prime to N0. Normally
2151 : * we will get called after casting out only the smallest primes, and
2152 : * since we cannot guarantee that we see the large prime factors in as-
2153 : * cending order, we cannot stop when we find one larger than sqrt(N0). */
2154 :
2155 : /* Data structure: We keep everything in a single t_VEC of t_INTs. The
2156 : * first 2 components are read-only:
2157 : * 1) the first records whether we're doing full (NULL) or Moebius (gen_1)
2158 : * factorization; in the latter case subroutines return a sentinel value as
2159 : * soon as they spot an exponent > 1.
2160 : * 2) the second records the hint from factorint()'s optional flag, for use by
2161 : * ifac_crack().
2162 : *
2163 : * The remaining components (initially 15) are used in groups of three:
2164 : * [ factor (t_INT), exponent (t_INT), factor class ], where factor class is
2165 : * NULL : unknown
2166 : * gen_0: known composite C_k
2167 : * gen_1: known prime Q_j awaiting trial division
2168 : * gen_2: finished prime P_i.
2169 : * When during the division stage we re-sort a C_k-turned-U_l to a lower
2170 : * position, we rotate any intervening material upward towards its old
2171 : * slot. When a C_k was divided down to 1, its slot is left empty at
2172 : * first; similarly when the re-sorting detects a repeated factor.
2173 : * After the sorting phase, we de-fragment the list and squeeze all the
2174 : * occupied slots together to the high end, so that ifac_crack() has room
2175 : * for new factors. When this doesn't suffice, we abandon the current vector
2176 : * and allocate a somewhat larger one, defragmenting again while copying.
2177 : *
2178 : * For internal use: note that all exponents will fit into C longs, given
2179 : * PARI's lgefint field size. When we work with them, we sometimes read
2180 : * out the GEN pointer, and sometimes do an itos, whatever is more con-
2181 : * venient for the task at hand. */
2182 :
2183 : /*** Overview ***/
2184 :
2185 : /* The '*where' argument in the following points into *partial at the first of
2186 : * the three fields of the first occupied slot. It's there because the caller
2187 : * would already know where 'here' is, so we don't want to search for it again.
2188 : * We do not preserve this from one user-interface call to the next. */
2189 :
2190 : /* In the most cases, control flows from the user interface to ifac_main() and
2191 : * then to a succession of ifac_crack()s and ifac_divide()s, with (typically)
2192 : * none of the latter finding anything. */
2193 :
2194 : #define LAST(x) x+lg(x)-3
2195 : #define FIRST(x) x+3
2196 :
2197 : #define MOEBIUS(x) gel(x,1)
2198 : #define HINT(x) gel(x,2)
2199 :
2200 : /* y <- x */
2201 : INLINE void
2202 0 : SHALLOWCOPY(GEN x, GEN y) {
2203 0 : VALUE(y) = VALUE(x);
2204 0 : EXPON(y) = EXPON(x);
2205 0 : CLASS(y) = CLASS(x);
2206 0 : }
2207 : /* y <- x */
2208 : INLINE void
2209 0 : COPY(GEN x, GEN y) {
2210 0 : icopyifstack(VALUE(x), VALUE(y));
2211 0 : icopyifstack(EXPON(x), EXPON(y));
2212 0 : CLASS(y) = CLASS(x);
2213 0 : }
2214 :
2215 : /* Diagnostics */
2216 : static void
2217 0 : ifac_factor_dbg(GEN x)
2218 : {
2219 0 : GEN c = CLASS(x), v = VALUE(x);
2220 0 : if (c == gen_2) err_printf("IFAC: factor %Ps\n\tis prime (finished)\n", v);
2221 0 : else if (c == gen_1) err_printf("IFAC: factor %Ps\n\tis prime\n", v);
2222 0 : else if (c == gen_0) err_printf("IFAC: factor %Ps\n\tis composite\n", v);
2223 0 : }
2224 : static void
2225 0 : ifac_check(GEN partial, GEN where)
2226 : {
2227 0 : if (!where || where < FIRST(partial) || where > LAST(partial))
2228 0 : pari_err_BUG("ifac_check ['where' out of bounds]");
2229 0 : }
2230 : static void
2231 0 : ifac_print(GEN part, GEN where)
2232 : {
2233 0 : long l = lg(part);
2234 : GEN p;
2235 :
2236 0 : err_printf("ifac partial factorization structure: %ld slots, ", (l-3)/3);
2237 0 : if (MOEBIUS(part)) err_printf("Moebius mode, ");
2238 0 : err_printf("hint = %ld\n", itos(HINT(part)));
2239 0 : ifac_check(part, where);
2240 0 : for (p = part+3; p < part + l; p += 3)
2241 : {
2242 0 : GEN v = VALUE(p), e = EXPON(p), c = CLASS(p);
2243 0 : const char *s = "";
2244 0 : if (!v) { err_printf("[empty slot]\n"); continue; }
2245 0 : if (c == NULL) s = "unknown";
2246 0 : else if (c == gen_0) s = "composite";
2247 0 : else if (c == gen_1) s = "unfinished prime";
2248 0 : else if (c == gen_2) s = "prime";
2249 0 : else pari_err_BUG("unknown factor class");
2250 0 : err_printf("[%Ps, %Ps, %s]\n", v, e, s);
2251 : }
2252 0 : err_printf("Done.\n");
2253 0 : }
2254 :
2255 : static const long decomp_default_hint = 0;
2256 : /* assume n > 0, which we can assign to */
2257 : /* return initial data structure, see ifac_crack() for the hint argument */
2258 : static GEN
2259 6364 : ifac_start_hint(GEN n, int moebius, long hint)
2260 : {
2261 6364 : const long ifac_initial_length = 3 + 7*3;
2262 : /* codeword, moebius, hint, 7 slots -- a 512-bit product of distinct 8-bit
2263 : * primes needs at most 7 slots at a time) */
2264 6364 : GEN here, part = cgetg(ifac_initial_length, t_VEC);
2265 :
2266 6364 : MOEBIUS(part) = moebius? gen_1 : NULL;
2267 6364 : HINT(part) = stoi(hint);
2268 : /* fill first slot at the top end */
2269 6364 : here = part + ifac_initial_length - 3; /* LAST(part) */
2270 6364 : INIT(here, n,gen_1,gen_0); /* n^1: composite */
2271 44548 : while ((here -= 3) > part) ifac_delete(here);
2272 6364 : return part;
2273 : }
2274 : GEN
2275 1904 : ifac_start(GEN n, int moebius)
2276 1904 : { return ifac_start_hint(n,moebius,decomp_default_hint); }
2277 :
2278 : /* Return next nonempty slot after 'here', NULL if none exist */
2279 : static GEN
2280 18442 : ifac_find(GEN partial)
2281 : {
2282 18442 : GEN scan, end = partial + lg(partial);
2283 :
2284 : #ifdef IFAC_DEBUG
2285 : ifac_check(partial, partial);
2286 : #endif
2287 135045 : for (scan = partial+3; scan < end; scan += 3)
2288 128912 : if (VALUE(scan)) return scan;
2289 6133 : return NULL;
2290 : }
2291 :
2292 : /* Defragment: squeeze out unoccupied slots above *where. Unoccupied slots
2293 : * arise when a composite factor dissolves completely whilst dividing off a
2294 : * prime, or when ifac_resort() spots a coincidence and merges two factors.
2295 : * Update *where */
2296 : static void
2297 224 : ifac_defrag(GEN *partial, GEN *where)
2298 : {
2299 224 : GEN scan_new = LAST(*partial), scan_old;
2300 :
2301 672 : for (scan_old = scan_new; scan_old >= *where; scan_old -= 3)
2302 : {
2303 448 : if (!VALUE(scan_old)) continue; /* empty slot */
2304 448 : if (scan_old < scan_new) SHALLOWCOPY(scan_old, scan_new);
2305 448 : scan_new -= 3; /* point at next slot to be written */
2306 : }
2307 224 : scan_new += 3; /* back up to last slot written */
2308 224 : *where = scan_new;
2309 1344 : while ((scan_new -= 3) > *partial) ifac_delete(scan_new); /* erase junk */
2310 224 : }
2311 :
2312 : /* Move to a larger main vector, updating *where if it points into it, and
2313 : * *partial in any case. Can be used as a specialized gcopy before
2314 : * a gerepileupto() (pass 0 as the new length). Normally, one would pass
2315 : * new_lg=1 to let this function guess the new size. To be used sparingly.
2316 : * Complex version of ifac_defrag(), combined with reallocation. If new_lg
2317 : * is 0, use the old length, so this acts just like gcopy except that the
2318 : * 'where' pointer is carried along; if it is 1, we make an educated guess.
2319 : * Exception: If new_lg is 0, the vector is full to the brim, and the first
2320 : * entry is composite, we make it longer to avoid being called again a
2321 : * microsecond later. It is safe to call this with *where = NULL:
2322 : * if it doesn't point anywhere within the old structure, it is left alone */
2323 : static void
2324 0 : ifac_realloc(GEN *partial, GEN *where, long new_lg)
2325 : {
2326 0 : long old_lg = lg(*partial);
2327 : GEN newpart, scan_new, scan_old;
2328 :
2329 0 : if (new_lg == 1)
2330 0 : new_lg = 2*old_lg - 6; /* from 7 slots to 13 to 25... */
2331 0 : else if (new_lg <= old_lg) /* includes case new_lg == 0 */
2332 : {
2333 0 : GEN first = *partial + 3;
2334 0 : new_lg = old_lg;
2335 : /* structure full and first entry composite or unknown */
2336 0 : if (VALUE(first) && (CLASS(first) == gen_0 || CLASS(first)==NULL))
2337 0 : new_lg += 6; /* give it a little more breathing space */
2338 : }
2339 0 : newpart = cgetg(new_lg, t_VEC);
2340 0 : if (DEBUGMEM >= 3)
2341 0 : err_printf("IFAC: new partial factorization structure (%ld slots)\n",
2342 0 : (new_lg - 3)/3);
2343 0 : MOEBIUS(newpart) = MOEBIUS(*partial);
2344 0 : icopyifstack(HINT(*partial), HINT(newpart));
2345 : /* Downward sweep through the old *partial. Pick up 'where' and carry it
2346 : * over if we pass it. (Only useful if it pointed at a nonempty slot.)
2347 : * Factors are COPY'd so that we again have a nice object (parent older
2348 : * than children, connected), except the one factor that may still be living
2349 : * in a clone where n originally was; exponents are similarly copied if they
2350 : * aren't global constants; class-of-factor fields are global constants so we
2351 : * need only copy them as pointers. Caller may then do a gerepileupto() */
2352 0 : scan_new = newpart + new_lg - 3; /* LAST(newpart) */
2353 0 : scan_old = *partial + old_lg - 3; /* LAST(*partial) */
2354 0 : for (; scan_old > *partial + 2; scan_old -= 3)
2355 : {
2356 0 : if (*where == scan_old) *where = scan_new;
2357 0 : if (!VALUE(scan_old)) continue; /* skip empty slots */
2358 0 : COPY(scan_old, scan_new); scan_new -= 3;
2359 : }
2360 0 : scan_new += 3; /* back up to last slot written */
2361 0 : while ((scan_new -= 3) > newpart) ifac_delete(scan_new);
2362 0 : *partial = newpart;
2363 0 : }
2364 :
2365 : /* Re-sort one (typically unknown) entry from washere to a new position,
2366 : * rotating intervening entries upward to fill the vacant space. If the new
2367 : * position is the same as the old one, or the new value of the entry coincides
2368 : * with a value already occupying a lower slot, then we just add exponents (and
2369 : * use the 'more known' class, and return 1 immediately when in Moebius mode).
2370 : * Slots between *where and washere must be in sorted order, so a sweep using
2371 : * this to re-sort several unknowns must proceed upward, see ifac_resort().
2372 : * Bubble-sort-of-thing sort. Won't be exercised frequently, so this is ok */
2373 : static void
2374 112 : ifac_sort_one(GEN *where, GEN washere)
2375 : {
2376 112 : GEN old, scan = washere - 3;
2377 : GEN value, exponent, class0, class1;
2378 : long cmp_res;
2379 :
2380 112 : if (scan < *where) return; /* nothing to do, washere==*where */
2381 112 : value = VALUE(washere);
2382 112 : exponent = EXPON(washere);
2383 112 : class0 = CLASS(washere);
2384 112 : cmp_res = -1; /* sentinel */
2385 112 : while (scan >= *where) /* at least once */
2386 : {
2387 112 : if (VALUE(scan))
2388 : { /* current slot nonempty, check against where */
2389 112 : cmp_res = cmpii(value, VALUE(scan));
2390 112 : if (cmp_res >= 0) break; /* have found where to stop */
2391 : }
2392 : /* copy current slot upward by one position and move pointers down */
2393 0 : SHALLOWCOPY(scan, scan+3);
2394 0 : scan -= 3;
2395 : }
2396 112 : scan += 3;
2397 : /* At this point there are the following possibilities:
2398 : * 1) cmp_res == -1. Either value is less than that at *where, or *where was
2399 : * pointing at vacant slots and any factors we saw en route were larger than
2400 : * value. At any rate, scan == *where now, and scan is pointing at an empty
2401 : * slot, into which we'll stash our entry.
2402 : * 2) cmp_res == 0. The entry at scan-3 is the one, we compare class0
2403 : * fields and add exponents, and put it all into the vacated scan slot,
2404 : * NULLing the one at scan-3 (and possibly updating *where).
2405 : * 3) cmp_res == 1. The slot at scan is the one to store our entry into. */
2406 112 : if (cmp_res)
2407 : {
2408 112 : if (cmp_res < 0 && scan != *where)
2409 0 : pari_err_BUG("ifact_sort_one [misaligned partial]");
2410 112 : INIT(scan, value, exponent, class0); return;
2411 : }
2412 : /* case cmp_res == 0: repeated factor detected */
2413 0 : if (DEBUGLEVEL >= 4)
2414 0 : err_printf("IFAC: repeated factor %Ps\n\tin ifac_sort_one\n", value);
2415 0 : old = scan - 3;
2416 : /* if old class0 was composite and new is prime, or vice versa, complain
2417 : * (and if one class0 was unknown and the other wasn't, use the known one) */
2418 0 : class1 = CLASS(old);
2419 0 : if (class0) /* should never be used */
2420 : {
2421 0 : if (class1)
2422 : {
2423 0 : if (class0 == gen_0 && class1 != gen_0)
2424 0 : pari_err_BUG("ifac_sort_one (composite = prime)");
2425 0 : else if (class0 != gen_0 && class1 == gen_0)
2426 0 : pari_err_BUG("ifac_sort_one (prime = composite)");
2427 0 : else if (class0 == gen_2)
2428 0 : CLASS(scan) = class0;
2429 : }
2430 : else
2431 0 : CLASS(scan) = class0;
2432 : }
2433 : /* else stay with the existing known class0 */
2434 0 : CLASS(scan) = class1;
2435 : /* in any case, add exponents */
2436 0 : if (EXPON(old) == gen_1 && exponent == gen_1)
2437 0 : EXPON(scan) = gen_2;
2438 : else
2439 0 : EXPON(scan) = addii(EXPON(old), exponent);
2440 : /* move the value over and null out the vacated slot below */
2441 0 : old = scan - 3;
2442 0 : *scan = *old;
2443 0 : ifac_delete(old);
2444 : /* finally, see whether *where should be pulled in */
2445 0 : if (old == *where) *where += 3;
2446 : }
2447 :
2448 : /* Sort all current unknowns downward to where they belong. Sweeps in the
2449 : * upward direction. Not needed after ifac_crack(), only when ifac_divide()
2450 : * returned true. Update *where. */
2451 : static void
2452 112 : ifac_resort(GEN *partial, GEN *where)
2453 : {
2454 : GEN scan, end;
2455 112 : ifac_defrag(partial, where); end = LAST(*partial);
2456 336 : for (scan = *where; scan <= end; scan += 3)
2457 224 : if (VALUE(scan) && !CLASS(scan)) ifac_sort_one(where, scan); /*unknown*/
2458 112 : ifac_defrag(partial, where); /* remove newly created gaps */
2459 112 : }
2460 :
2461 : /* Let x be a t_INT known not to have small divisors (< 2^14). Return 0 if x
2462 : * is a proven composite. Return 1 if we believe it to be prime (fully proven
2463 : * prime if factor_proven is set). */
2464 : int
2465 34214 : ifac_isprime(GEN x)
2466 : {
2467 34214 : if (!BPSW_psp_nosmalldiv(x)) return 0; /* composite */
2468 20515 : if (factor_proven && ! BPSW_isprime(x))
2469 : {
2470 0 : pari_warn(warner,
2471 : "IFAC: pseudo-prime %Ps\n\tis not prime. PLEASE REPORT!\n", x);
2472 0 : return 0;
2473 : }
2474 20515 : return 1;
2475 : }
2476 :
2477 : static int
2478 12377 : ifac_checkprime(GEN x)
2479 : {
2480 12377 : int res = ifac_isprime(VALUE(x));
2481 12377 : CLASS(x) = res? gen_1: gen_0;
2482 12377 : if (DEBUGLEVEL>2) ifac_factor_dbg(x);
2483 12377 : return res;
2484 : }
2485 :
2486 : /* Determine primality or compositeness of all current unknowns, and set
2487 : * class Q primes to finished (class P) if everything larger is already
2488 : * known to be prime. When after_crack >= 0, only look at the
2489 : * first after_crack things in the list (do nothing when it's 0) */
2490 : static void
2491 6464 : ifac_whoiswho(GEN *partial, GEN *where, long after_crack)
2492 : {
2493 6464 : GEN scan, scan_end = LAST(*partial);
2494 :
2495 : #ifdef IFAC_DEBUG
2496 : ifac_check(*partial, *where);
2497 : #endif
2498 6464 : if (after_crack == 0) return;
2499 5766 : if (after_crack > 0) /* check at most after_crack entries */
2500 5654 : scan = *where + 3*(after_crack - 1); /* assert(scan <= scan_end) */
2501 : else
2502 322 : for (scan = scan_end; scan >= *where; scan -= 3)
2503 : {
2504 217 : if (CLASS(scan))
2505 : { /* known class of factor */
2506 105 : if (CLASS(scan) == gen_0) break;
2507 105 : if (CLASS(scan) == gen_1)
2508 : {
2509 0 : if (DEBUGLEVEL>=3)
2510 : {
2511 0 : err_printf("IFAC: factor %Ps\n\tis prime (no larger composite)\n",
2512 0 : VALUE(*where));
2513 0 : err_printf("IFAC: prime %Ps\n\tappears with exponent = %ld\n",
2514 0 : VALUE(*where), itos(EXPON(*where)));
2515 : }
2516 0 : CLASS(scan) = gen_2;
2517 : }
2518 105 : continue;
2519 : }
2520 112 : if (!ifac_checkprime(scan)) break; /* must disable Q-to-P */
2521 105 : CLASS(scan) = gen_2; /* P_i, finished prime */
2522 105 : if (DEBUGLEVEL>2) ifac_factor_dbg(scan);
2523 : }
2524 : /* go on, Q-to-P trick now disabled */
2525 17263 : for (; scan >= *where; scan -= 3)
2526 : {
2527 11497 : if (CLASS(scan)) continue;
2528 11476 : (void)ifac_checkprime(scan); /* Qj | Ck */
2529 : }
2530 : }
2531 :
2532 : /* Divide all current composites by first (prime, class Q) entry, updating its
2533 : * exponent, and turning it into a finished prime (class P). Return 1 if any
2534 : * such divisions succeeded (in Moebius mode, the update may then not have
2535 : * been completed), or 0 if none of them succeeded. Doesn't modify *where.
2536 : * Here we normally do not check that the first entry is a not-finished
2537 : * prime. Stack management: we may allocate a new exponent */
2538 : static long
2539 11582 : ifac_divide(GEN *partial, GEN *where, long moebius_mode)
2540 : {
2541 11582 : GEN scan, scan_end = LAST(*partial);
2542 11582 : long res = 0, exponent, newexp, otherexp;
2543 :
2544 : #ifdef IFAC_DEBUG
2545 : ifac_check(*partial, *where);
2546 : if (CLASS(*where) != gen_1)
2547 : pari_err_BUG("ifac_divide [division by composite or finished prime]");
2548 : if (!VALUE(*where)) pari_err_BUG("ifac_divide [division by nothing]");
2549 : #endif
2550 11582 : newexp = exponent = itos(EXPON(*where));
2551 11582 : if (exponent > 1 && moebius_mode) return 1;
2552 : /* should've been caught by caller */
2553 :
2554 17586 : for (scan = *where+3; scan <= scan_end; scan += 3)
2555 : {
2556 6011 : if (CLASS(scan) != gen_0) continue; /* the other thing ain't composite */
2557 431 : otherexp = 0;
2558 : /* divide in place to keep stack clutter minimal */
2559 550 : while (dvdiiz(VALUE(scan), VALUE(*where), VALUE(scan)))
2560 : {
2561 126 : if (moebius_mode) return 1; /* immediately */
2562 119 : if (!otherexp) otherexp = itos(EXPON(scan));
2563 119 : newexp += otherexp;
2564 : }
2565 424 : if (newexp > exponent) /* did anything happen? */
2566 : {
2567 112 : EXPON(*where) = (newexp == 2 ? gen_2 : utoipos(newexp));
2568 112 : exponent = newexp;
2569 112 : if (is_pm1((GEN)*scan)) /* factor dissolved completely */
2570 : {
2571 0 : ifac_delete(scan);
2572 0 : if (DEBUGLEVEL >= 4)
2573 0 : err_printf("IFAC: a factor was a power of another prime factor\n");
2574 : } else {
2575 112 : CLASS(scan) = NULL; /* at any rate it's Unknown now */
2576 112 : if (DEBUGLEVEL >= 4)
2577 0 : err_printf("IFAC: a factor was divisible by another prime factor,\n"
2578 : "\tleaving a cofactor = %Ps\n", VALUE(scan));
2579 : }
2580 112 : res = 1;
2581 112 : if (DEBUGLEVEL >= 5)
2582 0 : err_printf("IFAC: prime %Ps\n\tappears at least to the power %ld\n",
2583 0 : VALUE(*where), newexp);
2584 : }
2585 : } /* for */
2586 11575 : CLASS(*where) = gen_2; /* make it a finished prime */
2587 11575 : if (DEBUGLEVEL >= 3)
2588 0 : err_printf("IFAC: prime %Ps\n\tappears with exponent = %ld\n",
2589 0 : VALUE(*where), newexp);
2590 11575 : return res;
2591 : }
2592 :
2593 : /* found out our integer was factor^exp. Update */
2594 : static void
2595 985 : update_pow(GEN where, GEN factor, long exp, pari_sp *av)
2596 : {
2597 985 : GEN ex = EXPON(where);
2598 985 : if (DEBUGLEVEL>3)
2599 0 : err_printf("IFAC: found %Ps =\n\t%Ps ^%ld\n", *where, factor, exp);
2600 985 : affii(factor, VALUE(where)); set_avma(*av);
2601 985 : if (ex == gen_1)
2602 782 : { EXPON(where) = exp == 2? gen_2: utoipos(exp); *av = avma; }
2603 203 : else if (ex == gen_2)
2604 182 : { EXPON(where) = utoipos(exp<<1); *av = avma; }
2605 : else
2606 21 : affsi(exp * itos(ex), EXPON(where));
2607 985 : }
2608 : /* hint = 0 : Use a default strategy
2609 : * hint & 1 : avoid MPQS
2610 : * hint & 2 : avoid first-stage ECM (may fall back to ECM if MPQS gives up)
2611 : * hint & 4 : avoid Pollard and SQUFOF stages.
2612 : * hint & 8 : avoid final ECM; may flag a composite as prime. */
2613 : #define get_hint(partial) (itos(HINT(*partial)) & 15)
2614 :
2615 : /* Complete ifac_crack's job when a factoring engine splits the current factor
2616 : * into a product of three or more new factors. Makes room for them if
2617 : * necessary, sorts them, gives them the right exponents and class. Returns the
2618 : * number of factors actually written, which may be less than #facvec if there
2619 : * are duplicates. Vectors of factors (cf pollardbrent() or mpqs()) contain
2620 : * 'slots' of 3 GENs per factor, interpreted as in our partial factorization
2621 : * data structure. Thus engines can tell us what they already know about
2622 : * factors being prime/composite or appearing to a power larger than thefirst.
2623 : * Don't collect garbage. No diagnostics: engine has printed what it found.
2624 : * facvec contains slots of three components per factor; repeated factors are
2625 : * allowed (their classes shouldn't contradict each other whereas their
2626 : * exponents will be added up) */
2627 : static long
2628 1220 : ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec, long moebius_mode)
2629 : {
2630 1220 : long j,k=1, lfv=lg(facvec)-1, nf=lfv/3, room=(long)(*where-*partial);
2631 : /* one of the factors will go into the *where slot, so room is now 3 times
2632 : * the number of slots we can use */
2633 1220 : long needroom = lfv - room;
2634 1220 : GEN newexp, cur, sorted, auxvec = cgetg(nf+1, t_VEC), factor;
2635 1220 : long E = itos(EXPON(*where)); /* the old exponent */
2636 :
2637 1220 : if (DEBUGLEVEL >= 5) /* squfof may return a single squared factor as a set */
2638 0 : err_printf("IFAC: incorporating set of %ld factor(s)\n", nf);
2639 1220 : if (needroom > 0) ifac_realloc(partial, where, lg(*partial) + needroom);
2640 :
2641 : /* create sort permutation from the values of the factors */
2642 3842 : for (j=nf; j; j--) gel(auxvec,j) = gel(facvec,3*j-2);
2643 1220 : sorted = ZV_indexsort(auxvec);
2644 : /* store factors, beginning at *where, and catching any duplicates */
2645 1220 : cur = facvec + 3*sorted[nf]-2; /* adjust for triple spacing */
2646 1220 : VALUE(*where) = VALUE(cur);
2647 1220 : newexp = EXPON(cur);
2648 : /* if new exponent is 1, the old exponent already in place will do */
2649 1220 : if (newexp != gen_1) EXPON(*where) = mului(E,newexp);
2650 1220 : CLASS(*where) = CLASS(cur);
2651 1220 : if (DEBUGLEVEL >= 6) err_printf("\tstored (largest) factor no. %ld...\n", nf);
2652 :
2653 2622 : for (j=nf-1; j; j--)
2654 : {
2655 : GEN e;
2656 1402 : cur = facvec + 3*sorted[j]-2;
2657 1402 : factor = VALUE(cur);
2658 1402 : if (equalii(factor, VALUE(*where)))
2659 : {
2660 0 : if (DEBUGLEVEL >= 6)
2661 0 : err_printf("\tfactor no. %ld is a duplicate%s\n", j, (j>1? "...": ""));
2662 : /* update exponent, ignore class which would already have been set,
2663 : * then forget current factor */
2664 0 : newexp = EXPON(cur);
2665 0 : if (newexp != gen_1) /* new exp > 1 */
2666 0 : e = addiu(EXPON(*where), E * itou(newexp));
2667 0 : else if (EXPON(*where) == gen_1 && E == 1)
2668 0 : e = gen_2;
2669 : else
2670 0 : e = addiu(EXPON(*where), E);
2671 0 : EXPON(*where) = e;
2672 :
2673 0 : if (moebius_mode) return 0; /* stop now, with exponent updated */
2674 0 : continue;
2675 : }
2676 :
2677 1402 : *where -= 3;
2678 1402 : CLASS(*where) = CLASS(cur); /* class as given */
2679 1402 : newexp = EXPON(cur);
2680 1402 : if (newexp != gen_1) /* new exp > 1 */
2681 28 : e = (E == 1 && newexp == gen_2)? gen_2: mului(E, newexp);
2682 : else /* inherit parent's exponent */
2683 1374 : e = (E == 1 ? gen_1 : (E == 2 ? gen_2 : utoipos(E)));
2684 1402 : EXPON(*where) = e;
2685 : /* keep components younger than *partial */
2686 1402 : icopyifstack(factor, VALUE(*where));
2687 1402 : k++;
2688 1402 : if (DEBUGLEVEL >= 6)
2689 0 : err_printf("\tfactor no. %ld was unique%s\n", j, j>1? " (so far)...": "");
2690 : }
2691 1220 : return k;
2692 : }
2693 :
2694 : /* Split the first (composite) entry. There _must_ already be room for another
2695 : * factor below *where, and *where is updated. Two cases:
2696 : * - entry = factor^k is a pure power: factor^k is inserted, leaving *where
2697 : * unchanged;
2698 : * - entry = factor * cofactor (not necessarily coprime): both factors are
2699 : * inserted in the correct order, updating *where
2700 : * The inserted factors class is set to unknown, they inherit the exponent
2701 : * (or a multiple thereof) of their ancestor.
2702 : *
2703 : * Returns number of factors written into the structure, normally 2 (1 if pure
2704 : * power, maybe > 2 if a factoring engine returned a vector of factors instead
2705 : * of a single factor). Can reallocate the data structure in the
2706 : * vector-of-factors case, not in the most common single-factor case.
2707 : * Stack housekeeping: this routine may create one or more objects (a new
2708 : * factor, or possibly several, and perhaps one or more new exponents > 2) */
2709 : static long
2710 6359 : ifac_crack(GEN *partial, GEN *where, long moebius_mode)
2711 : {
2712 6359 : long cmp_res, hint = get_hint(partial);
2713 : GEN factor, exponent;
2714 :
2715 : #ifdef IFAC_DEBUG
2716 : ifac_check(*partial, *where);
2717 : if (*where < *partial + 6)
2718 : pari_err_BUG("ifac_crack ['*where' out of bounds]");
2719 : if (!(VALUE(*where)) || typ(VALUE(*where)) != t_INT)
2720 : pari_err_BUG("ifac_crack [incorrect VALUE(*where)]");
2721 : if (CLASS(*where) != gen_0)
2722 : pari_err_BUG("ifac_crack [operand not known composite]");
2723 : #endif
2724 :
2725 6359 : if (DEBUGLEVEL>2) {
2726 0 : err_printf("IFAC: cracking composite\n\t%Ps\n", **where);
2727 0 : if (DEBUGLEVEL>3) err_printf("IFAC: checking for pure square\n");
2728 : }
2729 : /* MPQS cannot factor prime powers. Look for pure powers even if MPQS is
2730 : * blocked by hint: fast and useful in bounded factorization */
2731 : {
2732 : forprime_t T;
2733 6359 : ulong exp = 1, mask = 7;
2734 6359 : long good = 0;
2735 6359 : pari_sp av = avma;
2736 6359 : (void)u_forprime_init(&T, 11, ULONG_MAX);
2737 : /* crack squares */
2738 7295 : while (Z_issquareall(VALUE(*where), &factor))
2739 : {
2740 943 : good = 1; /* remember we succeeded once */
2741 943 : update_pow(*where, factor, 2, &av);
2742 1641 : if (moebius_mode) return 0; /* no need to carry on */
2743 : }
2744 6394 : while ( (exp = is_357_power(VALUE(*where), &factor, &mask)) )
2745 : {
2746 42 : good = 1; /* remember we succeeded once */
2747 42 : update_pow(*where, factor, exp, &av);
2748 42 : if (moebius_mode) return 0; /* no need to carry on */
2749 : }
2750 : /* cutoff at 14 bits as trial division must have found everything below */
2751 6352 : while ( (exp = is_pth_power(VALUE(*where), &factor, &T, 15)) )
2752 : {
2753 0 : good = 1; /* remember we succeeded once */
2754 0 : update_pow(*where, factor, exp, &av);
2755 0 : if (moebius_mode) return 0; /* no need to carry on */
2756 : }
2757 :
2758 6352 : if (good && hint != 15 && ifac_checkprime(*where))
2759 : { /* our composite was a prime power */
2760 698 : if (DEBUGLEVEL>3)
2761 0 : err_printf("IFAC: factor %Ps\n\tis prime\n", VALUE(*where));
2762 698 : return 0; /* bypass subsequent ifac_whoiswho() call */
2763 : }
2764 : } /* pure power stage */
2765 :
2766 5654 : factor = NULL;
2767 5654 : if (!(hint & 4))
2768 : { /* pollardbrent() Rho usually gets a first chance */
2769 5605 : if (DEBUGLEVEL >= 4) err_printf("IFAC: trying Pollard-Brent rho method\n");
2770 5605 : factor = pollardbrent(VALUE(*where));
2771 5605 : if (!factor)
2772 : { /* Shanks' squfof() */
2773 3427 : if (DEBUGLEVEL >= 4)
2774 0 : err_printf("IFAC: trying Shanks' SQUFOF, will fail silently if input\n"
2775 : " is too large for it.\n");
2776 3427 : factor = squfof(VALUE(*where));
2777 : }
2778 : }
2779 5654 : if (!factor && !(hint & 2))
2780 : { /* First ECM stage */
2781 1077 : if (DEBUGLEVEL >= 4) err_printf("IFAC: trying Lenstra-Montgomery ECM\n");
2782 1077 : factor = ellfacteur(VALUE(*where), 0); /* do not insist */
2783 : }
2784 5654 : if (!factor && !(hint & 1))
2785 : { /* MPQS stage */
2786 1101 : if (DEBUGLEVEL >= 4) err_printf("IFAC: trying MPQS\n");
2787 1101 : factor = mpqs(VALUE(*where));
2788 : }
2789 5654 : if (!factor)
2790 : {
2791 14 : if (!(hint & 8))
2792 : { /* still no luck? Final ECM stage, guaranteed to succeed */
2793 7 : if (DEBUGLEVEL >= 4)
2794 0 : err_printf("IFAC: forcing ECM, may take some time\n");
2795 7 : factor = ellfacteur(VALUE(*where), 1);
2796 : }
2797 : else
2798 : { /* limited factorization */
2799 7 : if (DEBUGLEVEL >= 2)
2800 : {
2801 0 : if (hint != 15)
2802 0 : pari_warn(warner, "IFAC: unfactored composite declared prime");
2803 : else
2804 0 : pari_warn(warner, "IFAC: untested integer declared prime");
2805 :
2806 : /* don't print it out at level 3 or above, where it would appear
2807 : * several times before and after this message already */
2808 0 : if (DEBUGLEVEL == 2) err_printf("\t%Ps\n", VALUE(*where));
2809 : }
2810 7 : CLASS(*where) = gen_1; /* might as well trial-divide by it... */
2811 7 : return 1;
2812 : }
2813 : }
2814 5647 : if (typ(factor) == t_VEC) /* delegate this case */
2815 1220 : return ifac_insert_multiplet(partial, where, factor, moebius_mode);
2816 : /* typ(factor) == t_INT */
2817 : /* got single integer back: work out the cofactor (in place) */
2818 4427 : if (!dvdiiz(VALUE(*where), factor, VALUE(*where)))
2819 : {
2820 0 : err_printf("IFAC: factoring %Ps\n", VALUE(*where));
2821 0 : err_printf("\tyielded 'factor' %Ps\n\twhich isn't!\n", factor);
2822 0 : pari_err_BUG("factoring");
2823 : }
2824 : /* factoring engines report the factor found; tell about the cofactor */
2825 4427 : if (DEBUGLEVEL >= 4) err_printf("IFAC: cofactor = %Ps\n", VALUE(*where));
2826 :
2827 : /* The two factors are 'factor' and VALUE(*where), find out which is larger */
2828 4427 : cmp_res = cmpii(factor, VALUE(*where));
2829 4427 : CLASS(*where) = NULL; /* mark factor /cofactor 'unknown' */
2830 4427 : exponent = EXPON(*where);
2831 4427 : *where -= 3;
2832 4427 : CLASS(*where) = NULL; /* mark factor /cofactor 'unknown' */
2833 4427 : icopyifstack(exponent, EXPON(*where));
2834 4427 : if (cmp_res < 0)
2835 3762 : VALUE(*where) = factor; /* common case */
2836 665 : else if (cmp_res > 0)
2837 : { /* factor > cofactor, rearrange */
2838 665 : GEN old = *where + 3;
2839 665 : VALUE(*where) = VALUE(old); /* move cofactor pointer to lowest slot */
2840 665 : VALUE(old) = factor; /* save factor */
2841 : }
2842 0 : else pari_err_BUG("ifac_crack [Z_issquareall miss]");
2843 4427 : return 2;
2844 : }
2845 :
2846 : /* main loop: iterate until smallest entry is a finished prime; returns
2847 : * a 'where' pointer, or NULL if nothing left, or gen_0 in Moebius mode if
2848 : * we aren't squarefree */
2849 : static GEN
2850 17488 : ifac_main(GEN *partial)
2851 : {
2852 17488 : const long moebius_mode = !!MOEBIUS(*partial);
2853 17488 : GEN here = ifac_find(*partial);
2854 : long nf;
2855 :
2856 17488 : if (!here) return NULL; /* nothing left */
2857 : /* loop until first entry is a finished prime. May involve reallocations,
2858 : * thus updates of *partial */
2859 29600 : while (CLASS(here) != gen_2)
2860 : {
2861 17941 : if (CLASS(here) == gen_0) /* composite: crack it */
2862 : { /* make sure there's room for another factor */
2863 6359 : if (here < *partial + 6)
2864 : {
2865 0 : ifac_defrag(partial, &here);
2866 0 : if (here < *partial + 6) ifac_realloc(partial, &here, 1); /* no luck */
2867 : }
2868 6359 : nf = ifac_crack(partial, &here, moebius_mode);
2869 6359 : if (moebius_mode && EXPON(here) != gen_1) /* that was a power */
2870 : {
2871 7 : if (DEBUGLEVEL >= 3)
2872 0 : err_printf("IFAC: main loop: repeated new factor\n\t%Ps\n", *here);
2873 7 : return gen_0;
2874 : }
2875 : /* deal with the new unknowns. No sort: ifac_crack did it */
2876 6352 : ifac_whoiswho(partial, &here, nf);
2877 6352 : continue;
2878 : }
2879 11582 : if (CLASS(here) == gen_1) /* prime but not yet finished: finish it */
2880 : {
2881 11582 : if (ifac_divide(partial, &here, moebius_mode))
2882 : {
2883 119 : if (moebius_mode)
2884 : {
2885 7 : if (DEBUGLEVEL >= 3)
2886 0 : err_printf("IFAC: main loop: another factor was divisible by\n"
2887 : "\t%Ps\n", *here);
2888 7 : return gen_0;
2889 : }
2890 112 : ifac_resort(partial, &here); /* sort new cofactors down */
2891 112 : ifac_whoiswho(partial, &here, -1);
2892 : }
2893 11575 : continue;
2894 : }
2895 0 : pari_err_BUG("ifac_main [nonexistent factor class]");
2896 : } /* while */
2897 11659 : if (moebius_mode && EXPON(here) != gen_1)
2898 : {
2899 0 : if (DEBUGLEVEL >= 3)
2900 0 : err_printf("IFAC: after main loop: repeated old factor\n\t%Ps\n", *here);
2901 0 : return gen_0;
2902 : }
2903 11659 : if (DEBUGLEVEL >= 4)
2904 : {
2905 0 : nf = (*partial + lg(*partial) - here - 3)/3;
2906 0 : if (nf)
2907 0 : err_printf("IFAC: main loop: %ld factor%s left\n", nf, (nf>1)? "s": "");
2908 : else
2909 0 : err_printf("IFAC: main loop: this was the last factor\n");
2910 : }
2911 11659 : if (factor_add_primes && !(get_hint(partial) & 8))
2912 : {
2913 0 : GEN p = VALUE(here);
2914 0 : if (lgefint(p)>3 || uel(p,2) > 0x1000000UL) (void)addprimes(p);
2915 : }
2916 11659 : return here;
2917 : }
2918 :
2919 : /* Encapsulated routines */
2920 :
2921 : /* prime/exponent pairs need to appear contiguously on the stack, but we also
2922 : * need our data structure somewhere, and we don't know in advance how many
2923 : * primes will turn up. The following discipline achieves this: When
2924 : * ifac_decomp() is called, n should point at an object older than the oldest
2925 : * small prime/exponent pair (ifactor() guarantees this).
2926 : * We allocate sufficient space to accommodate several pairs -- eleven pairs
2927 : * ought to fit in a space not much larger than n itself -- before calling
2928 : * ifac_start(). If we manage to complete the factorization before we run out
2929 : * of space, we free the data structure and cull the excess reserved space
2930 : * before returning. When we do run out, we have to leapfrog to generate more
2931 : * (guesstimating the requirements from what is left in the partial
2932 : * factorization structure); room for fresh pairs is allocated at the head of
2933 : * the stack, followed by an ifac_realloc() to reconnect the data structure and
2934 : * move it out of the way, followed by a few pointer tweaks to connect the new
2935 : * pairs space to the old one. This whole affair translates into a surprisingly
2936 : * compact routine. */
2937 :
2938 : /* find primary factors of n; destroy n */
2939 : static long
2940 3365 : ifac_decomp(GEN n, long hint)
2941 : {
2942 3365 : pari_sp av = avma;
2943 3365 : long nb = 0;
2944 3365 : GEN part, here, workspc, pairs = (GEN)av;
2945 :
2946 : /* workspc will be doled out in pairs of smaller t_INTs. For n = prod p^{e_p}
2947 : * (p not necessarily prime), need room to store all p and e_p [ cgeti(3) ],
2948 : * bounded by
2949 : * sum_{p | n} ( log_{2^BIL} (p) + 6 ) <= log_{2^BIL} n + 6 log_2 n */
2950 3365 : workspc = new_chunk((expi(n) + 1) * 7);
2951 3365 : part = ifac_start_hint(n, 0, hint);
2952 : for (;;)
2953 : {
2954 9983 : here = ifac_main(&part);
2955 9983 : if (!here) break;
2956 6618 : if (gc_needed(av,1))
2957 : {
2958 : long offset;
2959 0 : if(DEBUGMEM>1)
2960 : {
2961 0 : pari_warn(warnmem,"[2] ifac_decomp");
2962 0 : ifac_print(part, here);
2963 : }
2964 0 : ifac_realloc(&part, &here, 0);
2965 0 : offset = here - part;
2966 0 : part = gerepileupto((pari_sp)workspc, part);
2967 0 : here = part + offset;
2968 : }
2969 6618 : nb++;
2970 6618 : pairs = icopy_avma(VALUE(here), (pari_sp)pairs);
2971 6618 : pairs = icopy_avma(EXPON(here), (pari_sp)pairs);
2972 6618 : ifac_delete(here);
2973 : }
2974 3365 : set_avma((pari_sp)pairs);
2975 3365 : if (DEBUGLEVEL >= 3)
2976 0 : err_printf("IFAC: found %ld large prime (power) factor%s.\n",
2977 : nb, (nb>1? "s": ""));
2978 3365 : return nb;
2979 : }
2980 :
2981 : /***********************************************************************/
2982 : /** ARITHMETIC FUNCTIONS WITH EARLY-ABORT **/
2983 : /** needing direct access to the factoring machinery to avoid work: **/
2984 : /** e.g. if we find a square factor, moebius returns 0, core doesn't **/
2985 : /** need to factor it, etc. **/
2986 : /***********************************************************************/
2987 : /* memory management */
2988 : static void
2989 0 : ifac_GC(pari_sp av, GEN *part)
2990 : {
2991 0 : GEN here = NULL;
2992 0 : if(DEBUGMEM>1) pari_warn(warnmem,"ifac_xxx");
2993 0 : ifac_realloc(part, &here, 0);
2994 0 : *part = gerepileupto(av, *part);
2995 0 : }
2996 :
2997 : /* destroys n */
2998 : static long
2999 211 : ifac_moebius(GEN n)
3000 : {
3001 211 : long mu = 1;
3002 211 : pari_sp av = avma;
3003 211 : GEN part = ifac_start(n, 1);
3004 : for(;;)
3005 394 : {
3006 : long v;
3007 : GEN p;
3008 605 : if (!ifac_next(&part,&p,&v)) return v? 0: mu;
3009 394 : mu = -mu;
3010 394 : if (gc_needed(av,1)) ifac_GC(av,&part);
3011 : }
3012 : }
3013 :
3014 : int
3015 658 : ifac_read(GEN part, GEN *p, long *e)
3016 : {
3017 658 : GEN here = ifac_find(part);
3018 658 : if (!here) return 0;
3019 340 : *p = VALUE(here);
3020 340 : *e = EXPON(here)[2];
3021 340 : return 1;
3022 : }
3023 : void
3024 296 : ifac_skip(GEN part)
3025 : {
3026 296 : GEN here = ifac_find(part);
3027 296 : if (here) ifac_delete(here);
3028 296 : }
3029 :
3030 : /* destroys n */
3031 : static int
3032 7 : ifac_ispowerful(GEN n)
3033 : {
3034 7 : pari_sp av = avma;
3035 7 : GEN part = ifac_start(n, 0);
3036 : for(;;)
3037 7 : {
3038 : long e;
3039 : GEN p;
3040 14 : if (!ifac_read(part,&p,&e)) return 1;
3041 : /* power: skip */
3042 7 : if (e != 1 || Z_isanypower(p,NULL)) { ifac_skip(part); continue; }
3043 0 : if (!ifac_next(&part,&p,&e)) return 1;
3044 0 : if (e == 1) return 0;
3045 0 : if (gc_needed(av,1)) ifac_GC(av,&part);
3046 : }
3047 : }
3048 : /* destroys n */
3049 : static GEN
3050 311 : ifac_core(GEN n)
3051 : {
3052 311 : GEN m = gen_1, c = cgeti(lgefint(n));
3053 311 : pari_sp av = avma;
3054 311 : GEN part = ifac_start(n, 0);
3055 : for(;;)
3056 333 : {
3057 : long e;
3058 : GEN p;
3059 644 : if (!ifac_read(part,&p,&e)) return m;
3060 : /* square: skip */
3061 333 : if (!odd(e) || Z_issquare(p)) { ifac_skip(part); continue; }
3062 44 : if (!ifac_next(&part,&p,&e)) return m;
3063 44 : if (odd(e)) m = mulii(m, p);
3064 44 : if (gc_needed(av,1)) { affii(m,c); m=c; ifac_GC(av,&part); }
3065 : }
3066 : }
3067 :
3068 : /* Where to stop trial dividing in factorization. Guaranteed >= 2^14 */
3069 : ulong
3070 1019123 : tridiv_bound(GEN n)
3071 : {
3072 1019123 : ulong l = (ulong)expi(n) + 1;
3073 1019123 : if (l <= 32) return 1UL<<14;
3074 226620 : if (l <= 512) return (l-16) << 10;
3075 598 : return 1UL<<19; /* Rho is generally faster above this */
3076 : }
3077 :
3078 : /* return a value <= (48 << 10) = 49152 < primelinit */
3079 : ulong
3080 28119496 : tridiv_boundu(ulong n)
3081 : {
3082 : #ifdef LONG_IS_64BIT
3083 24267777 : if (n & HIGHMASK)
3084 792901 : return ((ulong)expu(n) + 1 - 16) << 10;
3085 : #else
3086 : (void)n;
3087 : #endif
3088 27326595 : return 1UL<<14;
3089 : }
3090 :
3091 : /* destroys n */
3092 : static void
3093 1095 : ifac_factoru(GEN n, long hint, GEN P, GEN E, long *pi)
3094 : {
3095 1095 : GEN part = ifac_start_hint(n, 0, hint);
3096 : for(;;)
3097 2112 : {
3098 : long v;
3099 : GEN p;
3100 3207 : if (!ifac_next(&part,&p,&v)) return;
3101 2112 : P[*pi] = itou(p);
3102 2112 : E[*pi] = v;
3103 2112 : (*pi)++;
3104 : }
3105 : }
3106 : /* destroys n */
3107 : static long
3108 1074 : ifac_moebiusu(GEN n)
3109 : {
3110 1074 : GEN part = ifac_start(n, 1);
3111 1074 : long s = 1;
3112 : for(;;)
3113 2148 : {
3114 : long v;
3115 : GEN p;
3116 3222 : if (!ifac_next(&part,&p,&v)) return v? 0: s;
3117 2148 : s = -s;
3118 : }
3119 : }
3120 :
3121 : INLINE ulong
3122 242425157 : u_forprime_next_fast(forprime_t *T)
3123 : {
3124 242425157 : if (*(T->d))
3125 : {
3126 242430740 : NEXT_PRIME_VIADIFF(T->p, T->d);
3127 242430740 : return T->p > T->b ? 0: T->p;
3128 : }
3129 0 : return u_forprime_next(T);
3130 : }
3131 :
3132 : /* uisprime(n) knowing n has no prime divisor
3133 : * < all if all != 0,
3134 : * < tridiv_boundu(n) >= 2^14 if all = 0 */
3135 : static int
3136 52601 : uisprime_nosmall(ulong n, ulong all)
3137 : {
3138 52601 : return (!all || all > 661) ? uisprime_661(n): uisprime(n);
3139 : }
3140 : /* Factor n and output [p,e] where
3141 : * p, e are vecsmall with n = prod{p[i]^e[i]}. If all != 0:
3142 : * if pU1 is not NULL, set *pU1 and *pU2 so that unfactored composite is
3143 : * U1^U2 with U1 not a pure power; else include it in factorization */
3144 : static GEN
3145 31770073 : factoru_sign(ulong n, ulong all, long hint, ulong *pU1, ulong *pU2)
3146 : {
3147 : GEN f, E, E2, P, P2;
3148 : pari_sp av;
3149 : ulong p;
3150 31770073 : long i, oldi = -1;
3151 : forprime_t S;
3152 :
3153 31770073 : if (pU1) *pU1 = *pU2 = 1;
3154 31770073 : if (n == 0) retmkvec2(mkvecsmall(0), mkvecsmall(1));
3155 31770073 : if (n == 1) retmkvec2(cgetg(1,t_VECSMALL), cgetg(1,t_VECSMALL));
3156 :
3157 31382744 : f = cgetg(3,t_VEC); av = avma;
3158 : /* enough room to store <= 15 primes and exponents (OK if n < 2^64) */
3159 31380922 : (void)new_chunk(16*2);
3160 31380211 : P = cgetg(16, t_VECSMALL); i = 1;
3161 31379652 : E = cgetg(16, t_VECSMALL);
3162 31379734 : if (!all || all > 2)
3163 : {
3164 : ulong lim;
3165 31379691 : long v = vals(n);
3166 31379678 : if (v)
3167 : {
3168 11718814 : P[1] = 2; E[1] = v; i = 2;
3169 11718814 : n >>= v; if (n == 1) goto END;
3170 : }
3171 28303725 : lim = all? all-1: tridiv_boundu(n);
3172 28304461 : if (!(hint & 16) && lim >= 128) /* expu(lim) >= 7 */
3173 148104 : { /* fast trial division */
3174 14085969 : GEN PR = prodprimes();
3175 14086072 : long nPR = lg(PR)-1, b = minss(nPR, expu(lim)-6);
3176 14086101 : ulong nr = ugcd(n, umodiu(gel(PR,b), n));
3177 14087013 : if (nr != 1)
3178 : {
3179 14018859 : GEN F = factoru_sign(nr, all, 1 + 2 + 16, NULL, NULL), Q = gel(F,1);
3180 14018091 : long j, l = lg(Q);
3181 37966654 : for (j = 1; j < l; j++)
3182 : {
3183 23947895 : ulong p = uel(Q,j);
3184 23947895 : if (all && p >= all) break; /* may occur for last p */
3185 23947889 : E[i] = u_lvalrem(n, p, &n); /* > 0 */
3186 23948563 : P[i] = p; i++;
3187 : }
3188 14018765 : if (n == 1) goto END;
3189 : }
3190 : }
3191 : else
3192 : {
3193 14218492 : u_forprime_init(&S, 3, lim);
3194 105692561 : while ( (p = u_forprime_next_fast(&S)) )
3195 : {
3196 : int stop;
3197 : /* tiny integers without small factors are often primes */
3198 105692553 : if (p == 673)
3199 : {
3200 14321999 : if (uisprime_661(n)) { P[i] = n; E[i] = 1; i++; goto END; }
3201 100911 : oldi = i;
3202 : }
3203 105692187 : v = u_lvalrem_stop(&n, p, &stop);
3204 105692289 : if (v) {
3205 12653955 : P[i] = p;
3206 12653955 : E[i] = v; i++;
3207 : }
3208 105692289 : if (stop) {
3209 14220722 : if (n != 1) { P[i] = n; E[i] = 1; i++; }
3210 14220722 : goto END;
3211 : }
3212 : }
3213 : }
3214 : }
3215 : /* if i > oldi (includes oldi = -1) we don't know that n is composite */
3216 148541 : if (all)
3217 : { /* smallfact: look for easy pure powers then stop */
3218 : #ifdef LONG_IS_64BIT
3219 82356 : ulong mask = all > 563 ? (all > 7129 ? 1: 3): 7;
3220 : #else
3221 13544 : ulong mask = all > 22 ? (all > 83 ? 1: 3): 7;
3222 : #endif
3223 95900 : long k = 1, ex;
3224 96334 : while (uissquareall(n, &n)) k <<= 1;
3225 95907 : while ( (ex = uis_357_power(n, &n, &mask)) ) k *= ex;
3226 95900 : if (pU1 && (i == oldi || !uisprime(n)))
3227 254 : { *pU1 = n; *pU2 = (ulong)k; }
3228 : else
3229 95646 : { P[i] = n; E[i] = k; i++; }
3230 95900 : goto END;
3231 : }
3232 : /* we don't known that n is composite ? */
3233 52641 : if (oldi != i && uisprime_nosmall(n,all)) { P[i]=n; E[i]=1; i++; goto END; }
3234 :
3235 : {
3236 : GEN perm;
3237 1095 : ifac_factoru(utoipos(n), hint, P, E, &i);
3238 1095 : setlg(P, i);
3239 1095 : perm = vecsmall_indexsort(P);
3240 1095 : P = vecsmallpermute(P, perm);
3241 1095 : E = vecsmallpermute(E, perm);
3242 : }
3243 31384397 : END:
3244 31384397 : set_avma(av);
3245 31383200 : P2 = cgetg(i, t_VECSMALL); gel(f,1) = P2;
3246 31381506 : E2 = cgetg(i, t_VECSMALL); gel(f,2) = E2;
3247 91772634 : while (--i >= 1) { P2[i] = P[i]; E2[i] = E[i]; }
3248 31381013 : return f;
3249 : }
3250 : GEN
3251 3777509 : factoru(ulong n)
3252 3777509 : { return factoru_sign(n, 0, decomp_default_hint, NULL, NULL); }
3253 :
3254 : ulong
3255 0 : radicalu(ulong n)
3256 : {
3257 0 : pari_sp av = avma;
3258 0 : return gc_long(av, zv_prod(gel(factoru(n),1)));
3259 : }
3260 :
3261 : long
3262 54173 : moebiusu_fact(GEN f)
3263 : {
3264 54173 : GEN E = gel(f,2);
3265 54173 : long i, l = lg(E);
3266 93541 : for (i = 1; i < l; i++)
3267 57813 : if (E[i] > 1) return 0;
3268 35728 : return odd(l)? 1: -1;
3269 : }
3270 :
3271 : long
3272 2510404 : moebiusu(ulong n)
3273 : {
3274 : pari_sp av;
3275 : ulong p;
3276 : long s, v, test_prime;
3277 : forprime_t S;
3278 :
3279 2510404 : switch(n)
3280 : {
3281 0 : case 0: (void)check_arith_non0(gen_0,"moebius");/*error*/
3282 568507 : case 1: return 1;
3283 106644 : case 2: return -1;
3284 : }
3285 1853561 : v = vals(n);
3286 1852175 : if (v == 0)
3287 1739131 : s = 1;
3288 : else
3289 : {
3290 113044 : if (v > 1) return 0;
3291 24756 : n >>= 1;
3292 24756 : s = -1;
3293 : }
3294 1763887 : av = avma;
3295 1763887 : u_forprime_init(&S, 3, tridiv_boundu(n));
3296 1774071 : test_prime = 0;
3297 31251731 : while ((p = u_forprime_next_fast(&S)))
3298 : {
3299 : int stop;
3300 : /* tiny integers without small factors are often primes */
3301 31263766 : if (p == 673)
3302 : {
3303 3752 : test_prime = 0;
3304 1781022 : if (uisprime_661(n)) return gc_long(av,-s);
3305 : }
3306 31262318 : v = u_lvalrem_stop(&n, p, &stop);
3307 31253236 : if (v) {
3308 1323961 : if (v > 1) return gc_long(av,0);
3309 1141876 : test_prime = 1;
3310 1141876 : s = -s;
3311 : }
3312 31071151 : if (stop) return gc_long(av, n==1? s: -s);
3313 : }
3314 1270 : set_avma(av);
3315 1524 : if (test_prime && uisprime_661(n)) return -s;
3316 : else
3317 : {
3318 1074 : long t = ifac_moebiusu(utoipos(n));
3319 1074 : set_avma(av);
3320 1074 : if (t == 0) return 0;
3321 1074 : return (s == t)? 1: -1;
3322 : }
3323 : }
3324 :
3325 : long
3326 55687 : moebius(GEN n)
3327 : {
3328 55687 : pari_sp av = avma;
3329 : GEN F;
3330 : ulong p;
3331 : long i, l, s, v;
3332 : forprime_t S;
3333 :
3334 55687 : if ((F = check_arith_non0(n,"moebius")))
3335 : {
3336 : GEN E;
3337 762 : F = clean_Z_factor(F);
3338 728 : E = gel(F,2);
3339 728 : l = lg(E);
3340 1428 : for(i = 1; i < l; i++)
3341 980 : if (!equali1(gel(E,i))) return gc_long(av,0);
3342 448 : return gc_long(av, odd(l)? 1: -1);
3343 : }
3344 55185 : if (lgefint(n) == 3) return moebiusu(uel(n,2));
3345 808 : p = mod4(n); if (!p) return 0;
3346 808 : if (p == 2) { s = -1; n = shifti(n, -1); } else { s = 1; n = icopy(n); }
3347 808 : setabssign(n);
3348 :
3349 808 : u_forprime_init(&S, 3, tridiv_bound(n));
3350 2322026 : while ((p = u_forprime_next_fast(&S)))
3351 : {
3352 : int stop;
3353 2321446 : v = Z_lvalrem_stop(&n, p, &stop);
3354 2321446 : if (v)
3355 : {
3356 1540 : if (v > 1) return gc_long(av,0);
3357 1312 : s = -s;
3358 1312 : if (stop) return gc_long(av, is_pm1(n)? s: -s);
3359 : }
3360 : }
3361 580 : l = lg(primetab);
3362 590 : for (i = 1; i < l; i++)
3363 : {
3364 25 : v = Z_pvalrem(n, gel(primetab,i), &n);
3365 25 : if (v)
3366 : {
3367 25 : if (v > 1) return gc_long(av,0);
3368 11 : s = -s;
3369 11 : if (is_pm1(n)) return gc_long(av,s);
3370 : }
3371 : }
3372 565 : if (ifac_isprime(n)) return gc_long(av,-s);
3373 : /* large composite without small factors */
3374 211 : v = ifac_moebius(n);
3375 211 : return gc_long(av, s < 0? -v: v); /* correct also if v==0 */
3376 : }
3377 :
3378 : long
3379 1708 : ispowerful(GEN n)
3380 : {
3381 1708 : pari_sp av = avma;
3382 : GEN F;
3383 : ulong p, bound;
3384 : long i, l, v;
3385 : forprime_t S;
3386 :
3387 1708 : if ((F = check_arith_all(n, "ispowerful")))
3388 : {
3389 742 : GEN p, P = gel(F,1), E = gel(F,2);
3390 742 : if (lg(P) == 1) return 1; /* 1 */
3391 728 : p = gel(P,1);
3392 728 : if (!signe(p)) return 1; /* 0 */
3393 707 : i = is_pm1(p)? 2: 1; /* skip -1 */
3394 707 : l = lg(E);
3395 980 : for (; i < l; i++)
3396 847 : if (equali1(gel(E,i))) return 0;
3397 133 : return 1;
3398 : }
3399 966 : if (!signe(n)) return 1;
3400 :
3401 952 : if (mod4(n) == 2) return 0;
3402 623 : n = shifti(n, -vali(n));
3403 623 : if (is_pm1(n)) return 1;
3404 546 : setabssign(n);
3405 546 : bound = tridiv_bound(n);
3406 546 : u_forprime_init(&S, 3, bound);
3407 307797 : while ((p = u_forprime_next_fast(&S)))
3408 : {
3409 : int stop;
3410 307790 : v = Z_lvalrem_stop(&n, p, &stop);
3411 307790 : if (v)
3412 : {
3413 742 : if (v == 1) return gc_long(av,0);
3414 203 : if (stop) return gc_long(av, is_pm1(n));
3415 : }
3416 : }
3417 7 : l = lg(primetab);
3418 7 : for (i = 1; i < l; i++)
3419 : {
3420 0 : v = Z_pvalrem(n, gel(primetab,i), &n);
3421 0 : if (v)
3422 : {
3423 0 : if (v == 1) return gc_long(av,0);
3424 0 : if (is_pm1(n)) return gc_long(av,1);
3425 : }
3426 : }
3427 : /* no need to factor: must be p^2 or not powerful */
3428 7 : if (cmpii(powuu(bound+1, 3), n) > 0) return gc_long(av, Z_issquare(n));
3429 :
3430 7 : if (ifac_isprime(n)) return gc_long(av,0);
3431 : /* large composite without small factors */
3432 7 : return gc_long(av, ifac_ispowerful(n));
3433 : }
3434 :
3435 : ulong
3436 59255 : coreu_fact(GEN f)
3437 : {
3438 59255 : GEN P = gel(f,1), E = gel(f,2);
3439 59255 : long i, l = lg(P), m = 1;
3440 106696 : for (i = 1; i < l; i++)
3441 : {
3442 47441 : ulong p = P[i], e = E[i];
3443 47441 : if (e & 1) m *= p;
3444 : }
3445 59255 : return m;
3446 : }
3447 : ulong
3448 59255 : coreu(ulong n)
3449 : {
3450 : pari_sp av;
3451 59255 : if (n == 0) return 0;
3452 59255 : av = avma; return gc_ulong(av, coreu_fact(factoru(n)));
3453 : }
3454 : GEN
3455 706445 : core(GEN n)
3456 : {
3457 706445 : pari_sp av = avma;
3458 : GEN m, F;
3459 : ulong p;
3460 : long i, l, v;
3461 : forprime_t S;
3462 :
3463 706445 : if ((F = check_arith_all(n, "core")))
3464 : {
3465 646877 : GEN p, x, P = gel(F,1), E = gel(F,2);
3466 646877 : long j = 1;
3467 646877 : if (lg(P) == 1) return gen_1;
3468 646849 : p = gel(P,1);
3469 646849 : if (!signe(p)) return gen_0;
3470 646807 : l = lg(P); x = cgetg(l, t_VEC);
3471 2284321 : for (i = 1; i < l; i++)
3472 1637533 : if (mpodd(gel(E,i))) gel(x,j++) = gel(P,i);
3473 646788 : setlg(x, j); return ZV_prod(x);
3474 : }
3475 59578 : switch(lgefint(n))
3476 : {
3477 28 : case 2: return gen_0;
3478 59199 : case 3:
3479 59199 : p = coreu(uel(n,2));
3480 59199 : return signe(n) > 0? utoipos(p): utoineg(p);
3481 : }
3482 :
3483 351 : m = signe(n) < 0? gen_m1: gen_1;
3484 351 : n = absi_shallow(n);
3485 350 : u_forprime_init(&S, 2, tridiv_bound(n));
3486 5016671 : while ((p = u_forprime_next_fast(&S)))
3487 : {
3488 : int stop;
3489 5016342 : v = Z_lvalrem_stop(&n, p, &stop);
3490 5016342 : if (v)
3491 : {
3492 963 : if (v & 1) m = muliu(m, p);
3493 963 : if (stop)
3494 : {
3495 21 : if (!is_pm1(n)) m = mulii(m, n);
3496 21 : return gerepileuptoint(av, m);
3497 : }
3498 : }
3499 : }
3500 329 : l = lg(primetab);
3501 823 : for (i = 1; i < l; i++)
3502 : {
3503 502 : GEN q = gel(primetab,i);
3504 502 : v = Z_pvalrem(n, q, &n);
3505 502 : if (v)
3506 : {
3507 32 : if (v & 1) m = mulii(m, q);
3508 32 : if (is_pm1(n)) return gerepileuptoint(av, m);
3509 : }
3510 : }
3511 321 : if (ifac_isprime(n)) { m = mulii(m, n); return gerepileuptoint(av, m); }
3512 311 : if (m == gen_1) n = icopy(n); /* ifac_core destroys n */
3513 : /* large composite without small factors */
3514 311 : return gerepileuptoint(av, mulii(m, ifac_core(n)));
3515 : }
3516 :
3517 : long
3518 0 : Z_issmooth(GEN m, ulong lim)
3519 : {
3520 0 : pari_sp av = avma;
3521 0 : ulong p = 2;
3522 : forprime_t S;
3523 0 : u_forprime_init(&S, 2, lim);
3524 0 : while ((p = u_forprime_next_fast(&S)))
3525 : {
3526 : int stop;
3527 0 : (void)Z_lvalrem_stop(&m, p, &stop);
3528 0 : if (stop) return gc_long(av, abscmpiu(m,lim) <= 0);
3529 : }
3530 0 : return gc_long(av,0);
3531 : }
3532 :
3533 : GEN
3534 198680 : Z_issmooth_fact(GEN m, ulong lim)
3535 : {
3536 198680 : pari_sp av = avma;
3537 : GEN F, P, E;
3538 : ulong p;
3539 198680 : long i = 1, l = expi(m)+1;
3540 : forprime_t S;
3541 198607 : P = cgetg(l, t_VECSMALL);
3542 198642 : E = cgetg(l, t_VECSMALL);
3543 198621 : F = mkmat2(P,E);
3544 198717 : u_forprime_init(&S, 2, lim);
3545 51018247 : while ((p = u_forprime_next_fast(&S)))
3546 : {
3547 : long v;
3548 : int stop;
3549 50919175 : if ((v = Z_lvalrem_stop(&m, p, &stop)))
3550 : {
3551 753646 : P[i] = p;
3552 753646 : E[i] = v; i++;
3553 753646 : if (stop)
3554 : {
3555 136325 : if (abscmpiu(m,lim) > 0) break;
3556 115651 : if (m[2] > 1) { P[i] = m[2]; E[i] = 1; i++; }
3557 115651 : setlg(P, i);
3558 116017 : setlg(E, i); return gc_const((pari_sp)F, F);
3559 : }
3560 : }
3561 : }
3562 74656 : return gc_NULL(av);
3563 : }
3564 :
3565 : /***********************************************************************/
3566 : /** **/
3567 : /** COMPUTING THE MATRIX OF PRIME DIVISORS AND EXPONENTS **/
3568 : /** **/
3569 : /***********************************************************************/
3570 : static GEN
3571 273215 : aux_end(GEN M, GEN n, long nb)
3572 : {
3573 273215 : GEN P,E, z = (GEN)avma;
3574 : long i;
3575 :
3576 273215 : guncloneNULL(n);
3577 273215 : P = cgetg(nb+1,t_COL);
3578 273215 : E = cgetg(nb+1,t_COL);
3579 1883147 : for (i=nb; i; i--)
3580 : { /* allow a stackdummy in the middle */
3581 1744651 : while (typ(z) != t_INT) z += lg(z);
3582 1609932 : gel(E,i) = z; z += lg(z);
3583 1609932 : gel(P,i) = z; z += lg(z);
3584 : }
3585 273215 : gel(M,1) = P;
3586 273215 : gel(M,2) = E;
3587 273215 : return sort_factor(M, (void*)&abscmpii, cmp_nodata);
3588 : }
3589 :
3590 : static void
3591 1603314 : STORE(long *nb, GEN x, long e) { (*nb)++; (void)x; (void)utoipos(e); }
3592 : static void
3593 1550164 : STOREu(long *nb, ulong x, long e) { STORE(nb, utoipos(x), e); }
3594 : static void
3595 53129 : STOREi(long *nb, GEN x, long e) { STORE(nb, icopy(x), e); }
3596 : /* no prime less than p divides n; return 1 if factored completely */
3597 : static int
3598 26101 : special_primes(GEN n, ulong p, long *nb, GEN T)
3599 : {
3600 26101 : long i, l = lg(T);
3601 26101 : if (l > 1)
3602 : { /* pp = square of biggest p tried so far */
3603 540 : long pp[] = { evaltyp(t_INT)|_evallg(4), 0,0,0 };
3604 540 : pari_sp av = avma; affii(sqru(p), pp); set_avma(av);
3605 :
3606 1184 : for (i = 1; i < l; i++)
3607 777 : if (dvdiiz(n, gel(T,i), n))
3608 : {
3609 329 : long k = 1; while (dvdiiz(n, gel(T,i), n)) k++;
3610 231 : STOREi(nb, gel(T,i), k);
3611 231 : if (abscmpii(pp, n) > 0)
3612 : {
3613 133 : if (!is_pm1(n)) STOREi(nb, n, 1);
3614 133 : return 1;
3615 : }
3616 : }
3617 : }
3618 25968 : return 0;
3619 : }
3620 :
3621 : /* factor(sn*|n|), where sn = -1 or 1.
3622 : * all != 0 : only look for prime divisors < all. If pU is not NULL,
3623 : * set it to unfactored composite */
3624 : static GEN
3625 14247533 : ifactor_sign(GEN n, ulong all, long hint, long sn, GEN *pU)
3626 : {
3627 : GEN M, N;
3628 : pari_sp av;
3629 14247533 : long nb = 0, nb0 = -1, i;
3630 : ulong lim;
3631 : forprime_t T;
3632 :
3633 14247533 : if (lgefint(n) == 3)
3634 : { /* small integer */
3635 13974365 : GEN f, Pf, Ef, P, E, F = cgetg(3, t_MAT);
3636 : ulong U1, U2;
3637 : long l;
3638 13974591 : av = avma;
3639 : /* enough room to store <= 15 primes and exponents (OK if n < 2^64) */
3640 13974591 : (void)new_chunk((15*3 + 15 + 1) * 2);
3641 13974585 : f = factoru_sign(uel(n,2), all, hint, pU? &U1: NULL, pU? &U2: NULL);
3642 13974848 : set_avma(av);
3643 13974846 : Pf = gel(f,1);
3644 13974846 : Ef = gel(f,2);
3645 13974846 : l = lg(Pf);
3646 13974846 : if (sn < 0)
3647 : { /* add sign */
3648 6181 : long L = l+1;
3649 6181 : gel(F,1) = P = cgetg(L, t_COL);
3650 6181 : gel(F,2) = E = cgetg(L, t_COL);
3651 6181 : gel(P,1) = gen_m1; P++;
3652 6181 : gel(E,1) = gen_1; E++;
3653 : }
3654 : else
3655 : {
3656 13968665 : gel(F,1) = P = cgetg(l, t_COL);
3657 13968666 : gel(F,2) = E = cgetg(l, t_COL);
3658 : }
3659 43956745 : for (i = 1; i < l; i++)
3660 : {
3661 29981940 : gel(P,i) = utoipos(Pf[i]);
3662 29981928 : gel(E,i) = utoipos(Ef[i]);
3663 : }
3664 13974805 : if (pU) *pU = U1 == 1? NULL: mkvec2(utoipos(U1), utoipos(U2));
3665 13974798 : return F;
3666 : }
3667 273168 : if (pU) *pU = NULL;
3668 273168 : M = cgetg(3,t_MAT);
3669 273215 : if (sn < 0) STORE(&nb, utoineg(1), 1);
3670 273215 : if (is_pm1(n)) return aux_end(M,NULL,nb);
3671 :
3672 273215 : n = N = gclone(n); setabssign(n);
3673 : /* trial division bound; look for primes <= lim; nb is the number of
3674 : * distinct prime factors so far; if nb0 >= 0, it records the value of nb
3675 : * for which we made a successful compositeness test: if later nb = nb0,
3676 : * we know that n is composite */
3677 273215 : lim = 1;
3678 273215 : if (!all || all > 2)
3679 : { /* trial divide p < all if all != 0, else p <= tridiv_bound() >= 2^14 */
3680 : ulong maxp, p;
3681 : pari_sp av2;
3682 273201 : i = vali(n);
3683 273201 : if (i)
3684 : {
3685 136141 : STOREu(&nb, 2, i);
3686 136141 : av = avma; affii(shifti(n,-i), n); set_avma(av);
3687 : }
3688 273201 : if (is_pm1(n)) return aux_end(M,n,nb);
3689 273066 : lim = all? all-1: tridiv_bound(n);
3690 273066 : maxp = maxprime();
3691 273066 : if (!(hint & 16) && lim >= 128) /* expu(lim) >= 7 */
3692 24705 : { /* fast trial division */
3693 235950 : GEN nr, PR = prodprimes();
3694 235950 : long nPR = lg(PR)-1, b = minss(nPR, expu(lim)-6);
3695 235950 : av = avma; nr = gcdii(gel(PR,b), n);
3696 235950 : if (is_pm1(nr)) { set_avma(av); av2 = av; }
3697 : else
3698 : {
3699 234849 : GEN F = ifactor_sign(nr, all, 1 + 2 + 16, 1, NULL), P = gel(F,1);
3700 234849 : long l = lg(P);
3701 234849 : av2 = avma;
3702 1270652 : for (i = 1; i < l; i++)
3703 : {
3704 1036705 : pari_sp av3 = avma;
3705 1036705 : GEN gp = gel(P,i);
3706 1036705 : ulong p = gp[2];
3707 : long k;
3708 1036705 : if (lgefint(gp)>3 || (all && p>=all)) break; /* may occur for last p */
3709 1035803 : k = Z_lvalrem(n, p, &n); /* > 0 */
3710 1035803 : affii(n, N); n = N; set_avma(av3);
3711 1035803 : STOREu(&nb, p, k);
3712 : }
3713 234849 : if (is_pm1(n))
3714 : {
3715 211245 : stackdummy(av, av2);
3716 211245 : return aux_end(M,n,nb);
3717 : }
3718 : }
3719 : }
3720 : else
3721 : { /* naive trial division */
3722 37116 : av = avma;
3723 37116 : u_forprime_init(&T, 3, minss(lim, maxp)); av2 = avma;
3724 : /* first pass: known to fit in private prime table */
3725 46839304 : while ((p = u_forprime_next_fast(&T)))
3726 : {
3727 46837922 : pari_sp av3 = avma;
3728 : int stop;
3729 46837922 : long k = Z_lvalrem_stop(&n, p, &stop);
3730 46837922 : if (k)
3731 : {
3732 378220 : affii(n, N); n = N; set_avma(av3);
3733 378220 : STOREu(&nb, p, k);
3734 : }
3735 : /* prodeuler(p=2,16381,1-1/p) ~ 0.0578; if probability of being prime
3736 : * knowing P^-(n) > 16381 is at least 10%, try BPSW */
3737 46837922 : if (!stop && p == 16381)
3738 : {
3739 4969 : if (bit_accuracy_mul(lgefint(n), 0.0578 * M_LN2) < 10)
3740 4969 : { nb0 = nb; stop = ifac_isprime(n); }
3741 : }
3742 46837922 : if (stop)
3743 : {
3744 35734 : if (!is_pm1(n)) STOREi(&nb, n, 1);
3745 35734 : stackdummy(av, av2);
3746 35734 : return aux_end(M,n,nb);
3747 : }
3748 : }
3749 : }
3750 26087 : stackdummy(av, av2);
3751 26087 : if (lim > maxp)
3752 : { /* second pass, usually empty: outside private prime table */
3753 4458 : av = avma; u_forprime_init(&T, maxp+1, lim); av2 = avma;
3754 4458 : while ((p = u_forprime_next(&T)))
3755 : {
3756 0 : pari_sp av3 = avma;
3757 : int stop;
3758 0 : long k = Z_lvalrem_stop(&n, p, &stop);
3759 0 : if (k)
3760 : {
3761 0 : affii(n, N); n = N; set_avma(av3);
3762 0 : STOREu(&nb, p, k);
3763 : }
3764 0 : if (stop)
3765 : {
3766 0 : if (!is_pm1(n)) STOREi(&nb, n, 1);
3767 0 : stackdummy(av, av2);
3768 0 : return aux_end(M,n,nb);
3769 : }
3770 : }
3771 4458 : stackdummy(av, av2);
3772 : }
3773 : }
3774 26101 : if (special_primes(n, lim, &nb, primetab)) return aux_end(M,n, nb);
3775 : /* if nb > nb0 (includes nb0 = -1) we don't know that n is composite */
3776 25968 : if (all)
3777 : { /* smallfact: look for easy pure powers then stop. Cf Z_isanypower */
3778 : GEN x;
3779 : long k;
3780 17110 : av = avma;
3781 17110 : k = isanypower_nosmalldiv(n, &x); /* may miss a power if all < 2^14 */
3782 17110 : if (k > 1) { affii(x, n); nb0 = -1; }
3783 17110 : if (pU)
3784 : {
3785 : GEN F;
3786 12498 : if (abscmpiu(n, lim) <= 0
3787 12498 : || cmpii(n, sqru(lim)) <= 0
3788 8019 : || (all >= (1<<14)
3789 7273 : && (nb>nb0 && bit_accuracy(lgefint(n))<2048 && ifac_isprime(n))))
3790 12498 : { set_avma(av); STOREi(&nb, n, k); return aux_end(M,n, nb); }
3791 5307 : set_avma(av); F = aux_end(M, NULL, nb); /* don't destroy n */
3792 5307 : *pU = mkvec2(icopy(n), utoipos(k)); /* composite cofactor */
3793 5307 : gunclone(n); return F;
3794 : }
3795 4612 : set_avma(av); STOREi(&nb, n, k);
3796 4612 : if (DEBUGLEVEL >= 2) {
3797 0 : pari_warn(warner,
3798 0 : "IFAC: untested %ld-bit integer declared prime", expi(n)+1);
3799 0 : if (expi(n) <= 256) err_printf("\t%Ps\n", n);
3800 : }
3801 : }
3802 8858 : else if (nb > nb0 && ifac_isprime(n)) STOREi(&nb, n, 1);
3803 3365 : else nb += ifac_decomp(n, hint);
3804 13470 : return aux_end(M,n, nb);
3805 : }
3806 :
3807 : static GEN
3808 9302059 : ifactor(GEN n, ulong all, long hint)
3809 : {
3810 9302059 : long s = signe(n);
3811 9302059 : if (!s) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3812 9302010 : return ifactor_sign(n, all, hint, s, NULL);
3813 : }
3814 :
3815 : int
3816 7505 : ifac_next(GEN *part, GEN *p, long *e)
3817 : {
3818 7505 : GEN here = ifac_main(part);
3819 7505 : if (here == gen_0) { *p = NULL; *e = 1; return 0; }
3820 7491 : if (!here) { *p = NULL; *e = 0; return 0; }
3821 5041 : *p = VALUE(here);
3822 5041 : *e = EXPON(here)[2];
3823 5041 : ifac_delete(here); return 1;
3824 : }
3825 :
3826 : /* see before ifac_crack for current semantics of 'hint' (factorint's 'flag') */
3827 : GEN
3828 10290 : factorint(GEN n, long flag)
3829 : {
3830 : GEN F;
3831 10290 : if ((F = check_arith_all(n,"factorint"))) return gcopy(F);
3832 10276 : return ifactor(n,0,flag);
3833 : }
3834 :
3835 : GEN
3836 19345 : Z_factor_limit(GEN n, ulong all)
3837 : {
3838 19345 : if (!all) all = GP_DATA->primelimit + 1;
3839 19345 : return ifactor(n, all, decomp_default_hint);
3840 : }
3841 : GEN
3842 888124 : absZ_factor_limit_strict(GEN n, ulong all, GEN *pU)
3843 : {
3844 : GEN F, U;
3845 888124 : if (!signe(n))
3846 : {
3847 0 : if (pU) *pU = NULL;
3848 0 : retmkmat2(mkcol(gen_0), mkcol(gen_1));
3849 : }
3850 888124 : if (!all) all = GP_DATA->primelimit + 1;
3851 888124 : F = ifactor_sign(n, all, decomp_default_hint, 1, &U);
3852 888156 : if (pU) *pU = U;
3853 888156 : return F;
3854 : }
3855 : GEN
3856 276857 : absZ_factor_limit(GEN n, ulong all)
3857 : {
3858 276857 : if (!signe(n)) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3859 276857 : if (!all) all = GP_DATA->primelimit + 1;
3860 276857 : return ifactor_sign(n, all, decomp_default_hint, 1, NULL);
3861 : }
3862 : GEN
3863 9272387 : Z_factor(GEN n)
3864 9272387 : { return ifactor(n,0,decomp_default_hint); }
3865 : GEN
3866 3545390 : absZ_factor(GEN n)
3867 : {
3868 3545390 : if (!signe(n)) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3869 3545376 : return ifactor_sign(n, 0, decomp_default_hint, 1, NULL);
3870 : }
3871 : /* Factor until the unfactored part is smaller than limit. Return the
3872 : * factored part. Hence factorback(output) may be smaller than n */
3873 : GEN
3874 98 : Z_factor_until(GEN n, GEN limit)
3875 : {
3876 98 : pari_sp av = avma;
3877 98 : long s = signe(n), eq;
3878 : GEN q, F, U;
3879 :
3880 98 : if (!s) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3881 98 : F = ifactor_sign(n, tridiv_bound(n), decomp_default_hint, s, &U);
3882 98 : if (!U) return F;
3883 28 : q = gel(U,1); /* composite, q^eq = unfactored part */
3884 28 : eq = itou(gel(U,2));
3885 28 : if (cmpii(eq == 1? q: powiu(q, eq), limit) > 0)
3886 : { /* factor further */
3887 21 : long l2 = expi(q)+1;
3888 : GEN P2, E2, F2, part;
3889 21 : if (eq > 1) limit = sqrtnint(limit, eq);
3890 21 : P2 = coltrunc_init(l2);
3891 21 : E2 = coltrunc_init(l2); F2 = mkmat2(P2,E2);
3892 21 : part = ifac_start(icopy(q), 0); /* ifac_next would destroy q */
3893 : for(;;)
3894 0 : {
3895 : long e;
3896 : GEN p;
3897 21 : if (!ifac_next(&part,&p,&e)) break;
3898 21 : vectrunc_append(P2, p);
3899 21 : vectrunc_append(E2, utoipos(e * eq));
3900 21 : q = diviiexact(q, powiu(p, e));
3901 21 : if (cmpii(q, limit) <= 0) break;
3902 : }
3903 21 : F2 = sort_factor(F2, (void*)&abscmpii, cmp_nodata);
3904 21 : F = merge_factor(F, F2, (void*)&abscmpii, cmp_nodata);
3905 : }
3906 28 : return gerepilecopy(av, F);
3907 : }
3908 :
3909 : static void
3910 109441935 : matsmalltrunc_append(GEN m, ulong p, ulong e)
3911 : {
3912 109441935 : GEN P = gel(m,1), E = gel(m,2);
3913 109441935 : long l = lg(P);
3914 109441935 : P[l] = p; lg_increase(P);
3915 109475300 : E[l] = e; lg_increase(E);
3916 109441281 : }
3917 : static GEN
3918 44059554 : matsmalltrunc_init(long l)
3919 : {
3920 44059554 : GEN P = vecsmalltrunc_init(l);
3921 43882287 : GEN E = vecsmalltrunc_init(l); return mkvec2(P,E);
3922 : }
3923 :
3924 : /* return optimal N s.t. omega(b) <= N for all b <= x */
3925 : long
3926 42394 : maxomegau(ulong x)
3927 : { /* P=primes(15); for(i=1,15, print([i, vecprod(P[1..i])])) */
3928 42394 : if (x < 30030UL)/* rare trivial cases */
3929 : {
3930 28335 : if (x < 2UL) return 0;
3931 10108 : if (x < 6UL) return 1;
3932 4508 : if (x < 30UL) return 2;
3933 3801 : if (x < 210UL) return 3;
3934 3577 : if (x < 2310UL) return 4;
3935 3507 : return 5;
3936 : }
3937 14059 : if (x < 510510UL) return 6; /* most frequent case */
3938 6867 : if (x < 9699690UL) return 7;
3939 7 : if (x < 223092870UL) return 8;
3940 : #ifdef LONG_IS_64BIT
3941 6 : if (x < 6469693230UL) return 9;
3942 0 : if (x < 200560490130UL) return 10;
3943 0 : if (x < 7420738134810UL) return 11;
3944 0 : if (x < 304250263527210UL) return 12;
3945 0 : if (x < 13082761331670030UL) return 13;
3946 0 : if (x < 614889782588491410UL) return 14;
3947 0 : return 15;
3948 : #else
3949 1 : return 9;
3950 : #endif
3951 : }
3952 : /* return optimal N s.t. omega(b) <= N for all odd b <= x */
3953 : long
3954 2969 : maxomegaoddu(ulong x)
3955 : { /* P=primes(15+1); for(i=1,15, print([i, vecprod(P[2..i+1])])) */
3956 2969 : if (x < 255255UL)/* rare trivial cases */
3957 : {
3958 1784 : if (x < 3UL) return 0;
3959 1784 : if (x < 15UL) return 1;
3960 1784 : if (x < 105UL) return 2;
3961 1784 : if (x < 1155UL) return 3;
3962 1756 : if (x < 15015UL) return 4;
3963 1756 : return 5;
3964 : }
3965 1185 : if (x < 4849845UL) return 6; /* most frequent case */
3966 0 : if (x < 111546435UL) return 7;
3967 0 : if (x < 3234846615UL) return 8;
3968 : #ifdef LONG_IS_64BIT
3969 0 : if (x < 100280245065UL) return 9;
3970 0 : if (x < 3710369067405UL) return 10;
3971 0 : if (x < 152125131763605UL) return 11;
3972 0 : if (x < 6541380665835015UL) return 12;
3973 0 : if (x < 307444891294245705UL) return 13;
3974 0 : if (x < 16294579238595022365UL) return 14;
3975 0 : return 15;
3976 : #else
3977 0 : return 9;
3978 : #endif
3979 : }
3980 :
3981 : /* If a <= c <= b , factoru(c) = L[c-a+1] */
3982 : GEN
3983 31929 : vecfactoru_i(ulong a, ulong b)
3984 : {
3985 31929 : ulong k, p, n = b-a+1, N = maxomegau(b) + 1;
3986 31929 : GEN v = const_vecsmall(n, 1);
3987 31929 : GEN L = cgetg(n+1, t_VEC);
3988 : forprime_t T;
3989 21092469 : for (k = 1; k <= n; k++) gel(L,k) = matsmalltrunc_init(N);
3990 31929 : u_forprime_init(&T, 2, usqrt(b));
3991 877736 : while ((p = u_forprime_next(&T)))
3992 : { /* p <= sqrt(b) */
3993 855062 : ulong pk = p, K = ulogint(b, p);
3994 2967132 : for (k = 1; k <= K; k++)
3995 : {
3996 2121326 : ulong j, t = a / pk, ap = t * pk;
3997 2121326 : if (ap < a) { ap += pk; t++; }
3998 : /* t = (j+a-1) \ pk */
3999 2121326 : t %= p;
4000 60606564 : for (j = ap-a+1; j <= n; j += pk)
4001 : {
4002 58494506 : if (t) { v[j] *= pk; matsmalltrunc_append(gel(L,j), p,k); }
4003 58485238 : if (++t == p) t = 0;
4004 : }
4005 2112058 : pk *= p;
4006 : }
4007 : }
4008 : /* complete factorisation of non-sqrt(b)-smooth numbers */
4009 21458391 : for (k = 1, N = a; k <= n; k++, N++)
4010 21435525 : if (uel(v,k) != N) matsmalltrunc_append(gel(L,k), N/uel(v,k),1UL);
4011 27287 : return L;
4012 : }
4013 : GEN
4014 0 : vecfactoru(ulong a, ulong b)
4015 : {
4016 0 : pari_sp av = avma;
4017 0 : return gerepilecopy(av, vecfactoru_i(a,b));
4018 : }
4019 :
4020 : /* Assume a and b odd, return L s.t. L[k] = factoru(a + 2*(k-1))
4021 : * If a <= c <= b odd, factoru(c) = L[(c-a)>>1 + 1] */
4022 : GEN
4023 2969 : vecfactoroddu_i(ulong a, ulong b)
4024 : {
4025 2969 : ulong k, p, n = ((b-a)>>1) + 1, N = maxomegaoddu(b) + 1;
4026 2969 : GEN v = const_vecsmall(n, 1);
4027 2969 : GEN L = cgetg(n+1, t_VEC);
4028 : forprime_t T;
4029 :
4030 23482401 : for (k = 1; k <= n; k++) gel(L,k) = matsmalltrunc_init(N);
4031 2969 : u_forprime_init(&T, 3, usqrt(b));
4032 238495 : while ((p = u_forprime_next(&T)))
4033 : { /* p <= sqrt(b) */
4034 248413 : ulong pk = p, K = ulogint(b, p);
4035 833116 : for (k = 1; k <= K; k++)
4036 : {
4037 597590 : ulong j, t = (a / pk) | 1UL, ap = t * pk;
4038 : /* t and ap are odd, ap multiple of pk = p^k */
4039 597590 : if (ap < a) { ap += pk<<1; t+=2; }
4040 : /* c=t*p^k by steps of 2*p^k; factorization of c*=p^k if (t,p)=1 */
4041 597590 : t %= p;
4042 43207867 : for (j = ((ap-a)>>1)+1; j <= n; j += pk)
4043 : {
4044 42623230 : if (t) { v[j] *= pk; matsmalltrunc_append(gel(L,j), p,k); }
4045 42610277 : t += 2; if (t >= p) t -= p;
4046 : }
4047 584637 : pk *= p;
4048 : }
4049 : }
4050 : /* complete factorisation of non-sqrt(b)-smooth numbers */
4051 23545812 : for (k = 1, N = a; k <= n; k++, N+=2)
4052 23544605 : if (uel(v,k) != N) matsmalltrunc_append(gel(L,k), N/uel(v,k),1UL);
4053 1207 : return L;
4054 : }
4055 : GEN
4056 0 : vecfactoroddu(ulong a, ulong b)
4057 : {
4058 0 : pari_sp av = avma;
4059 0 : return gerepilecopy(av, vecfactoroddu_i(a,b));
4060 : }
4061 :
4062 : /* If 0 <= a <= c <= b; L[c-a+1] = factoru(c)[,1] if c squarefree, else NULL */
4063 : GEN
4064 7014 : vecfactorsquarefreeu(ulong a, ulong b)
4065 : {
4066 7014 : ulong k, p, n = b-a+1, N = maxomegau(b) + 1;
4067 7014 : GEN v = const_vecsmall(n, 1);
4068 7014 : GEN L = cgetg(n+1, t_VEC);
4069 : forprime_t T;
4070 14007238 : for (k = 1; k <= n; k++) gel(L,k) = vecsmalltrunc_init(N);
4071 7014 : u_forprime_init(&T, 2, usqrt(b));
4072 838334 : while ((p = u_forprime_next(&T)))
4073 : { /* p <= sqrt(b), kill nonsquarefree */
4074 831320 : ulong j, pk = p*p, t = a / pk, ap = t * pk;
4075 831320 : if (ap < a) ap += pk;
4076 7160090 : for (j = ap-a+1; j <= n; j += pk) gel(L,j) = NULL;
4077 :
4078 831320 : t = a / p; ap = t * p;
4079 831320 : if (ap < a) { ap += p; t++; }
4080 30551556 : for (j = ap-a+1; j <= n; j += p, t++)
4081 29720236 : if (gel(L,j)) { v[j] *= p; vecsmalltrunc_append(gel(L,j), p); }
4082 : }
4083 : /* complete factorisation of non-sqrt(b)-smooth numbers */
4084 14007238 : for (k = 1, N = a; k <= n; k++, N++)
4085 14000224 : if (gel(L,k) && uel(v,k) != N) vecsmalltrunc_append(gel(L,k), N/uel(v,k));
4086 7014 : return L;
4087 : }
4088 : /* If 0 <= a <= c <= b; L[c-a+1] = factoru(c)[,1] if c squarefree and coprime
4089 : * to all the primes in sorted zv P, else NULL */
4090 : GEN
4091 3374 : vecfactorsquarefreeu_coprime(ulong a, ulong b, GEN P)
4092 : {
4093 3374 : ulong k, p, n = b-a+1, sqb = usqrt(b), N = maxomegau(b) + 1;
4094 3374 : GEN v = const_vecsmall(n, 1);
4095 3374 : GEN L = cgetg(n+1, t_VEC);
4096 : forprime_t T;
4097 30402008 : for (k = 1; k <= n; k++) gel(L,k) = vecsmalltrunc_init(N);
4098 3374 : u_forprime_init(&T, 2, sqb);
4099 94007 : while ((p = u_forprime_next(&T)))
4100 : { /* p <= sqrt(b), kill nonsquarefree */
4101 90633 : ulong j, t, ap, bad = zv_search(P, p), pk = bad ? p: p * p;
4102 90633 : t = a / pk; ap = t * pk; if (ap < a) ap += pk;
4103 21529451 : for (j = ap-a+1; j <= n; j += pk) gel(L,j) = NULL;
4104 90633 : if (bad) continue;
4105 :
4106 86846 : t = a / p; ap = t * p;
4107 86846 : if (ap < a) { ap += p; t++; }
4108 39482007 : for (j = ap-a+1; j <= n; j += p, t++)
4109 39395161 : if (gel(L,j)) { v[j] *= p; vecsmalltrunc_append(gel(L,j), p); }
4110 : }
4111 3374 : if (uel(P,lg(P)-1) <= sqb) P = NULL;
4112 : /* complete factorisation of non-sqrt(b)-smooth numbers */
4113 30402008 : for (k = 1, N = a; k <= n; k++, N++)
4114 30398634 : if (gel(L,k) && uel(v,k) != N)
4115 : {
4116 9750299 : ulong q = N / uel(v,k);
4117 9750299 : if (!P || !zv_search(P, q)) vecsmalltrunc_append(gel(L,k), q);
4118 : }
4119 3374 : return L;
4120 : }
4121 :
4122 : GEN
4123 49 : vecsquarefreeu(ulong a, ulong b)
4124 : {
4125 49 : ulong j, k, p, n = b-a+1;
4126 49 : GEN L = const_vecsmall(n, 1);
4127 : forprime_t T;
4128 49 : u_forprime_init(&T, 2, usqrt(b));
4129 462 : while ((p = u_forprime_next(&T)))
4130 : { /* p <= sqrt(b), kill nonsquarefree */
4131 413 : ulong pk = p*p, t = a / pk, ap = t * pk;
4132 413 : if (ap < a) { ap += pk; t++; }
4133 : /* t = (j+a-1) \ pk */
4134 21777 : for (j = ap-a+1; j <= n; j += pk, t++) L[j] = 0;
4135 : }
4136 48258 : for (k = j = 1; k <= n; k++)
4137 48209 : if (L[k]) L[j++] = a+k-1;
4138 49 : setlg(L,j); return L;
4139 : }
|