Line data Source code
1 : #line 2 "../src/kernel/none/halfgcd.c"
2 : /* Copyright (C) 2019 The PARI group.
3 :
4 : This file is part of the PARI/GP package.
5 :
6 : PARI/GP is free software; you can redistribute it and/or modify it under the
7 : terms of the GNU General Public License as published by the Free Software
8 : Foundation; either version 2 of the License, or (at your option) any later
9 : version. It is distributed in the hope that it will be useful, but WITHOUT
10 : ANY WARRANTY WHATSOEVER.
11 :
12 : Check the License for details. You should have received a copy of it, along
13 : with the package; see the file 'COPYING'. If not, write to the Free Software
14 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
15 :
16 : GEN
17 2338 : ZM2_sqr(GEN A)
18 : {
19 2338 : GEN a = gcoeff(A,1,1), b = gcoeff(A,1,2), a2 = sqri(a);
20 2338 : GEN c = gcoeff(A,2,1), d = gcoeff(A,2,2), d2 = sqri(d), t = addii(a,d);
21 2338 : if (equalii(b, c)) /* symetric, 3S + 1M */
22 : {
23 322 : GEN b2 = sqri(b), M = cgetg(3, t_MAT), tb = mulii(b, t);
24 322 : gel(M,1) = mkcol2(addii(a2, b2), tb);
25 322 : gel(M,2) = mkcol2(tb, addii(b2, d2)); return M;
26 : }
27 : else
28 : { /* general, 2S + 3M */
29 2016 : GEN bc = mulii(b, c);
30 2016 : retmkmat2(mkcol2(addii(bc, a2), mulii(c, t)),
31 : mkcol2(mulii(b, t), addii(bc, d2)));
32 : }
33 : }
34 :
35 : GEN
36 7285559 : ZM2_mul(GEN A, GEN B)
37 : {
38 7285559 : const long t = ZM2_MUL_LIMIT+2;
39 7285559 : GEN A11=gcoeff(A,1,1),A12=gcoeff(A,1,2), B11=gcoeff(B,1,1),B12=gcoeff(B,1,2);
40 7285559 : GEN A21=gcoeff(A,2,1),A22=gcoeff(A,2,2), B21=gcoeff(B,2,1),B22=gcoeff(B,2,2);
41 7285559 : if (lgefint(A11) < t || lgefint(B11) < t || lgefint(A22) < t || lgefint(B22) < t
42 128265 : || lgefint(A12) < t || lgefint(B12) < t || lgefint(A21) < t || lgefint(B21) < t)
43 : { /* 8M */
44 7159963 : GEN a = mulii(A11, B11), b = mulii(A12, B21);
45 7159231 : GEN c = mulii(A11, B12), d = mulii(A12, B22);
46 7159153 : GEN e = mulii(A21, B11), f = mulii(A22, B21);
47 7159265 : GEN g = mulii(A21, B12), h = mulii(A22, B22);
48 7159115 : retmkmat2(mkcol2(addii(a,b), addii(e,f)), mkcol2(addii(c,d), addii(g,h)));
49 : } else
50 : { /* Strassen: 7M */
51 125596 : GEN M1 = mulii(addii(A11,A22), addii(B11,B22));
52 125596 : GEN M2 = mulii(addii(A21,A22), B11);
53 125596 : GEN M3 = mulii(A11, subii(B12,B22));
54 125596 : GEN M4 = mulii(A22, subii(B21,B11));
55 125596 : GEN M5 = mulii(addii(A11,A12), B22);
56 125596 : GEN M6 = mulii(subii(A21,A11), addii(B11,B12));
57 125596 : GEN M7 = mulii(subii(A12,A22), addii(B21,B22));
58 125596 : GEN T1 = addii(M1,M4), T2 = subii(M7,M5);
59 125596 : GEN T3 = subii(M1,M2), T4 = addii(M3,M6);
60 125596 : retmkmat2(mkcol2(addii(T1,T2), addii(M2,M4)),
61 : mkcol2(addii(M3,M5), addii(T3,T4)));
62 : }
63 : }
64 :
65 : static GEN
66 711 : matid2(void)
67 : {
68 711 : retmkmat2(mkcol2(gen_1,gen_0),
69 : mkcol2(gen_0,gen_1));
70 : }
71 :
72 : /* Return M*[q,1;1,0] */
73 : static GEN
74 3063872 : mulq(GEN M, GEN q)
75 : {
76 3063872 : GEN u, v, res = cgetg(3, t_MAT);
77 3063859 : u = addii(mulii(gcoeff(M,1,1), q), gcoeff(M,1,2));
78 3063880 : v = addii(mulii(gcoeff(M,2,1), q), gcoeff(M,2,2));
79 3063858 : gel(res,1) = mkcol2(u, v);
80 3063853 : gel(res,2) = gel(M,1);
81 3063853 : return res;
82 : }
83 : static GEN
84 71 : mulqab(GEN M, GEN q, GEN *ap, GEN *bp)
85 : {
86 71 : GEN b = subii(*ap, mulii(*bp, q));
87 71 : *ap = *bp; *bp = b;
88 71 : return mulq(M,q);
89 : }
90 :
91 : /* Return M*[q,1;1,0]^-1 */
92 :
93 : static GEN
94 1284 : mulqi(GEN M, GEN q, GEN *ap, GEN *bp)
95 : {
96 : GEN u, v, res, a;
97 1284 : a = addii(mulii(*ap, q), *bp);
98 1284 : *bp = *ap; *ap = a;
99 1284 : res = cgetg(3, t_MAT);
100 1284 : u = subii(gcoeff(M,1,1),mulii(gcoeff(M,1,2), q));
101 1284 : v = subii(gcoeff(M,2,1),mulii(gcoeff(M,2,2), q));
102 1284 : gel(res,1) = gel(M,2);
103 1284 : gel(res,2) = mkcol2(u,v);
104 1284 : return res;
105 : }
106 :
107 : /* test whether n is a power of 2 */
108 : static long
109 23150430 : isint2n(GEN n)
110 : {
111 : GEN x;
112 23150430 : long lx = lgefint(n), i;
113 23150430 : if (lx == 2) return 0;
114 23150430 : x = int_MSW(n);
115 23150430 : if (*(ulong*)x != 1UL<<expu(*(ulong*)x) ) return 0;
116 545975 : for (i = 3; i < lx; i++)
117 : {
118 538590 : x = int_precW(x); if (*x) return 0;
119 : }
120 7385 : return 1;
121 : }
122 :
123 : static long
124 23150577 : uexpi(GEN a)
125 23150577 : { return expi(a)+!isint2n(a); }
126 :
127 : static GEN
128 123177 : FIXUP0(GEN M, GEN *a, GEN *b, long m)
129 : {
130 123177 : long cnt=0;
131 156259 : while (expi(*b) >= m)
132 : {
133 33082 : GEN r, q = dvmdii(*a, *b, &r);
134 33082 : *a = *b; *b = r;
135 33082 : M = mulq(M, q);
136 33082 : cnt++;
137 : };
138 123177 : if (cnt>6) pari_err_BUG("FIXUP0");
139 123177 : return M;
140 : }
141 :
142 : static long
143 6496300 : signdet(GEN Q)
144 : {
145 6496300 : long a = Mod4(gcoeff(Q,1,1)), b = Mod4(gcoeff(Q,1,2));
146 6496271 : long c = Mod4(gcoeff(Q,2,1)), d = Mod4(gcoeff(Q,2,2));
147 6497490 : return ((a*d-b*c)&3)==1 ? 1 : -1;
148 : }
149 :
150 : static GEN
151 6313495 : ZM_inv2(GEN M)
152 : {
153 6313495 : long e = signdet(M);
154 9997510 : if (e==1) return mkmat22(gcoeff(M,2,2),negi(gcoeff(M,1,2)),
155 3683211 : negi(gcoeff(M,2,1)),gcoeff(M,1,1));
156 2631155 : else return mkmat22(negi(gcoeff(M,2,2)),gcoeff(M,1,2),
157 2631230 : gcoeff(M,2,1),negi(gcoeff(M,1,1)));
158 : }
159 :
160 : static GEN
161 1284 : lastq(GEN Q)
162 : {
163 1284 : GEN p = gcoeff(Q,1,1), q = gcoeff(Q,1,2), s = gcoeff(Q,2,2);
164 1284 : if (signe(q)==0) pari_err_BUG("halfgcd");
165 1284 : if (signe(s)==0) return p;
166 1284 : if (equali1(q)) return subiu(p,1);
167 1284 : return divii(p, q);
168 : }
169 :
170 : static GEN
171 7802 : mulT(GEN Q, GEN *ap, GEN *bp)
172 : {
173 7802 : *ap = addii(*ap, *bp);
174 7802 : *bp = negi(*bp);
175 7802 : return mkmat2(gel(Q,1),
176 7802 : mkcol2(subii(gcoeff(Q,1,1), gcoeff(Q,1,2))
177 7802 : , subii(gcoeff(Q,2,1), gcoeff(Q,2,2))));
178 : }
179 :
180 : static GEN
181 182910 : FIXUP1(GEN M, GEN a, GEN b, long m, long t, GEN *ap, GEN *bp)
182 : {
183 182910 : GEN Q = gel(M,1), a0 = gel(M,2), b0 = gel(M,3);
184 182910 : GEN q, am = remi2n(a, m), bm = remi2n(b, m);
185 183053 : if (signdet(Q)==-1)
186 : {
187 121165 : *ap = subii(mulii(bm, gcoeff(Q,1,2)),mulii(am, gcoeff(Q,2,2)));
188 121129 : *bp = subii(mulii(am, gcoeff(Q,2,1)),mulii(bm, gcoeff(Q,1,1)));
189 121305 : *ap = addii(*ap, shifti(addii(a0, gcoeff(Q,2,2)), m));
190 121265 : *bp = addii(*bp, shifti(subii(b0, gcoeff(Q,2,1)), m));
191 120795 : if (signe(*bp) >= 0)
192 112910 : return Q;
193 7885 : if (expi(addii(*ap,*bp)) >= m+t)
194 7802 : return mulT(Q, ap ,bp);
195 83 : q = lastq(Q);
196 83 : Q = mulqi(Q, q, ap, bp);
197 83 : if (cmpiu(q, 2)>=0)
198 71 : return mulqab(Q, subiu(q,1), ap, bp);
199 : else
200 12 : return mulqi(Q, lastq(Q), ap, bp);
201 : }
202 : else
203 : {
204 61771 : *ap = subii(mulii(am, gcoeff(Q,2,2)),mulii(bm, gcoeff(Q,1,2)));
205 61771 : *bp = subii(mulii(bm, gcoeff(Q,1,1)),mulii(am, gcoeff(Q,2,1)));
206 61771 : *ap = addii(*ap, shifti(subii(a0, gcoeff(Q,2,2)), m));
207 61771 : *bp = addii(*bp, shifti(addii(b0, gcoeff(Q,2,1)), m));
208 61771 : if (expi(*ap) >= m+t)
209 60582 : return FIXUP0(Q, ap, bp, m+t);
210 : else
211 1189 : return signe(gcoeff(Q,1,2))==0? Q: mulqi(Q, lastq(Q), ap, bp);
212 : }
213 : }
214 :
215 : static long
216 23088054 : magic_threshold(GEN a)
217 23088054 : { return (3+uexpi(a))>>1; }
218 :
219 : static GEN
220 15044169 : HGCD_basecase(GEN y, GEN x)
221 : {
222 15044169 : pari_sp av = avma;
223 : GEN d, d1, q, r;
224 : GEN u, u1, v, v1;
225 : ulong xu, xu1, xv, xv1; /* Lehmer stage recurrence matrix */
226 : int lhmres; /* Lehmer stage return value */
227 :
228 15044169 : long m = magic_threshold(y);
229 :
230 : /* There is no special case for single-word numbers since this is
231 : * mainly meant to be used with large moduli. */
232 15044246 : if (cmpii(y,x) <= 0)
233 : {
234 97737 : d = x; d1 = y;
235 97737 : u = gen_1; u1 = gen_0;
236 97737 : v = gen_0; v1 = gen_1;
237 : } else
238 : {
239 14946479 : d = y; d1 = x;
240 14946479 : u = gen_0; u1 = gen_1;
241 14946479 : v = gen_1; v1 = gen_0;
242 : }
243 34533142 : while (lgefint(d) > 3 && expi(d1) >= m + BITS_IN_LONG + 1)
244 : {
245 : /* do a Lehmer-Jebelean round */
246 19832310 : lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, 0);
247 :
248 19839536 : if (lhmres)
249 : {
250 18808374 : if (lhmres == 1 || lhmres == -1)
251 : {
252 469823 : if (xv1 == 1)
253 : {
254 403509 : r = subii(d,d1); d = d1; d1 = r;
255 403486 : r = addii(u,u1); u = u1; u1 = r;
256 403388 : r = addii(v,v1); v = v1; v1 = r;
257 : }
258 : else
259 : {
260 66314 : r = subii(d, mului(xv1,d1)); d = d1; d1 = r;
261 66314 : r = addii(u, mului(xv1,u1)); u = u1; u1 = r;
262 66314 : r = addii(v, mului(xv1,v1)); v = v1; v1 = r;
263 : }
264 : }
265 : else
266 : {
267 18338551 : r = subii(muliu(d,xu), muliu(d1,xv));
268 18333804 : d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
269 18333680 : r = addii(muliu(u,xu), muliu(u1,xv));
270 18331964 : u1 = addii(muliu(u,xu1), muliu(u1,xv1)); u = r;
271 18331827 : r = addii(muliu(v,xu), muliu(v1,xv));
272 18331705 : v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
273 18334983 : if (lhmres&1) togglesign(d); else togglesign(d1);
274 : }
275 : } /* lhmres != 0 */
276 19835669 : if (expi(d1) < m) break;
277 :
278 19489364 : if (lhmres <= 0 && signe(d1))
279 : {
280 1085664 : q = dvmdii(d,d1,&r);
281 1085677 : d = d1; d1 = r;
282 1085677 : r = addii(u, mulii(q,u1)); u = u1; u1 = r;
283 1085284 : r = addii(v, mulii(q,v1)); v = v1; v1 = r;
284 : }
285 19488980 : if (gc_needed(av,1))
286 : {
287 0 : if(DEBUGMEM>1) pari_warn(warnmem,"ratlift");
288 0 : gerepileall(av, 6, &d, &d1, &u, &u1, &v, &v1);
289 : }
290 : }
291 84636328 : while (expi(d1) >= m)
292 : {
293 69592091 : GEN r, q = dvmdii(d,d1, &r);
294 69601506 : d = d1; d1 = r; swap(u,u1); swap(v,v1);
295 69601506 : u1 = addii(mulii(u, q), u1);
296 69593188 : v1 = addii(mulii(v, q), v1);
297 : }
298 15036480 : return gerepilecopy(av, mkvec3(mkmat22(u1,u,v1,v), d, d1));
299 : }
300 :
301 : static GEN HGCD(GEN x, GEN y);
302 :
303 : /*
304 : Based on
305 : Klaus Thull and Chee K. Yap,
306 : A unified approach to HGCD algorithms for polynomials andintegers,
307 : 1990, Manuscript.
308 : URL: http://cs.nyu.edu/cs/faculty/yap/papers.
309 : */
310 :
311 : static GEN
312 120682 : HGCD_split(GEN a, GEN b)
313 : {
314 120682 : pari_sp av = avma;
315 120682 : long m = magic_threshold(a), t, l, k, tp;
316 : GEN a0, b0, ap, bp, c, d, c0, d0, cp, dp, R, S, T, q, r;
317 120589 : if (signe(b) < 0 || cmpii(a,b)<0) pari_err_BUG("HGCD_split");
318 120617 : if (expi(b) < m)
319 450 : return gerepilecopy(av, mkvec3(matid2(), a, b));
320 120114 : a0 = addiu(shifti(a, -m), 1);
321 120207 : if (cmpiu(a0,7) <= 0)
322 : {
323 0 : R = FIXUP0(matid2(), &a, &b, m);
324 0 : return gerepilecopy(av, mkvec3(R, a, b));
325 : }
326 120223 : b0 = shifti(b,-m);
327 120448 : t = magic_threshold(a0);
328 120170 : R = FIXUP1(HGCD(a0,b0),a, b, m, t, &ap, &bp);
329 119971 : if (expi(bp) < m)
330 57278 : return gerepilecopy(av, mkvec3(R, ap, bp));
331 62595 : q = dvmdii(ap, bp, &r);
332 62595 : c = bp; d = r;
333 62595 : if (cmpiu(shifti(c,-m),6) <= 0)
334 : {
335 21 : R = FIXUP0(mulq(R, q), &c, &d, m);
336 21 : return gerepilecopy(av, mkvec3(R, c, d));
337 : }
338 62573 : l = uexpi(c);
339 62573 : k = 2*m-l-1; if (k<0) pari_err_BUG("halfgcd");
340 62573 : c0 = addiu(shifti(c, -k), 1); if (cmpiu(c0,8)<0) pari_err_BUG("halfgcd");
341 62574 : d0 = shifti(d, -k);
342 62574 : tp = magic_threshold(c0);
343 62574 : S = FIXUP1(HGCD(c0,d0), c, d, k, tp, &cp, &dp);
344 62574 : if (!(expi(cp)>=m+1 && m+1 > expi(dp))) pari_err_BUG("halfgcd");
345 62574 : T = FIXUP0(ZM2_mul(mulq(R, q), S), &cp, &dp, m);
346 62574 : return gerepilecopy(av, mkvec3(T, cp, dp));
347 : }
348 :
349 : static GEN
350 15164596 : HGCD(GEN x, GEN y)
351 : {
352 15164596 : if (lgefint(y)-2 < HALFGCD_LIMIT)
353 15044126 : return HGCD_basecase(x, y);
354 : else
355 120470 : return HGCD_split(x, y);
356 : }
357 :
358 : static GEN
359 29421552 : HGCD0(GEN x, GEN y)
360 : {
361 29421552 : if (signe(y) >= 0 && cmpii(x, y) >= 0)
362 14978145 : return HGCD(x, y);
363 14443394 : if (cmpii(x, y) < 0)
364 : {
365 12049503 : GEN M = HGCD0(y, x), Q = gel(M,1);
366 12048185 : return mkvec3(mkmat22(gcoeff(Q,2,1),gcoeff(Q,2,2),gcoeff(Q,1,1),gcoeff(Q,1,2)),
367 12049885 : gel(M,2),gel(M,3));
368 : } /* Now y <= x*/
369 2394014 : if (signe(x) <= 0)
370 : { /* y <= x <=0 */
371 3856 : GEN M = HGCD(negi(y), negi(x)), Q = gel(M,1);
372 3856 : return mkvec3(mkmat22(negi(gcoeff(Q,2,1)),negi(gcoeff(Q,2,2)),
373 3856 : negi(gcoeff(Q,1,1)),negi(gcoeff(Q,1,2))),
374 3856 : gel(M,2),gel(M,3));
375 : }
376 : else /* y <= 0 <=x */
377 : {
378 2390158 : GEN M = HGCD0(x, negi(y)), Q = gel(M,1);
379 2390158 : return mkvec3(mkmat22(gcoeff(Q,1,1),gcoeff(Q,1,2),negi(gcoeff(Q,2,1)),negi(gcoeff(Q,2,2))),
380 2390158 : gel(M,2),gel(M,3));
381 : }
382 : }
383 :
384 : GEN
385 6313961 : halfgcdii(GEN A, GEN B)
386 : {
387 6313961 : pari_sp av = avma;
388 6313961 : GEN M, Q, a, b, m = abscmpii(A, B)>0 ? A: B;
389 6313955 : M = HGCD0(A,B); Q = gel(M,1); a = gel(M,2); b = gel(M,3);
390 9280636 : while (signe(b) && abscmpii(sqri(b), m) >= 0)
391 : {
392 2967988 : GEN r, q = dvmdii(a, b, &r);
393 2967956 : a = b; b = r;
394 2967956 : Q = mulq(Q, q);
395 : }
396 6313000 : return gerepilecopy(av, mkvec2(ZM_inv2(Q),mkcol2(a,b)));
397 : }
|