Line data Source code
1 : /* Copyright (C) 2008 The PARI group.
2 :
3 : This file is part of the PARI/GP package.
4 :
5 : PARI/GP is free software; you can redistribute it and/or modify it under the
6 : terms of the GNU General Public License as published by the Free Software
7 : Foundation; either version 2 of the License, or (at your option) any later
8 : version. It is distributed in the hope that it will be useful, but WITHOUT
9 : ANY WARRANTY WHATSOEVER.
10 :
11 : Check the License for details. You should have received a copy of it, along
12 : with the package; see the file 'COPYING'. If not, write to the Free Software
13 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
14 :
15 : #include "pari.h"
16 : #include "paripriv.h"
17 :
18 : #define DEBUGLEVEL DEBUGLEVEL_qflll
19 :
20 : static int
21 45828 : RgM_is_square_mat(GEN x) { long l = lg(x); return l == 1 || l == lgcols(x); }
22 :
23 : static long
24 4178700 : ZM_is_upper(GEN R)
25 : {
26 4178700 : long i,j, l = lg(R);
27 4178700 : if (l != lgcols(R)) return 0;
28 8072770 : for(i = 1; i < l; i++)
29 8713047 : for(j = 1; j < i; j++)
30 4478179 : if (signe(gcoeff(R,i,j))) return 0;
31 252635 : return 1;
32 : }
33 :
34 : static long
35 606176 : ZM_is_knapsack(GEN R)
36 : {
37 606176 : long i,j, l = lg(R);
38 606176 : if (l != lgcols(R)) return 0;
39 843349 : for(i = 2; i < l; i++)
40 2899018 : for(j = 1; j < l; j++)
41 2661845 : if ( i!=j && signe(gcoeff(R,i,j))) return 0;
42 92442 : return 1;
43 : }
44 :
45 : static long
46 1182560 : ZM_is_lower(GEN R)
47 : {
48 1182560 : long i,j, l = lg(R);
49 1182560 : if (l != lgcols(R)) return 0;
50 2058994 : for(i = 1; i < l; i++)
51 2384047 : for(j = 1; j < i; j++)
52 1291951 : if (signe(gcoeff(R,j,i))) return 0;
53 34903 : return 1;
54 : }
55 :
56 : static GEN
57 34904 : RgM_flip(GEN R)
58 : {
59 : GEN M;
60 : long i,j,l;
61 34904 : M = cgetg_copy(R, &l);
62 181322 : for(i = 1; i < l; i++)
63 : {
64 146418 : gel(M,i) = cgetg(l, t_COL);
65 910402 : for(j = 1; j < l; j++)
66 763984 : gmael(M,i,j) = gmael(R,l-i, l-j);
67 : }
68 34904 : return M;
69 : }
70 :
71 : static GEN
72 0 : RgM_flop(GEN R)
73 : {
74 : GEN M;
75 : long i,j,l;
76 0 : M = cgetg_copy(R, &l);
77 0 : for(i = 1; i < l; i++)
78 : {
79 0 : gel(M,i) = cgetg(l, t_COL);
80 0 : for(j = 1; j < l; j++)
81 0 : gmael(M,i,j) = gmael(R,i, l-j);
82 : }
83 0 : return M;
84 : }
85 :
86 : /* Assume x and y has same type! */
87 : INLINE int
88 4080033 : mpabscmp(GEN x, GEN y)
89 : {
90 4080033 : return (typ(x)==t_INT) ? abscmpii(x,y) : abscmprr(x,y);
91 : }
92 :
93 : /****************************************************************************/
94 : /*** FLATTER ***/
95 : /****************************************************************************/
96 : /* Implementation of "FLATTER" algorithm based on
97 : * <https://eprint.iacr.org/2023/237>
98 : * Fast Practical Lattice Reduction through Iterated Compression
99 : *
100 : * Keegan Ryan, University of California, San Diego
101 : * Nadia Heninger, University of California, San Diego. BA20230925 */
102 : static long
103 1338551 : drop(GEN R)
104 : {
105 1338551 : long i, n = lg(R)-1;
106 1338551 : long s = 0, m = mpexpo(gcoeff(R, 1, 1));
107 5418584 : for (i = 2; i <= n; ++i)
108 : {
109 4080033 : if (mpabscmp(gcoeff(R, i, i), gcoeff(R, i - 1, i - 1)) >= 0)
110 : {
111 2761303 : s += m - mpexpo(gcoeff(R, i - 1, i - 1));
112 2761303 : m = mpexpo(gcoeff(R, i, i));
113 : }
114 : }
115 1338551 : s += m - mpexpo(gcoeff(R, n, n));
116 1338551 : return s;
117 : }
118 :
119 : static long
120 1335584 : potential(GEN R)
121 : {
122 1335584 : long i, n = lg(R)-1;
123 1335584 : long s = 0, mul = n-1;;
124 6734142 : for (i = 1; i <= n; i++, mul-=2) s += mul * mpexpo(gcoeff(R,i,i));
125 1335584 : return s;
126 : }
127 :
128 : /* U upper-triangular invertible:
129 : * Bound on the exponent of the condition number of U.
130 : * Algo 8.13 in Higham, Accuracy and stability of numercal algorithms. */
131 : static long
132 4689343 : condition_bound(GEN U, int lower)
133 : {
134 4689343 : long n = lg(U)-1, e, i, j;
135 : GEN y;
136 4689343 : pari_sp av = avma;
137 4689343 : y = cgetg(n+1, t_VECSMALL);
138 4689342 : e = y[n] = -gexpo(gcoeff(U,n,n));
139 18638233 : for (i=n-1; i>0; i--)
140 : {
141 13948891 : long s = 0;
142 49574993 : for (j=i+1; j<=n; j++)
143 35626103 : s = maxss(s, (lower? gexpo(gcoeff(U,j,i)): gexpo(gcoeff(U,i,j))) + y[j]);
144 13948890 : y[i] = s - gexpo(gcoeff(U,i,i));
145 13948892 : e = maxss(e, y[i]);
146 : }
147 4689342 : return gc_long(av, gexpo(U) + e);
148 : }
149 :
150 : static long
151 5149454 : gsisinv(GEN M)
152 : {
153 5149454 : long i, l = lg(M);
154 25904283 : for (i = 1; i < l; ++i)
155 20755237 : if (! signe(gmael(M, i, i))) return 0;
156 5149046 : return 1;
157 : }
158 :
159 : INLINE long
160 7414186 : nbits2prec64(long n)
161 : {
162 7414186 : return nbits2prec(((n+63)>>6)<<6);
163 : }
164 :
165 : static long
166 5806200 : spread(GEN R)
167 : {
168 5806200 : long i, n = lg(R)-1, m = mpexpo(gcoeff(R, 1, 1)), M = m;
169 23359476 : for (i = 2; i <= n; ++i)
170 : {
171 17553277 : long e = mpexpo(gcoeff(R, i, i));
172 17553273 : if (e < m) m = e;
173 17553273 : if (e > M) M = e;
174 : }
175 5806199 : return M - m;
176 : }
177 :
178 : static long
179 4689343 : GS_extraprec(GEN L, int lower)
180 : {
181 4689343 : long C = condition_bound(L, lower), S = spread(L), n = lg(L)-1;
182 4689344 : return maxss(2*S+2*n, C-S-2*n); /* = 2*S + 2*n + maxss(0, C-3*S-4*n) */
183 : }
184 :
185 : static GEN
186 2967 : RgM_Cholesky_dynprec(GEN M)
187 : {
188 2967 : pari_sp ltop = avma;
189 : GEN L;
190 2967 : long minprec = lg(M) + 30, bitprec = minprec, prec;
191 : while (1)
192 4877 : {
193 : long mbitprec;
194 7844 : prec = nbits2prec64(bitprec);
195 7844 : L = RgM_Cholesky(RgM_gtofp(M, prec), prec); /* upper-triangular */
196 7844 : if (!L)
197 : {
198 1458 : bitprec *= 2;
199 1458 : set_avma(ltop);
200 1458 : continue;
201 : }
202 6386 : mbitprec = minprec + GS_extraprec(L, 0);
203 6386 : if (bitprec >= mbitprec)
204 2967 : break;
205 3419 : bitprec = maxss((4*bitprec)/3, mbitprec);
206 3419 : set_avma(ltop);
207 : }
208 2967 : return gerepilecopy(ltop, L);
209 : }
210 :
211 : static GEN
212 1312 : gramschmidt_upper(GEN M)
213 : {
214 1312 : long bitprec = lg(M)-1 + 31 + GS_extraprec(M, 0);
215 1312 : return RgM_gtofp(M, nbits2prec64(bitprec));
216 : }
217 :
218 : static GEN
219 2671164 : gramschmidt_dynprec(GEN M)
220 : {
221 2671164 : pari_sp ltop = avma;
222 2671164 : long minprec = lg(M) + 30, bitprec = minprec;
223 2671164 : if (ZM_is_upper(M)) return gramschmidt_upper(M);
224 : while (1)
225 3608340 : {
226 : GEN B, Q, L;
227 6278192 : long prec = nbits2prec64(bitprec), mbitprec;
228 6278186 : if (!QR_init(RgM_gtofp(M, prec), &B, &Q, &L, prec) || !gsisinv(L))
229 : {
230 1596538 : bitprec *= 2;
231 1596538 : set_avma(ltop);
232 1596547 : continue;
233 : }
234 4681645 : mbitprec = minprec + GS_extraprec(L, 1);
235 4681646 : if (bitprec >= mbitprec)
236 2669853 : return gerepilecopy(ltop, shallowtrans(L));
237 2011793 : bitprec = maxss((4*bitprec)/3, mbitprec);
238 2011793 : set_avma(ltop);
239 : }
240 : }
241 : /* return -T1 * round(T1^-1*(R1^-1*R2)*T3) */
242 : static GEN
243 1335584 : sizered(GEN T1, GEN T3, GEN R1, GEN R2)
244 : {
245 1335584 : pari_sp ltop = avma;
246 : long e;
247 1335584 : return gerepileupto(ltop, ZM_mul(ZM_neg(T1), grndtoi(gmul(ZM_inv(T1,NULL),
248 : RgM_mul(RgM_mul(RgM_inv_upper(R1), R2), T3)), &e)));
249 : }
250 :
251 : static GEN
252 1335584 : flat(GEN M, long flag, GEN *pt_T, long *pt_s, long *pt_pot)
253 : {
254 1335584 : pari_sp ltop = avma;
255 : GEN R, R1, R2, R3, T1, T2, T3, T, S;
256 1335584 : long k = lg(M)-1, n = k>>1, n2 = k - n, m = n>>1;
257 1335584 : long keepfirst = flag & LLL_KEEP_FIRST, inplace = flag & LLL_INPLACE;
258 : /* for k = 3, we want n = 1; n2 = 2; m = 0 */
259 : /* for k = 5, n = 2; n2 = 3; m = 1 */
260 1335584 : R = gramschmidt_dynprec(M);
261 1335584 : R1 = matslice(R, 1, n, 1, n);
262 1335584 : R2 = matslice(R, 1, n, n + 1, k);
263 1335584 : R3 = matslice(R, n + 1, k, n + 1, k);
264 1335584 : T1 = lllfp(R1, 0.99, LLL_IM| LLL_UPPER| LLL_NOCERTIFY| (keepfirst ? LLL_KEEP_FIRST: 0));
265 1335584 : T3 = lllfp(R3, 0.99, LLL_IM| LLL_UPPER| LLL_NOCERTIFY);
266 1335584 : T2 = sizered(T1, T3, R1, R2);
267 1335582 : T = shallowmatconcat(mkmat22(T1,T2,gen_0,T3));
268 1335584 : M = ZM_mul(M, T);
269 1335582 : R = gramschmidt_dynprec(M);
270 1335584 : R3 = matslice(R, m + 1, m + n2, m + 1, m + n2);
271 1335584 : T3 = lllfp(R3, 0.99, LLL_IM| LLL_UPPER| LLL_NOCERTIFY);
272 2671168 : S = shallowmatconcat(diagonal(
273 576087 : m == 0 ? mkvec2(T3, matid(k - m - n2))
274 0 : : m+n2 == k ? mkvec2(matid(m), T3)
275 759497 : : mkvec3(matid(m), T3, matid(k - m - n2))));
276 1335584 : M = ZM_mul(M, S);
277 1335582 : if (!inplace) *pt_T = ZM_mul(T, S);
278 1335584 : *pt_s = drop(R);
279 1335584 : *pt_pot = potential(R);
280 1335584 : return gc_all(ltop, inplace ? 1: 2, &M, pt_T);
281 : }
282 :
283 : static GEN
284 618256 : ZM_flatter(GEN M, long flag)
285 : {
286 618256 : pari_sp ltop = avma, btop;
287 618256 : long i, n = lg(M)-1;
288 618256 : long s = -1, pot = LONG_MAX;
289 : GEN T;
290 : pari_timer ti;
291 618256 : long lti = 1;
292 618256 : long inplace = flag & LLL_INPLACE, cert = !(flag & LLL_NOCERTIFY);
293 618256 : T = matid(n);
294 618258 : if (DEBUGLEVEL>=3)
295 : {
296 0 : timer_start(&ti);
297 0 : if (cert)
298 0 : err_printf("flatter dim = %ld size = %ld\n", n, ZM_max_expi(M));
299 : }
300 618258 : btop = avma;
301 618258 : for (i = 1;;i++)
302 717326 : {
303 : long t, pot2;
304 1335584 : GEN U, M2 = flat(M, flag, &U, &t, &pot2);
305 1335584 : if (t==0) { s = t; break; }
306 760526 : if (s >= 0)
307 : {
308 435516 : if (s==t && pot>=pot2)
309 43200 : break;
310 392316 : if (s<t && i > 20)
311 : {
312 0 : if (DEBUGLEVEL >= 3) err_printf("BACK:%ld:%ld:%g\n", n, i, s);
313 0 : break;
314 : }
315 : }
316 717326 : if (DEBUGLEVEL>=3 && (cert || timer_get(&ti) > 1000))
317 : {
318 0 : if (i==lti)
319 0 : timer_printf(&ti, "FLATTER, dim %ld, step %ld: \t slope=%0.10g \t pot=%0.10g", n, i, ((double)t)/n, ((double)pot2)/(n*(n+1)));
320 : else
321 0 : timer_printf(&ti, "FLATTER, dim %ld, steps %ld-%ld: \t slope=%0.10g \t pot=%0.10g", n, lti,i, ((double)t)/n, ((double)pot2)/(n*(n+1)));
322 0 : lti = i+1;
323 : }
324 717326 : s = t;
325 717326 : pot = pot2;
326 717326 : M = M2;
327 717326 : if (!inplace) T = ZM_mul(T, U);
328 717326 : if (gc_needed(btop, 1))
329 0 : gerepileall(btop, 2, &M, &T);
330 : }
331 618258 : if (DEBUGLEVEL>=3 && (cert || timer_get(&ti) > 1000))
332 : {
333 0 : if (i==lti)
334 0 : timer_printf(&ti, "FLATTER, dim %ld, final \t slope=%0.10g \t pot=%0.10g", n, ((double)s)/n, ((double)pot)/(n*(n+1)));
335 : else
336 0 : timer_printf(&ti, "FLATTER, dim %ld, steps %ld-final:\t slope=%0.10g \t pot=%0.10g", n, lti, ((double)s)/n, ((double)pot)/(n*(n+1)));
337 : }
338 618258 : return gerepilecopy(ltop, inplace ? M: T);
339 : }
340 :
341 : static GEN
342 616255 : ZM_flatter_rank(GEN M, long rank, long flag)
343 : {
344 : pari_timer ti;
345 616255 : pari_sp ltop = avma;
346 : GEN T;
347 616255 : long i, n = lg(M)-1;
348 616255 : if (rank == n) return ZM_flatter(M, flag);
349 3778 : T = matid(n);
350 3778 : if (DEBUGLEVEL>=3) timer_start(&ti);
351 3778 : for (i = 1;; i++)
352 2001 : {
353 5779 : GEN S = ZM_flatter(vconcat(gshift(M,i),matid(n)), flag);
354 5779 : if (DEBUGLEVEL>=3)
355 0 : timer_printf(&ti,"FLATTERRANK step %ld: %ld",i,expi(gnorml2(S)));
356 5779 : if (ZM_isidentity(S)) break;
357 2001 : T = ZM_mul(T, S);
358 2001 : M = ZM_mul(M, S);
359 2001 : if (gc_needed(ltop, 1))
360 0 : gerepileall(ltop, 2, &M, &T);
361 : }
362 3778 : return gerepilecopy(ltop, T);
363 : }
364 :
365 : static GEN
366 2967 : flattergram_i(GEN M, long flag, long *pt_s)
367 : {
368 2967 : pari_sp ltop = avma;
369 : GEN R, T;
370 2967 : R = RgM_Cholesky_dynprec(M);
371 2967 : *pt_s = drop(R);
372 2967 : T = lllfp(R, 0.99, LLL_IM| LLL_UPPER| LLL_NOCERTIFY | (flag&LLL_KEEP_FIRST));
373 2967 : return gerepilecopy(ltop, T);
374 : }
375 :
376 : static GEN
377 961 : ZM_flattergram(GEN M, long flag)
378 : {
379 961 : pari_sp ltop = avma, btop;
380 : GEN T;
381 961 : long i, n = lg(M)-1;
382 : pari_timer ti;
383 961 : long s = -1;
384 961 : if (DEBUGLEVEL>=3)
385 : {
386 0 : timer_start(&ti);
387 0 : err_printf("FLATTERGRAM dim = %ld size = %ld\n", n, ZM_max_expi(M));
388 : }
389 961 : btop = avma;
390 961 : T = matid(n);
391 961 : for (i = 1;; i++)
392 2006 : {
393 : long t;
394 2967 : GEN S = flattergram_i(M, flag, &t);
395 2967 : t = expi(gnorml2(S));
396 2967 : if (t==0) { s = t; break; }
397 2967 : if (s)
398 : {
399 2967 : double st = s-t;
400 2967 : if (st == 0)
401 961 : break;
402 2006 : if (st < 0 && i > 20)
403 : {
404 0 : if (DEBUGLEVEL >= 3) err_printf("BACK:%ld:%ld:%0.10g\n", n, i, ((double)s)/n);
405 0 : break;
406 : }
407 : }
408 2006 : T = ZM_mul(T, S);
409 2006 : M = qf_ZM_apply(M, S);
410 2006 : s = t;
411 2006 : if (DEBUGLEVEL >= 3)
412 0 : timer_printf(&ti, "FLATTERGRAM, dim %ld step %ld, slope=%0.10g", n, i, ((double)s)/n);
413 2006 : if (gc_needed(btop, 1))
414 0 : gerepileall(btop, 2, &M, &T);
415 : }
416 961 : if (DEBUGLEVEL >= 3)
417 0 : timer_printf(&ti, "FLATTERGRAM, dim %ld, slope=%0.10g", n, ((double)s)/n);
418 961 : return gerepilecopy(ltop, T);
419 : }
420 :
421 : static GEN
422 961 : ZM_flattergram_rank(GEN M, long rank, long flag)
423 : {
424 : pari_timer ti;
425 961 : pari_sp ltop = avma;
426 : GEN T;
427 961 : long i, n = lg(M)-1;
428 961 : if (rank == n) return ZM_flattergram(M, flag);
429 0 : T = matid(n);
430 0 : if (DEBUGLEVEL>=3) timer_start(&ti);
431 0 : for (i = 1;; i++)
432 0 : {
433 0 : GEN S = ZM_flattergram(RgM_Rg_add(gshift(M, i), gen_1), flag);
434 0 : if (DEBUGLEVEL>=3)
435 0 : timer_printf(&ti,"FLATTERGRAMRANK step %ld: %ld",i,expi(gnorml2(S)));
436 0 : if (ZM_isidentity(S)) break;
437 0 : T = ZM_mul(T, S);
438 0 : M = qf_ZM_apply(M, S);
439 0 : if (gc_needed(ltop, 1))
440 0 : gerepileall(ltop, 2, &M, &T);
441 : }
442 0 : return gerepilecopy(ltop, T);
443 : }
444 :
445 : static double
446 10701844 : pari_rint(double a)
447 : {
448 : #ifdef HAS_RINT
449 10701844 : return rint(a);
450 : #else
451 : const double two_to_52 = 4.5035996273704960e+15;
452 : double fa = fabs(a);
453 : double r = two_to_52 + fa;
454 : if (fa >= two_to_52) {
455 : r = a;
456 : } else {
457 : r = r - two_to_52;
458 : if (a < 0) r = -r;
459 : }
460 : return r;
461 : #endif
462 : }
463 :
464 : /* default quality ratio for LLL */
465 : static const double LLLDFT = 0.99;
466 :
467 : /* assume flag & (LLL_KER|LLL_IM|LLL_ALL). LLL_INPLACE implies LLL_IM */
468 : static GEN
469 769893 : lll_trivial(GEN x, long flag)
470 : {
471 769893 : if (lg(x) == 1)
472 : { /* dim x = 0 */
473 15497 : if (! (flag & LLL_ALL)) return cgetg(1,t_MAT);
474 28 : retmkvec2(cgetg(1,t_MAT), cgetg(1,t_MAT));
475 : }
476 : /* dim x = 1 */
477 754396 : if (gequal0(gel(x,1)))
478 : {
479 153 : if (flag & LLL_KER) return matid(1);
480 153 : if (flag & (LLL_IM|LLL_INPLACE)) return cgetg(1,t_MAT);
481 28 : retmkvec2(matid(1), cgetg(1,t_MAT));
482 : }
483 754241 : if (flag & LLL_INPLACE) return gcopy(x);
484 650858 : if (flag & LLL_KER) return cgetg(1,t_MAT);
485 650858 : if (flag & LLL_IM) return matid(1);
486 29 : retmkvec2(cgetg(1,t_MAT), (flag & LLL_GRAM)? gcopy(x): matid(1));
487 : }
488 :
489 : /* vecslice(x,#x-k,#x) in place. Works for t_MAT, t_VEC/t_COL */
490 : static GEN
491 2055512 : vectail_inplace(GEN x, long k)
492 : {
493 2055512 : if (!k) return x;
494 57747 : x[k] = ((ulong)x[0] & ~LGBITS) | _evallg(lg(x) - k);
495 57747 : return x + k;
496 : }
497 :
498 : /* k = dim Kernel */
499 : static GEN
500 2129402 : lll_finish(GEN h, long k, long flag)
501 : {
502 : GEN g;
503 2129402 : if (!(flag & (LLL_IM|LLL_KER|LLL_ALL|LLL_INPLACE))) return h;
504 2055538 : if (flag & (LLL_IM|LLL_INPLACE)) return vectail_inplace(h, k);
505 96 : if (flag & LLL_KER) { setlg(h,k+1); return h; }
506 68 : g = vecslice(h,1,k); /* done first: vectail_inplace kills h */
507 70 : return mkvec2(g, vectail_inplace(h, k));
508 : }
509 :
510 : /* y * z * 2^e, e >= 0; y,z t_INT */
511 : INLINE GEN
512 315966 : mulshift(GEN y, GEN z, long e)
513 : {
514 315966 : long ly = lgefint(y), lz;
515 : pari_sp av;
516 : GEN t;
517 315966 : if (ly == 2) return gen_0;
518 46411 : lz = lgefint(z);
519 46411 : av = avma; (void)new_chunk(ly+lz+nbits2lg(e)); /* HACK */
520 46411 : t = mulii(z, y);
521 46411 : set_avma(av); return shifti(t, e);
522 : }
523 :
524 : /* x - y * z * 2^e, e >= 0; x,y,z t_INT */
525 : INLINE GEN
526 1493610 : submulshift(GEN x, GEN y, GEN z, long e)
527 : {
528 1493610 : long lx = lgefint(x), ly, lz;
529 : pari_sp av;
530 : GEN t;
531 1493610 : if (!e) return submulii(x, y, z);
532 1484437 : if (lx == 2) { t = mulshift(y, z, e); togglesign(t); return t; }
533 1187473 : ly = lgefint(y);
534 1187473 : if (ly == 2) return icopy(x);
535 952899 : lz = lgefint(z);
536 952899 : av = avma; (void)new_chunk(lx+ly+lz+nbits2lg(e)); /* HACK */
537 952899 : t = shifti(mulii(z, y), e);
538 952899 : set_avma(av); return subii(x, t);
539 : }
540 : static void
541 18512954 : subzi(GEN *a, GEN b)
542 : {
543 18512954 : pari_sp av = avma;
544 18512954 : b = subii(*a, b);
545 18512920 : if (lgefint(b)<=lg(*a) && isonstack(*a)) { affii(b,*a); set_avma(av); }
546 2103717 : else *a = b;
547 18512966 : }
548 :
549 : static void
550 17769238 : addzi(GEN *a, GEN b)
551 : {
552 17769238 : pari_sp av = avma;
553 17769238 : b = addii(*a, b);
554 17769232 : if (lgefint(b)<=lg(*a) && isonstack(*a)) { affii(b,*a); set_avma(av); }
555 1876061 : else *a = b;
556 17769269 : }
557 :
558 : /* x - u*y * 2^e */
559 : INLINE GEN
560 4130903 : submuliu2n(GEN x, GEN y, ulong u, long e)
561 : {
562 : pari_sp av;
563 4130903 : long ly = lgefint(y);
564 4130903 : if (ly == 2) return x;
565 2833517 : av = avma;
566 2833517 : (void)new_chunk(3+ly+lgefint(x)+nbits2lg(e)); /* HACK */
567 2835595 : y = shifti(mului(u,y), e);
568 2834440 : set_avma(av); return subii(x, y);
569 : }
570 : /* *x -= u*y * 2^e */
571 : INLINE void
572 262957 : submulzu2n(GEN *x, GEN y, ulong u, long e)
573 : {
574 : pari_sp av;
575 262957 : long ly = lgefint(y);
576 262957 : if (ly == 2) return;
577 172642 : av = avma;
578 172642 : (void)new_chunk(3+ly+lgefint(*x)+nbits2lg(e)); /* HACK */
579 172642 : y = shifti(mului(u,y), e);
580 172642 : set_avma(av); return subzi(x, y);
581 : }
582 :
583 : /* x + u*y * 2^e */
584 : INLINE GEN
585 4043110 : addmuliu2n(GEN x, GEN y, ulong u, long e)
586 : {
587 : pari_sp av;
588 4043110 : long ly = lgefint(y);
589 4043110 : if (ly == 2) return x;
590 2778652 : av = avma;
591 2778652 : (void)new_chunk(3+ly+lgefint(x)+nbits2lg(e)); /* HACK */
592 2780643 : y = shifti(mului(u,y), e);
593 2779479 : set_avma(av); return addii(x, y);
594 : }
595 :
596 : /* *x += u*y * 2^e */
597 : INLINE void
598 270968 : addmulzu2n(GEN *x, GEN y, ulong u, long e)
599 : {
600 : pari_sp av;
601 270968 : long ly = lgefint(y);
602 270968 : if (ly == 2) return;
603 178036 : av = avma;
604 178036 : (void)new_chunk(3+ly+lgefint(*x)+nbits2lg(e)); /* HACK */
605 178036 : y = shifti(mului(u,y), e);
606 178036 : set_avma(av); return addzi(x, y);
607 : }
608 :
609 : /* n < 10; gerepileall supporting &NULL arguments. Maybe rename and export ? */
610 : INLINE void
611 4665 : gc_lll(pari_sp av, int n, ...)
612 : {
613 : int i, j;
614 : GEN *gptr[10];
615 : size_t s;
616 4665 : va_list a; va_start(a, n);
617 13995 : for (i=j=0; i<n; i++)
618 : {
619 9330 : GEN *x = va_arg(a,GEN*);
620 9330 : if (*x) { gptr[j++] = x; *x = (GEN)copy_bin(*x); }
621 : }
622 4665 : va_end(a); set_avma(av);
623 11594 : for (--j; j>=0; j--) *gptr[j] = bin_copy((GENbin*)*gptr[j]);
624 4665 : s = pari_mainstack->top - pari_mainstack->bot;
625 : /* size of saved objects ~ stacksize / 4 => overflow */
626 4665 : if (av - avma > (s >> 2))
627 : {
628 0 : size_t t = avma - pari_mainstack->bot;
629 0 : av = avma; new_chunk((s + t) / sizeof(long)); set_avma(av); /* double */
630 : }
631 4665 : }
632 :
633 : /********************************************************************/
634 : /** **/
635 : /** FPLLL (adapted from D. Stehle's code) **/
636 : /** **/
637 : /********************************************************************/
638 : /* Babai* and fplll* are a conversion to libpari API and data types
639 : of fplll-1.3 by Damien Stehle'.
640 :
641 : Copyright 2005, 2006 Damien Stehle'.
642 :
643 : This program is free software; you can redistribute it and/or modify it
644 : under the terms of the GNU General Public License as published by the
645 : Free Software Foundation; either version 2 of the License, or (at your
646 : option) any later version.
647 :
648 : This program implements ideas from the paper "Floating-point LLL Revisited",
649 : by Phong Nguyen and Damien Stehle', in the Proceedings of Eurocrypt'2005,
650 : Springer-Verlag; and was partly inspired by Shoup's NTL library:
651 : http://www.shoup.net/ntl/ */
652 :
653 : /* x t_REAL, |x| >= 1/2. Test whether |x| <= 3/2 */
654 : static int
655 320494 : absrsmall2(GEN x)
656 : {
657 320494 : long e = expo(x), l, i;
658 320494 : if (e < 0) return 1;
659 169856 : if (e > 0 || (ulong)x[2] > (3UL << (BITS_IN_LONG-2))) return 0;
660 : /* line above assumes l > 2. OK since x != 0 */
661 57040 : l = lg(x); for (i = 3; i < l; i++) if (x[i]) return 0;
662 49085 : return 1;
663 : }
664 : /* x t_REAL; test whether |x| <= 1/2 */
665 : static int
666 554845 : absrsmall(GEN x)
667 : {
668 : long e, l, i;
669 554845 : if (!signe(x)) return 1;
670 550686 : e = expo(x); if (e < -1) return 1;
671 325904 : if (e > -1 || (ulong)x[2] > HIGHBIT) return 0;
672 6119 : l = lg(x); for (i = 3; i < l; i++) if (x[i]) return 0;
673 5410 : return 1;
674 : }
675 :
676 : static void
677 31555371 : rotate(GEN A, long k2, long k)
678 : {
679 : long i;
680 31555371 : GEN B = gel(A,k2);
681 100677362 : for (i = k2; i > k; i--) gel(A,i) = gel(A,i-1);
682 31555371 : gel(A,k) = B;
683 31555371 : }
684 :
685 : /************************* FAST version (double) ************************/
686 : #define dmael(x,i,j) ((x)[i][j])
687 : #define del(x,i) ((x)[i])
688 :
689 : static double *
690 34480355 : cget_dblvec(long d)
691 34480355 : { return (double*) stack_malloc_align(d*sizeof(double), sizeof(double)); }
692 :
693 : static double **
694 8276333 : cget_dblmat(long d) { return (double **) cgetg(d, t_VECSMALL); }
695 :
696 : static double
697 160927326 : itodbl_exp(GEN x, long *e)
698 : {
699 160927326 : pari_sp av = avma;
700 160927326 : GEN r = itor(x,DEFAULTPREC);
701 160915251 : *e = expo(r); setexpo(r,0);
702 160910897 : return gc_double(av, rtodbl(r));
703 : }
704 :
705 : static double
706 117109634 : dbldotproduct(double *x, double *y, long n)
707 : {
708 : long i;
709 117109634 : double sum = del(x,1) * del(y,1);
710 1375812600 : for (i=2; i<=n; i++) sum += del(x,i) * del(y,i);
711 117109634 : return sum;
712 : }
713 :
714 : static double
715 2440683 : dbldotsquare(double *x, long n)
716 : {
717 : long i;
718 2440683 : double sum = del(x,1) * del(x,1);
719 8092330 : for (i=2; i<=n; i++) sum += del(x,i) * del(x,i);
720 2440683 : return sum;
721 : }
722 :
723 : static long
724 24633233 : set_line(double *appv, GEN v, long n)
725 : {
726 24633233 : long i, maxexp = 0;
727 24633233 : pari_sp av = avma;
728 24633233 : GEN e = cgetg(n+1, t_VECSMALL);
729 185546103 : for (i = 1; i <= n; i++)
730 : {
731 160925416 : del(appv,i) = itodbl_exp(gel(v,i), e+i);
732 160911330 : if (e[i] > maxexp) maxexp = e[i];
733 : }
734 185637293 : for (i = 1; i <= n; i++) del(appv,i) = ldexp(del(appv,i), e[i]-maxexp);
735 24620687 : set_avma(av); return maxexp;
736 : }
737 :
738 : static void
739 34332126 : dblrotate(double **A, long k2, long k)
740 : {
741 : long i;
742 34332126 : double *B = del(A,k2);
743 108651984 : for (i = k2; i > k; i--) del(A,i) = del(A,i-1);
744 34332126 : del(A,k) = B;
745 34332126 : }
746 : /* update G[kappa][i] from appB */
747 : static void
748 22397340 : setG_fast(double **appB, long n, double **G, long kappa, long a, long b)
749 : { long i;
750 100906234 : for (i = a; i <= b; i++)
751 78509153 : dmael(G,kappa,i) = dbldotproduct(del(appB,kappa), del(appB,i), n);
752 22397081 : }
753 : /* update G[i][kappa] from appB */
754 : static void
755 16844877 : setG2_fast(double **appB, long n, double **G, long kappa, long a, long b)
756 : { long i;
757 55449757 : for (i = a; i <= b; i++)
758 38604918 : dmael(G,i,kappa) = dbldotproduct(del(appB,kappa), del(appB,i), n);
759 16844839 : }
760 : const long EX0 = -2; /* uninitialized; any value less than expo(0.51) = -1 */
761 :
762 : #ifdef LONG_IS_64BIT
763 : typedef long s64;
764 : #define addmuliu64_inplace addmuliu_inplace
765 : #define submuliu64_inplace submuliu_inplace
766 : #define submuliu642n submuliu2n
767 : #define addmuliu642n addmuliu2n
768 : #else
769 : typedef long long s64;
770 : typedef unsigned long long u64;
771 :
772 : INLINE GEN
773 19619546 : u64toi(u64 x)
774 : {
775 : GEN y;
776 : ulong h;
777 19619546 : if (!x) return gen_0;
778 19619546 : h = x>>32;
779 19619546 : if (!h) return utoipos(x);
780 1136203 : y = cgetipos(4);
781 1136203 : *int_LSW(y) = x&0xFFFFFFFF;
782 1136203 : *int_MSW(y) = x>>32;
783 1136203 : return y;
784 : }
785 :
786 : INLINE GEN
787 668569 : u64toineg(u64 x)
788 : {
789 : GEN y;
790 : ulong h;
791 668569 : if (!x) return gen_0;
792 668569 : h = x>>32;
793 668569 : if (!h) return utoineg(x);
794 668569 : y = cgetineg(4);
795 668569 : *int_LSW(y) = x&0xFFFFFFFF;
796 668569 : *int_MSW(y) = x>>32;
797 668569 : return y;
798 : }
799 : INLINE GEN
800 9438234 : addmuliu64_inplace(GEN x, GEN y, u64 u) { return addmulii(x, y, u64toi(u)); }
801 :
802 : INLINE GEN
803 9496657 : submuliu64_inplace(GEN x, GEN y, u64 u) { return submulii(x, y, u64toi(u)); }
804 :
805 : INLINE GEN
806 668569 : addmuliu642n(GEN x, GEN y, u64 u, long e) { return submulshift(x, y, u64toineg(u), e); }
807 :
808 : INLINE GEN
809 684655 : submuliu642n(GEN x, GEN y, u64 u, long e) { return submulshift(x, y, u64toi(u), e); }
810 :
811 : #endif
812 :
813 : /* Babai's Nearest Plane algorithm (iterative); see Babai() */
814 : static int
815 29912870 : Babai_fast(pari_sp av, long kappa, GEN *pB, GEN *pU, double **mu, double **r,
816 : double *s, double **appB, GEN expoB, double **G,
817 : long a, long zeros, long maxG, double eta)
818 : {
819 29912870 : GEN B = *pB, U = *pU;
820 29912870 : const long n = nbrows(B), d = U ? lg(U)-1: 0;
821 29912688 : long k, aa = (a > zeros)? a : zeros+1;
822 29912688 : long emaxmu = EX0, emax2mu = EX0;
823 : s64 xx;
824 29912688 : int did_something = 0;
825 : /* N.B: we set d = 0 (resp. n = 0) to avoid updating U (resp. B) */
826 :
827 17050445 : for (;;) {
828 46963133 : int go_on = 0;
829 46963133 : long i, j, emax3mu = emax2mu;
830 :
831 46963133 : if (gc_needed(av,2))
832 : {
833 194 : if(DEBUGMEM>1) pari_warn(warnmem,"Babai[1], a=%ld", aa);
834 194 : gc_lll(av,2,&B,&U);
835 : }
836 : /* Step2: compute the GSO for stage kappa */
837 46962017 : emax2mu = emaxmu; emaxmu = EX0;
838 179861155 : for (j=aa; j<kappa; j++)
839 : {
840 132899899 : double g = dmael(G,kappa,j);
841 571204916 : for (k = zeros+1; k < j; k++) g -= dmael(mu,j,k) * dmael(r,kappa,k);
842 132899899 : dmael(r,kappa,j) = g;
843 132899899 : dmael(mu,kappa,j) = dmael(r,kappa,j) / dmael(r,j,j);
844 132899899 : emaxmu = maxss(emaxmu, expoB[kappa]-expoB[j]);
845 : }
846 : /* maxmu doesn't decrease fast enough */
847 46961256 : if (emax3mu != EX0 && emax3mu <= emax2mu + 5) {*pB = B; *pU = U; return 1;}
848 :
849 167022491 : for (j=kappa-1; j>zeros; j--)
850 : {
851 137115967 : double tmp = fabs(ldexp (dmael(mu,kappa,j), expoB[kappa]-expoB[j]));
852 137115967 : if (tmp>eta) { go_on = 1; break; }
853 : }
854 :
855 : /* Step3--5: compute the X_j's */
856 46957145 : if (go_on)
857 77348508 : for (j=kappa-1; j>zeros; j--)
858 : { /* The code below seemingly handles U = NULL, but in this case d = 0 */
859 60299578 : int e = expoB[j] - expoB[kappa];
860 60299578 : double tmp = ldexp(dmael(mu,kappa,j), -e), atmp = fabs(tmp);
861 : /* tmp = Inf is allowed */
862 60299578 : if (atmp <= .5) continue; /* size-reduced */
863 33821336 : if (gc_needed(av,2))
864 : {
865 349 : if(DEBUGMEM>1) pari_warn(warnmem,"Babai[2], a=%ld, j=%ld", aa,j);
866 349 : gc_lll(av,2,&B,&U);
867 : }
868 33823818 : did_something = 1;
869 : /* we consider separately the case |X| = 1 */
870 33823818 : if (atmp <= 1.5)
871 : {
872 22758778 : if (dmael(mu,kappa,j) > 0) { /* in this case, X = 1 */
873 46655701 : for (k=zeros+1; k<j; k++)
874 35036626 : dmael(mu,kappa,k) -= ldexp(dmael(mu,j,k), e);
875 157314921 : for (i=1; i<=n; i++)
876 145697803 : gmael(B,kappa,i) = subii(gmael(B,kappa,i), gmael(B,j,i));
877 104135158 : for (i=1; i<=d; i++)
878 92517986 : gmael(U,kappa,i) = subii(gmael(U,kappa,i), gmael(U,j,i));
879 : } else { /* otherwise X = -1 */
880 45814986 : for (k=zeros+1; k<j; k++)
881 34675283 : dmael(mu,kappa,k) += ldexp(dmael(mu,j,k), e);
882 154592711 : for (i=1; i<=n; i++)
883 143455326 : gmael(B,kappa,i) = addii(gmael(B,kappa,i), gmael(B,j,i));
884 101395940 : for (i=1; i<=d; i++)
885 90258596 : gmael(U,kappa,i) = addii(gmael(U,kappa,i), gmael(U,j,i));
886 : }
887 22754516 : continue;
888 : }
889 : /* we have |X| >= 2 */
890 11065040 : if (atmp < 9007199254740992.)
891 : {
892 10234876 : tmp = pari_rint(tmp);
893 24436077 : for (k=zeros+1; k<j; k++)
894 14201192 : dmael(mu,kappa,k) -= ldexp(tmp * dmael(mu,j,k), e);
895 10234885 : xx = (s64) tmp;
896 10234885 : if (xx > 0) /* = xx */
897 : {
898 45938744 : for (i=1; i<=n; i++)
899 40790091 : gmael(B,kappa,i) = submuliu64_inplace(gmael(B,kappa,i), gmael(B,j,i), xx);
900 33063696 : for (i=1; i<=d; i++)
901 27915014 : gmael(U,kappa,i) = submuliu64_inplace(gmael(U,kappa,i), gmael(U,j,i), xx);
902 : }
903 : else /* = -xx */
904 : {
905 45602345 : for (i=1; i<=n; i++)
906 40516106 : gmael(B,kappa,i) = addmuliu64_inplace(gmael(B,kappa,i), gmael(B,j,i), -xx);
907 32727710 : for (i=1; i<=d; i++)
908 27641438 : gmael(U,kappa,i) = addmuliu64_inplace(gmael(U,kappa,i), gmael(U,j,i), -xx);
909 : }
910 : }
911 : else
912 : {
913 : int E;
914 830164 : xx = (s64) ldexp(frexp(dmael(mu,kappa,j), &E), 53);
915 830164 : E -= e + 53;
916 830164 : if (E <= 0)
917 : {
918 0 : xx = xx << -E;
919 0 : for (k=zeros+1; k<j; k++)
920 0 : dmael(mu,kappa,k) -= ldexp(((double)xx) * dmael(mu,j,k), e);
921 0 : if (xx > 0) /* = xx */
922 : {
923 0 : for (i=1; i<=n; i++)
924 0 : gmael(B,kappa,i) = submuliu64_inplace(gmael(B,kappa,i), gmael(B,j,i), xx);
925 0 : for (i=1; i<=d; i++)
926 0 : gmael(U,kappa,i) = submuliu64_inplace(gmael(U,kappa,i), gmael(U,j,i), xx);
927 : }
928 : else /* = -xx */
929 : {
930 0 : for (i=1; i<=n; i++)
931 0 : gmael(B,kappa,i) = addmuliu64_inplace(gmael(B,kappa,i), gmael(B,j,i), -xx);
932 0 : for (i=1; i<=d; i++)
933 0 : gmael(U,kappa,i) = addmuliu64_inplace(gmael(U,kappa,i), gmael(U,j,i), -xx);
934 : }
935 : } else
936 : {
937 2755813 : for (k=zeros+1; k<j; k++)
938 1925649 : dmael(mu,kappa,k) -= ldexp(((double)xx) * dmael(mu,j,k), E + e);
939 830164 : if (xx > 0) /* = xx */
940 : {
941 3887604 : for (i=1; i<=n; i++)
942 3469841 : gmael(B,kappa,i) = submuliu642n(gmael(B,kappa,i), gmael(B,j,i), xx, E);
943 1506968 : for (i=1; i<=d; i++)
944 1089205 : gmael(U,kappa,i) = submuliu642n(gmael(U,kappa,i), gmael(U,j,i), xx, E);
945 : }
946 : else /* = -xx */
947 : {
948 3841160 : for (i=1; i<=n; i++)
949 3428721 : gmael(B,kappa,i) = addmuliu642n(gmael(B,kappa,i), gmael(B,j,i), -xx, E);
950 1484839 : for (i=1; i<=d; i++)
951 1072400 : gmael(U,kappa,i) = addmuliu642n(gmael(U,kappa,i), gmael(U,j,i), -xx, E);
952 : }
953 : }
954 : }
955 : }
956 46955481 : if (!go_on) break; /* Anything happened? */
957 17047029 : expoB[kappa] = set_line(del(appB,kappa), gel(B,kappa), n);
958 17050049 : setG_fast(appB, n, G, kappa, zeros+1, kappa-1);
959 17050445 : aa = zeros+1;
960 : }
961 29908452 : if (did_something) setG2_fast(appB, n, G, kappa, kappa, maxG);
962 :
963 29908745 : del(s,zeros+1) = dmael(G,kappa,kappa);
964 : /* the last s[kappa-1]=r[kappa][kappa] is computed only if kappa increases */
965 108862318 : for (k=zeros+1; k<=kappa-2; k++)
966 78953573 : del(s,k+1) = del(s,k) - dmael(mu,kappa,k)*dmael(r,kappa,k);
967 29908745 : *pB = B; *pU = U; return 0;
968 : }
969 :
970 : static void
971 11863255 : update_alpha(GEN alpha, long kappa, long kappa2, long kappamax)
972 : {
973 : long i;
974 37676555 : for (i = kappa; i < kappa2; i++)
975 25813300 : if (kappa <= alpha[i]) alpha[i] = kappa;
976 37676536 : for (i = kappa2; i > kappa; i--) alpha[i] = alpha[i-1];
977 22991756 : for (i = kappa2+1; i <= kappamax; i++)
978 11128501 : if (kappa < alpha[i]) alpha[i] = kappa;
979 11863255 : alpha[kappa] = kappa;
980 11863255 : }
981 : static void
982 419042 : rotateG(GEN G, long kappa2, long kappa, long maxG, GEN Gtmp)
983 : {
984 : long i, j;
985 3285350 : for (i=1; i<=kappa2; i++) gel(Gtmp,i) = gmael(G,kappa2,i);
986 1773160 : for ( ; i<=maxG; i++) gel(Gtmp,i) = gmael(G,i,kappa2);
987 1458583 : for (i=kappa2; i>kappa; i--)
988 : {
989 5144588 : for (j=1; j<kappa; j++) gmael(G,i,j) = gmael(G,i-1,j);
990 1039541 : gmael(G,i,kappa) = gel(Gtmp,i-1);
991 3760605 : for (j=kappa+1; j<=i; j++) gmael(G,i,j) = gmael(G,i-1,j-1);
992 4686631 : for (j=kappa2+1; j<=maxG; j++) gmael(G,j,i) = gmael(G,j,i-1);
993 : }
994 1826767 : for (i=1; i<kappa; i++) gmael(G,kappa,i) = gel(Gtmp,i);
995 419042 : gmael(G,kappa,kappa) = gel(Gtmp,kappa2);
996 1773160 : for (i=kappa2+1; i<=maxG; i++) gmael(G,i,kappa) = gel(Gtmp,i);
997 419042 : }
998 : static void
999 11444061 : rotateG_fast(double **G, long kappa2, long kappa, long maxG, double *Gtmp)
1000 : {
1001 : long i, j;
1002 66461438 : for (i=1; i<=kappa2; i++) del(Gtmp,i) = dmael(G,kappa2,i);
1003 22080763 : for ( ; i<=maxG; i++) del(Gtmp,i) = dmael(G,i,kappa2);
1004 36217518 : for (i=kappa2; i>kappa; i--)
1005 : {
1006 69275137 : for (j=1; j<kappa; j++) dmael(G,i,j) = dmael(G,i-1,j);
1007 24773457 : dmael(G,i,kappa) = del(Gtmp,i-1);
1008 84363993 : for (j=kappa+1; j<=i; j++) dmael(G,i,j) = dmael(G,i-1,j-1);
1009 46674703 : for (j=kappa2+1; j<=maxG; j++) dmael(G,j,i) = dmael(G,j,i-1);
1010 : }
1011 30244210 : for (i=1; i<kappa; i++) dmael(G,kappa,i) = del(Gtmp,i);
1012 11444061 : dmael(G,kappa,kappa) = del(Gtmp,kappa2);
1013 22080810 : for (i=kappa2+1; i<=maxG; i++) dmael(G,i,kappa) = del(Gtmp,i);
1014 11444061 : }
1015 :
1016 : /* LLL-reduces (B,U) in place [apply base change transforms to B and U].
1017 : * Gram matrix, and GSO performed on matrices of 'double'.
1018 : * If (keepfirst), never swap with first vector.
1019 : * Return -1 on failure, else zeros = dim Kernel (>= 0) */
1020 : static long
1021 2069087 : fplll_fast(GEN *pB, GEN *pU, double delta, double eta, long keepfirst)
1022 : {
1023 : pari_sp av;
1024 : long kappa, kappa2, d, n, i, j, zeros, kappamax, maxG;
1025 : double **mu, **r, *s, tmp, *Gtmp, **G, **appB;
1026 2069087 : GEN alpha, expoB, B = *pB, U;
1027 2069087 : long cnt = 0;
1028 :
1029 2069087 : d = lg(B)-1;
1030 2069087 : n = nbrows(B);
1031 2069088 : U = *pU; /* NULL if inplace */
1032 :
1033 2069088 : G = cget_dblmat(d+1);
1034 2069088 : appB = cget_dblmat(d+1);
1035 2069085 : mu = cget_dblmat(d+1);
1036 2069087 : r = cget_dblmat(d+1);
1037 2069087 : s = cget_dblvec(d+1);
1038 9654701 : for (j = 1; j <= d; j++)
1039 : {
1040 7585616 : del(mu,j) = cget_dblvec(d+1);
1041 7585614 : del(r,j) = cget_dblvec(d+1);
1042 7585604 : del(appB,j) = cget_dblvec(n+1);
1043 7585606 : del(G,j) = cget_dblvec(d+1);
1044 46800590 : for (i=1; i<=d; i++) dmael(G,j,i) = 0.;
1045 : }
1046 2069085 : expoB = cgetg(d+1, t_VECSMALL);
1047 9654623 : for (i=1; i<=d; i++) expoB[i] = set_line(del(appB,i), gel(B,i), n);
1048 2069057 : Gtmp = cget_dblvec(d+1);
1049 2069086 : alpha = cgetg(d+1, t_VECSMALL);
1050 2069085 : av = avma;
1051 :
1052 : /* Step2: Initializing the main loop */
1053 2069085 : kappamax = 1;
1054 2069085 : i = 1;
1055 2069085 : maxG = d; /* later updated to kappamax */
1056 :
1057 : do {
1058 2234876 : dmael(G,i,i) = dbldotsquare(del(appB,i),n);
1059 2234877 : } while (dmael(G,i,i) <= 0 && (++i <=d));
1060 2069086 : zeros = i-1; /* all vectors B[i] with i <= zeros are zero vectors */
1061 2069086 : kappa = i;
1062 2069086 : if (zeros < d) dmael(r,zeros+1,zeros+1) = dmael(G,zeros+1,zeros+1);
1063 9488913 : for (i=zeros+1; i<=d; i++) alpha[i]=1;
1064 31977930 : while (++kappa <= d)
1065 : {
1066 29912948 : if (kappa > kappamax)
1067 : {
1068 5347252 : if (DEBUGLEVEL>=4) err_printf("K%ld ",kappa);
1069 5347252 : maxG = kappamax = kappa;
1070 5347252 : setG_fast(appB, n, G, kappa, zeros+1, kappa);
1071 : }
1072 : /* Step3: Call to the Babai algorithm, mu,r,s updated in place */
1073 29912940 : if (Babai_fast(av, kappa, &B,&U, mu,r,s, appB, expoB, G, alpha[kappa],
1074 4111 : zeros, maxG, eta)) { *pB=B; *pU=U; return -1; }
1075 :
1076 29908687 : tmp = ldexp(r[kappa-1][kappa-1] * delta, 2*(expoB[kappa-1]-expoB[kappa]));
1077 29908687 : if ((keepfirst && kappa == 2) || tmp <= del(s,kappa-1))
1078 : { /* Step4: Success of Lovasz's condition */
1079 18464477 : alpha[kappa] = kappa;
1080 18464477 : tmp = dmael(mu,kappa,kappa-1) * dmael(r,kappa,kappa-1);
1081 18464477 : dmael(r,kappa,kappa) = del(s,kappa-1)- tmp;
1082 18464477 : continue;
1083 : }
1084 : /* Step5: Find the right insertion index kappa, kappa2 = initial kappa */
1085 11444210 : if (DEBUGLEVEL>=4 && kappa==kappamax && del(s,kappa-1)!=0)
1086 0 : if (++cnt > 20) { cnt = 0; err_printf("(%ld) ", 2*expoB[1] + dblexpo(del(s,1))); }
1087 11444204 : kappa2 = kappa;
1088 : do {
1089 24773739 : kappa--;
1090 24773739 : if (kappa<zeros+2 + (keepfirst ? 1: 0)) break;
1091 18242400 : tmp = dmael(r,kappa-1,kappa-1) * delta;
1092 18242400 : tmp = ldexp(tmp, 2*(expoB[kappa-1]-expoB[kappa2]));
1093 18242400 : } while (del(s,kappa-1) <= tmp);
1094 11444204 : update_alpha(alpha, kappa, kappa2, kappamax);
1095 :
1096 : /* Step6: Update the mu's and r's */
1097 11444207 : dblrotate(mu,kappa2,kappa);
1098 11444160 : dblrotate(r,kappa2,kappa);
1099 11444116 : dmael(r,kappa,kappa) = del(s,kappa);
1100 :
1101 : /* Step7: Update B, appB, U, G */
1102 11444116 : rotate(B,kappa2,kappa);
1103 11444100 : dblrotate(appB,kappa2,kappa);
1104 11444070 : if (U) rotate(U,kappa2,kappa);
1105 11444070 : rotate(expoB,kappa2,kappa);
1106 11444061 : rotateG_fast(G,kappa2,kappa, maxG, Gtmp);
1107 :
1108 : /* Step8: Prepare the next loop iteration */
1109 11444367 : if (kappa == zeros+1 && dmael(G,kappa,kappa)<= 0)
1110 : {
1111 205807 : zeros++; kappa++;
1112 205807 : dmael(G,kappa,kappa) = dbldotsquare(del(appB,kappa),n);
1113 205807 : dmael(r,kappa,kappa) = dmael(G,kappa,kappa);
1114 : }
1115 : }
1116 2064982 : *pB = B; *pU = U; return zeros;
1117 : }
1118 :
1119 : /***************** HEURISTIC version (reduced precision) ****************/
1120 : static GEN
1121 154056 : realsqrdotproduct(GEN x)
1122 : {
1123 154056 : long i, l = lg(x);
1124 154056 : GEN z = sqrr(gel(x,1));
1125 1015189 : for (i=2; i<l; i++) z = addrr(z, sqrr(gel(x,i)));
1126 154056 : return z;
1127 : }
1128 : /* x, y non-empty vector of t_REALs, same length */
1129 : static GEN
1130 930581 : realdotproduct(GEN x, GEN y)
1131 : {
1132 : long i, l;
1133 : GEN z;
1134 930581 : if (x == y) return realsqrdotproduct(x);
1135 776525 : l = lg(x); z = mulrr(gel(x,1),gel(y,1));
1136 7252216 : for (i=2; i<l; i++) z = addrr(z, mulrr(gel(x,i), gel(y,i)));
1137 776525 : return z;
1138 : }
1139 : static void
1140 162040 : setG_heuristic(GEN appB, GEN G, long kappa, long a, long b)
1141 162040 : { pari_sp av = avma;
1142 : long i;
1143 747178 : for (i = a; i <= b; i++)
1144 585138 : affrr(realdotproduct(gel(appB,kappa),gel(appB,i)), gmael(G,kappa,i));
1145 162040 : set_avma(av);
1146 162040 : }
1147 : static void
1148 144478 : setG2_heuristic(GEN appB, GEN G, long kappa, long a, long b)
1149 144478 : { pari_sp av = avma;
1150 : long i;
1151 489921 : for (i = a; i <= b; i++)
1152 345443 : affrr(realdotproduct(gel(appB,kappa),gel(appB,i)), gmael(G,i,kappa));
1153 144478 : set_avma(av);
1154 144478 : }
1155 :
1156 : /* approximate t_REAL x as m * 2^e, where |m| < 2^bit */
1157 : static GEN
1158 11502 : truncexpo(GEN x, long bit, long *e)
1159 : {
1160 11502 : *e = expo(x) + 1 - bit;
1161 11502 : if (*e >= 0) return mantissa2nr(x, 0);
1162 890 : *e = 0; return roundr_safe(x);
1163 : }
1164 : /* Babai's Nearest Plane algorithm (iterative); see Babai() */
1165 : static int
1166 220292 : Babai_heuristic(pari_sp av, long kappa, GEN *pB, GEN *pU, GEN mu, GEN r, GEN s,
1167 : GEN appB, GEN G, long a, long zeros, long maxG,
1168 : GEN eta, long prec)
1169 : {
1170 220292 : GEN B = *pB, U = *pU;
1171 220292 : const long n = nbrows(B), d = U ? lg(U)-1: 0, bit = prec2nbits(prec);
1172 220292 : long k, aa = (a > zeros)? a : zeros+1;
1173 220292 : int did_something = 0;
1174 220292 : long emaxmu = EX0, emax2mu = EX0;
1175 : /* N.B: we set d = 0 (resp. n = 0) to avoid updating U (resp. B) */
1176 :
1177 152462 : for (;;) {
1178 372754 : int go_on = 0;
1179 372754 : long i, j, emax3mu = emax2mu;
1180 :
1181 372754 : if (gc_needed(av,2))
1182 : {
1183 3 : if(DEBUGMEM>1) pari_warn(warnmem,"Babai[1], a=%ld", aa);
1184 3 : gc_lll(av,2,&B,&U);
1185 : }
1186 : /* Step2: compute the GSO for stage kappa */
1187 372754 : emax2mu = emaxmu; emaxmu = EX0;
1188 1446083 : for (j=aa; j<kappa; j++)
1189 : {
1190 1073329 : pari_sp btop = avma;
1191 1073329 : GEN g = gmael(G,kappa,j);
1192 3720133 : for (k = zeros+1; k<j; k++)
1193 2646804 : g = subrr(g, mulrr(gmael(mu,j,k), gmael(r,kappa,k)));
1194 1073329 : affrr(g, gmael(r,kappa,j));
1195 1073329 : affrr(divrr(gmael(r,kappa,j), gmael(r,j,j)), gmael(mu,kappa,j));
1196 1073329 : emaxmu = maxss(emaxmu, expo(gmael(mu,kappa,j)));
1197 1073329 : set_avma(btop);
1198 : }
1199 372754 : if (emax3mu != EX0 && emax3mu <= emax2mu + 5)
1200 1327 : { *pB = B; *pU = U; return 1; }
1201 :
1202 1291310 : for (j=kappa-1; j>zeros; j--)
1203 1072345 : if (abscmprr(gmael(mu,kappa,j), eta) > 0) { go_on = 1; break; }
1204 :
1205 : /* Step3--5: compute the X_j's */
1206 371427 : if (go_on)
1207 707307 : for (j=kappa-1; j>zeros; j--)
1208 : { /* The code below seemingly handles U = NULL, but in this case d = 0 */
1209 : pari_sp btop;
1210 554845 : GEN tmp = gmael(mu,kappa,j);
1211 554845 : if (absrsmall(tmp)) continue; /* size-reduced */
1212 :
1213 320494 : if (gc_needed(av,2))
1214 : {
1215 8 : if(DEBUGMEM>1) pari_warn(warnmem,"Babai[2], a=%ld, j=%ld", aa,j);
1216 8 : gc_lll(av,2,&B,&U);
1217 : }
1218 320494 : btop = avma; did_something = 1;
1219 : /* we consider separately the case |X| = 1 */
1220 320494 : if (absrsmall2(tmp))
1221 : {
1222 199723 : if (signe(tmp) > 0) { /* in this case, X = 1 */
1223 313534 : for (k=zeros+1; k<j; k++)
1224 213960 : affrr(subrr(gmael(mu,kappa,k), gmael(mu,j,k)), gmael(mu,kappa,k));
1225 99574 : set_avma(btop);
1226 1017248 : for (i=1; i<=n; i++)
1227 917674 : gmael(B,kappa,i) = subii(gmael(B,kappa,i), gmael(B,j,i));
1228 598869 : for (i=1; i<=d; i++)
1229 499295 : gmael(U,kappa,i) = subii(gmael(U,kappa,i), gmael(U,j,i));
1230 : } else { /* otherwise X = -1 */
1231 316599 : for (k=zeros+1; k<j; k++)
1232 216450 : affrr(addrr(gmael(mu,kappa,k), gmael(mu,j,k)), gmael(mu,kappa,k));
1233 100149 : set_avma(btop);
1234 1028081 : for (i=1; i<=n; i++)
1235 927932 : gmael(B,kappa,i) = addii(gmael(B,kappa,i), gmael(B,j,i));
1236 597321 : for (i=1; i<=d; i++)
1237 497172 : gmael(U,kappa,i) = addii(gmael(U,kappa,i),gmael(U,j,i));
1238 : }
1239 199723 : continue;
1240 : }
1241 : /* we have |X| >= 2 */
1242 120771 : if (expo(tmp) < BITS_IN_LONG)
1243 : {
1244 109269 : ulong xx = roundr_safe(tmp)[2]; /* X fits in an ulong */
1245 109269 : if (signe(tmp) > 0) /* = xx */
1246 : {
1247 121183 : for (k=zeros+1; k<j; k++)
1248 66012 : affrr(subrr(gmael(mu,kappa,k), mulur(xx, gmael(mu,j,k))),
1249 66012 : gmael(mu,kappa,k));
1250 55171 : set_avma(btop);
1251 362931 : for (i=1; i<=n; i++)
1252 307760 : gmael(B,kappa,i) = submuliu_inplace(gmael(B,kappa,i), gmael(B,j,i), xx);
1253 257943 : for (i=1; i<=d; i++)
1254 202772 : gmael(U,kappa,i) = submuliu_inplace(gmael(U,kappa,i), gmael(U,j,i), xx);
1255 : }
1256 : else /* = -xx */
1257 : {
1258 118363 : for (k=zeros+1; k<j; k++)
1259 64265 : affrr(addrr(gmael(mu,kappa,k), mulur(xx, gmael(mu,j,k))),
1260 64265 : gmael(mu,kappa,k));
1261 54098 : set_avma(btop);
1262 355923 : for (i=1; i<=n; i++)
1263 301825 : gmael(B,kappa,i) = addmuliu_inplace(gmael(B,kappa,i), gmael(B,j,i), xx);
1264 244797 : for (i=1; i<=d; i++)
1265 190699 : gmael(U,kappa,i) = addmuliu_inplace(gmael(U,kappa,i), gmael(U,j,i), xx);
1266 : }
1267 : }
1268 : else
1269 : {
1270 : long e;
1271 11502 : GEN X = truncexpo(tmp, bit, &e); /* tmp ~ X * 2^e */
1272 11502 : btop = avma;
1273 27779 : for (k=zeros+1; k<j; k++)
1274 : {
1275 16277 : GEN x = mulir(X, gmael(mu,j,k));
1276 16277 : if (e) shiftr_inplace(x, e);
1277 16277 : affrr(subrr(gmael(mu,kappa,k), x), gmael(mu,kappa,k));
1278 : }
1279 11502 : set_avma(btop);
1280 93948 : for (i=1; i<=n; i++)
1281 82446 : gmael(B,kappa,i) = submulshift(gmael(B,kappa,i), gmael(B,j,i), X, e);
1282 69442 : for (i=1; i<=d; i++)
1283 57940 : gmael(U,kappa,i) = submulshift(gmael(U,kappa,i), gmael(U,j,i), X, e);
1284 : }
1285 : }
1286 371427 : if (!go_on) break; /* Anything happened? */
1287 1160645 : for (i=1 ; i<=n; i++) affir(gmael(B,kappa,i), gmael(appB,kappa,i));
1288 152462 : setG_heuristic(appB, G, kappa, zeros+1, kappa-1);
1289 152462 : aa = zeros+1;
1290 : }
1291 218965 : if (did_something) setG2_heuristic(appB, G, kappa, kappa, maxG);
1292 218965 : affrr(gmael(G,kappa,kappa), gel(s,zeros+1));
1293 : /* the last s[kappa-1]=r[kappa][kappa] is computed only if kappa increases */
1294 218965 : av = avma;
1295 822601 : for (k=zeros+1; k<=kappa-2; k++)
1296 603636 : affrr(subrr(gel(s,k), mulrr(gmael(mu,kappa,k), gmael(r,kappa,k))),
1297 603636 : gel(s,k+1));
1298 218965 : *pB = B; *pU = U; return gc_bool(av, 0);
1299 : }
1300 :
1301 : static GEN
1302 15591 : ZC_to_RC(GEN x, long prec)
1303 102385 : { pari_APPLY_type(t_COL,itor(gel(x,i),prec)) }
1304 :
1305 : static GEN
1306 4111 : ZM_to_RM(GEN x, long prec)
1307 19702 : { pari_APPLY_same(ZC_to_RC(gel(x,i),prec)) }
1308 :
1309 : /* LLL-reduces (B,U) in place [apply base change transforms to B and U].
1310 : * Gram matrix made of t_REAL at precision prec2, performe GSO at prec.
1311 : * If (keepfirst), never swap with first vector.
1312 : * Return -1 on failure, else zeros = dim Kernel (>= 0) */
1313 : static long
1314 4111 : fplll_heuristic(GEN *pB, GEN *pU, double DELTA, double ETA, long keepfirst,
1315 : long prec, long prec2)
1316 : {
1317 : pari_sp av, av2;
1318 : long kappa, kappa2, d, i, j, zeros, kappamax, maxG;
1319 4111 : GEN mu, r, s, tmp, Gtmp, alpha, G, appB, B = *pB, U;
1320 4111 : GEN delta = dbltor(DELTA), eta = dbltor(ETA);
1321 4111 : long cnt = 0;
1322 :
1323 4111 : d = lg(B)-1;
1324 4111 : U = *pU; /* NULL if inplace */
1325 :
1326 4111 : G = cgetg(d+1, t_MAT);
1327 4111 : mu = cgetg(d+1, t_MAT);
1328 4111 : r = cgetg(d+1, t_MAT);
1329 4111 : s = cgetg(d+1, t_VEC);
1330 4111 : appB = ZM_to_RM(B, prec2);
1331 19702 : for (j = 1; j <= d; j++)
1332 : {
1333 15591 : GEN M = cgetg(d+1, t_COL), R = cgetg(d+1, t_COL), S = cgetg(d+1, t_COL);
1334 15591 : gel(mu,j)= M;
1335 15591 : gel(r,j) = R;
1336 15591 : gel(G,j) = S;
1337 15591 : gel(s,j) = cgetr(prec);
1338 94136 : for (i = 1; i <= d; i++)
1339 : {
1340 78545 : gel(R,i) = cgetr(prec);
1341 78545 : gel(M,i) = cgetr(prec);
1342 78545 : gel(S,i) = cgetr(prec2);
1343 : }
1344 : }
1345 4111 : Gtmp = cgetg(d+1, t_VEC);
1346 4111 : alpha = cgetg(d+1, t_VECSMALL);
1347 4111 : av = avma;
1348 :
1349 : /* Step2: Initializing the main loop */
1350 4111 : kappamax = 1;
1351 4111 : i = 1;
1352 4111 : maxG = d; /* later updated to kappamax */
1353 :
1354 : do {
1355 4114 : affrr(RgV_dotsquare(gel(appB,i)), gmael(G,i,i));
1356 4114 : } while (signe(gmael(G,i,i)) == 0 && (++i <=d));
1357 4111 : zeros = i-1; /* all vectors B[i] with i <= zeros are zero vectors */
1358 4111 : kappa = i;
1359 4111 : if (zeros < d) affrr(gmael(G,zeros+1,zeros+1), gmael(r,zeros+1,zeros+1));
1360 19699 : for (i=zeros+1; i<=d; i++) alpha[i]=1;
1361 :
1362 223076 : while (++kappa <= d)
1363 : {
1364 220292 : if (kappa > kappamax)
1365 : {
1366 9578 : if (DEBUGLEVEL>=4) err_printf("K%ld ",kappa);
1367 9578 : maxG = kappamax = kappa;
1368 9578 : setG_heuristic(appB, G, kappa, zeros+1, kappa);
1369 : }
1370 : /* Step3: Call to the Babai algorithm, mu,r,s updated in place */
1371 220292 : if (Babai_heuristic(av, kappa, &B,&U, mu,r,s, appB, G, alpha[kappa], zeros,
1372 1327 : maxG, eta, prec)) { *pB = B; *pU = U; return -1; }
1373 218965 : av2 = avma;
1374 437837 : if ((keepfirst && kappa == 2) ||
1375 218872 : cmprr(mulrr(gmael(r,kappa-1,kappa-1), delta), gel(s,kappa-1)) <= 0)
1376 : { /* Step4: Success of Lovasz's condition */
1377 125479 : alpha[kappa] = kappa;
1378 125479 : tmp = mulrr(gmael(mu,kappa,kappa-1), gmael(r,kappa,kappa-1));
1379 125479 : affrr(subrr(gel(s,kappa-1), tmp), gmael(r,kappa,kappa));
1380 125479 : set_avma(av2); continue;
1381 : }
1382 : /* Step5: Find the right insertion index kappa, kappa2 = initial kappa */
1383 93486 : if (DEBUGLEVEL>=4 && kappa==kappamax && signe(gel(s,kappa-1)))
1384 0 : if (++cnt > 20) { cnt = 0; err_printf("(%ld) ", expo(gel(s,1))); }
1385 93486 : kappa2 = kappa;
1386 : do {
1387 210739 : kappa--;
1388 210739 : if (kappa < zeros+2 + (keepfirst ? 1: 0)) break;
1389 190150 : tmp = mulrr(gmael(r,kappa-1,kappa-1), delta);
1390 190150 : } while (cmprr(gel(s,kappa-1), tmp) <= 0 );
1391 93486 : set_avma(av2);
1392 93486 : update_alpha(alpha, kappa, kappa2, kappamax);
1393 :
1394 : /* Step6: Update the mu's and r's */
1395 93486 : rotate(mu,kappa2,kappa);
1396 93486 : rotate(r,kappa2,kappa);
1397 93486 : affrr(gel(s,kappa), gmael(r,kappa,kappa));
1398 :
1399 : /* Step7: Update B, appB, U, G */
1400 93486 : rotate(B,kappa2,kappa);
1401 93486 : rotate(appB,kappa2,kappa);
1402 93486 : if (U) rotate(U,kappa2,kappa);
1403 93486 : rotateG(G,kappa2,kappa, maxG, Gtmp);
1404 :
1405 : /* Step8: Prepare the next loop iteration */
1406 93486 : if (kappa == zeros+1 && !signe(gmael(G,kappa,kappa)))
1407 : {
1408 0 : zeros++; kappa++;
1409 0 : affrr(RgV_dotsquare(gel(appB,kappa)), gmael(G,kappa,kappa));
1410 0 : affrr(gmael(G,kappa,kappa), gmael(r,kappa,kappa));
1411 : }
1412 : }
1413 2784 : *pB=B; *pU=U; return zeros;
1414 : }
1415 :
1416 : /************************* PROVED version (t_INT) ***********************/
1417 : /* dpe inspired by dpe.h by Patrick Pelissier, Paul Zimmermann
1418 : * https://gforge.inria.fr/projects/dpe/
1419 : */
1420 :
1421 : typedef struct
1422 : {
1423 : double d; /* significand */
1424 : long e; /* exponent */
1425 : } dpe_t;
1426 :
1427 : #define Dmael(x,i,j) (&((x)[i][j]))
1428 : #define Del(x,i) (&((x)[i]))
1429 :
1430 : static void
1431 651112 : dperotate(dpe_t **A, long k2, long k)
1432 : {
1433 : long i;
1434 651112 : dpe_t *B = A[k2];
1435 2308716 : for (i = k2; i > k; i--) A[i] = A[i-1];
1436 651112 : A[k] = B;
1437 651112 : }
1438 :
1439 : static void
1440 107976060 : dpe_normalize0(dpe_t *x)
1441 : {
1442 : int e;
1443 107976060 : x->d = frexp(x->d, &e);
1444 107976060 : x->e += e;
1445 107976060 : }
1446 :
1447 : static void
1448 47879156 : dpe_normalize(dpe_t *x)
1449 : {
1450 47879156 : if (x->d == 0.0)
1451 495722 : x->e = -LONG_MAX;
1452 : else
1453 47383434 : dpe_normalize0(x);
1454 47879201 : }
1455 :
1456 : static GEN
1457 93859 : dpetor(dpe_t *x)
1458 : {
1459 93859 : GEN r = dbltor(x->d);
1460 93859 : if (signe(r)==0) return r;
1461 93859 : setexpo(r, x->e-1);
1462 93859 : return r;
1463 : }
1464 :
1465 : static void
1466 25548568 : affdpe(dpe_t *y, dpe_t *x)
1467 : {
1468 25548568 : x->d = y->d;
1469 25548568 : x->e = y->e;
1470 25548568 : }
1471 :
1472 : static void
1473 20514747 : affidpe(GEN y, dpe_t *x)
1474 : {
1475 20514747 : pari_sp av = avma;
1476 20514747 : GEN r = itor(y, DEFAULTPREC);
1477 20514490 : x->e = expo(r)+1;
1478 20514490 : setexpo(r,-1);
1479 20514441 : x->d = rtodbl(r);
1480 20514384 : set_avma(av);
1481 20514339 : }
1482 :
1483 : static void
1484 3136894 : affdbldpe(double y, dpe_t *x)
1485 : {
1486 3136894 : x->d = (double)y;
1487 3136894 : x->e = 0;
1488 3136894 : dpe_normalize(x);
1489 3136906 : }
1490 :
1491 : static void
1492 56683627 : dpe_mulz(dpe_t *x, dpe_t *y, dpe_t *z)
1493 : {
1494 56683627 : z->d = x->d * y->d;
1495 56683627 : if (z->d == 0.0)
1496 8140134 : z->e = -LONG_MAX;
1497 : else
1498 : {
1499 48543493 : z->e = x->e + y->e;
1500 48543493 : dpe_normalize0(z);
1501 : }
1502 56683798 : }
1503 :
1504 : static void
1505 13949102 : dpe_divz(dpe_t *x, dpe_t *y, dpe_t *z)
1506 : {
1507 13949102 : z->d = x->d / y->d;
1508 13949102 : if (z->d == 0.0)
1509 1899610 : z->e = -LONG_MAX;
1510 : else
1511 : {
1512 12049492 : z->e = x->e - y->e;
1513 12049492 : dpe_normalize0(z);
1514 : }
1515 13949155 : }
1516 :
1517 : static void
1518 243661 : dpe_negz(dpe_t *y, dpe_t *x)
1519 : {
1520 243661 : x->d = - y->d;
1521 243661 : x->e = y->e;
1522 243661 : }
1523 :
1524 : static void
1525 1941343 : dpe_addz(dpe_t *y, dpe_t *z, dpe_t *x)
1526 : {
1527 1941343 : if (y->e > z->e + 53)
1528 111864 : affdpe(y, x);
1529 1829479 : else if (z->e > y->e + 53)
1530 41649 : affdpe(z, x);
1531 : else
1532 : {
1533 1787830 : long d = y->e - z->e;
1534 :
1535 1787830 : if (d >= 0)
1536 : {
1537 1343732 : x->d = y->d + ldexp(z->d, -d);
1538 1343732 : x->e = y->e;
1539 : }
1540 : else
1541 : {
1542 444098 : x->d = z->d + ldexp(y->d, d);
1543 444098 : x->e = z->e;
1544 : }
1545 1787830 : dpe_normalize(x);
1546 : }
1547 1941343 : }
1548 : static void
1549 53519721 : dpe_subz(dpe_t *y, dpe_t *z, dpe_t *x)
1550 : {
1551 53519721 : if (y->e > z->e + 53)
1552 11120511 : affdpe(y, x);
1553 42399210 : else if (z->e > y->e + 53)
1554 243661 : dpe_negz(z, x);
1555 : else
1556 : {
1557 42155549 : long d = y->e - z->e;
1558 :
1559 42155549 : if (d >= 0)
1560 : {
1561 39397528 : x->d = y->d - ldexp(z->d, -d);
1562 39397528 : x->e = y->e;
1563 : }
1564 : else
1565 : {
1566 2758021 : x->d = ldexp(y->d, d) - z->d;
1567 2758021 : x->e = z->e;
1568 : }
1569 42155549 : dpe_normalize(x);
1570 : }
1571 53519879 : }
1572 :
1573 : static void
1574 798796 : dpe_muluz(dpe_t *y, ulong t, dpe_t *x)
1575 : {
1576 798796 : x->d = y->d * (double)t;
1577 798796 : x->e = y->e;
1578 798796 : dpe_normalize(x);
1579 798796 : }
1580 :
1581 : static void
1582 342556 : dpe_addmuluz(dpe_t *y, dpe_t *z, ulong t, dpe_t *x)
1583 : {
1584 : dpe_t tmp;
1585 342556 : dpe_muluz(z, t, &tmp);
1586 342556 : dpe_addz(y, &tmp, x);
1587 342556 : }
1588 :
1589 : static void
1590 411756 : dpe_submuluz(dpe_t *y, dpe_t *z, ulong t, dpe_t *x)
1591 : {
1592 : dpe_t tmp;
1593 411756 : dpe_muluz(z, t, &tmp);
1594 411756 : dpe_subz(y, &tmp, x);
1595 411756 : }
1596 :
1597 : static void
1598 51490318 : dpe_submulz(dpe_t *y, dpe_t *z, dpe_t *t, dpe_t *x)
1599 : {
1600 : dpe_t tmp;
1601 51490318 : dpe_mulz(z, t, &tmp);
1602 51490306 : dpe_subz(y, &tmp, x);
1603 51490321 : }
1604 :
1605 : static int
1606 5193449 : dpe_cmp(dpe_t *x, dpe_t *y)
1607 : {
1608 5193449 : int sx = x->d < 0. ? -1: x->d > 0.;
1609 5193449 : int sy = y->d < 0. ? -1: y->d > 0.;
1610 5193449 : int d = sx - sy;
1611 :
1612 5193449 : if (d != 0)
1613 141619 : return d;
1614 5051830 : else if (x->e > y->e)
1615 480809 : return (sx > 0) ? 1 : -1;
1616 4571021 : else if (y->e > x->e)
1617 2501412 : return (sx > 0) ? -1 : 1;
1618 : else
1619 2069609 : return (x->d < y->d) ? -1 : (x->d > y->d);
1620 : }
1621 :
1622 : static int
1623 14389950 : dpe_abscmp(dpe_t *x, dpe_t *y)
1624 : {
1625 14389950 : if (x->e > y->e)
1626 271005 : return 1;
1627 14118945 : else if (y->e > x->e)
1628 13269004 : return -1;
1629 : else
1630 849941 : return (fabs(x->d) < fabs(y->d)) ? -1 : (fabs(x->d) > fabs(y->d));
1631 : }
1632 :
1633 : static int
1634 1387087 : dpe_abssmall(dpe_t *x)
1635 : {
1636 1387087 : return (x->e <= 0) || (x->e == 1 && fabs(x->d) <= .75);
1637 : }
1638 :
1639 : static int
1640 5193447 : dpe_cmpmul(dpe_t *x, dpe_t *y, dpe_t *z)
1641 : {
1642 : dpe_t t;
1643 5193447 : dpe_mulz(x,y,&t);
1644 5193449 : return dpe_cmp(&t, z);
1645 : }
1646 :
1647 : static dpe_t *
1648 13042625 : cget_dpevec(long d)
1649 13042625 : { return (dpe_t*) stack_malloc_align(d*sizeof(dpe_t), sizeof(dpe_t)); }
1650 :
1651 : static dpe_t **
1652 3136898 : cget_dpemat(long d) { return (dpe_t **) cgetg(d, t_VECSMALL); }
1653 :
1654 : static GEN
1655 20228 : dpeM_diagonal_shallow(dpe_t **m, long d)
1656 : {
1657 : long i;
1658 20228 : GEN y = cgetg(d+1,t_VEC);
1659 114087 : for (i=1; i<=d; i++) gel(y, i) = dpetor(Dmael(m,i,i));
1660 20228 : return y;
1661 : }
1662 :
1663 : static void
1664 1387088 : affii_or_copy_gc(pari_sp av, GEN x, GEN *y)
1665 : {
1666 1387088 : long l = lg(*y);
1667 1387088 : if (lgefint(x) <= l && isonstack(*y))
1668 : {
1669 1387074 : affii(x,*y);
1670 1387075 : set_avma(av);
1671 : }
1672 : else
1673 11 : *y = gerepileuptoint(av, x);
1674 1387085 : }
1675 :
1676 : /* *x -= u*y */
1677 : INLINE void
1678 5918614 : submulziu(GEN *x, GEN y, ulong u)
1679 : {
1680 : pari_sp av;
1681 5918614 : long ly = lgefint(y);
1682 5918614 : if (ly == 2) return;
1683 3251170 : av = avma;
1684 3251170 : (void)new_chunk(3+ly+lgefint(*x)); /* HACK */
1685 3251208 : y = mului(u,y);
1686 3251210 : set_avma(av); subzi(x, y);
1687 : }
1688 :
1689 : /* *x += u*y */
1690 : INLINE void
1691 4576046 : addmulziu(GEN *x, GEN y, ulong u)
1692 : {
1693 : pari_sp av;
1694 4576046 : long ly = lgefint(y);
1695 4576046 : if (ly == 2) return;
1696 2754900 : av = avma;
1697 2754900 : (void)new_chunk(3+ly+lgefint(*x)); /* HACK */
1698 2754912 : y = mului(u,y);
1699 2754912 : set_avma(av); addzi(x, y);
1700 : }
1701 :
1702 : /************************** PROVED version (dpe) *************************/
1703 :
1704 : /* Babai's Nearest Plane algorithm (iterative).
1705 : * Size-reduces b_kappa using mu_{i,j} and r_{i,j} for j<=i <kappa
1706 : * Update B[,kappa]; compute mu_{kappa,j}, r_{kappa,j} for j<=kappa and s[kappa]
1707 : * mu, r, s updated in place (affrr). Return 1 on failure, else 0. */
1708 : static int
1709 4585396 : Babai_dpe(pari_sp av, long kappa, GEN *pG, GEN *pB, GEN *pU, dpe_t **mu, dpe_t **r, dpe_t *s,
1710 : long a, long zeros, long maxG, dpe_t *eta)
1711 : {
1712 4585396 : GEN G = *pG, B = *pB, U = *pU, ztmp;
1713 4585396 : long k, d, n, aa = a > zeros? a: zeros+1;
1714 4585396 : long emaxmu = EX0, emax2mu = EX0;
1715 : /* N.B: we set d = 0 (resp. n = 0) to avoid updating U (resp. B) */
1716 4585396 : d = U? lg(U)-1: 0;
1717 4585396 : n = B? nbrows(B): 0;
1718 521891 : for (;;) {
1719 5107307 : int go_on = 0;
1720 5107307 : long i, j, emax3mu = emax2mu;
1721 :
1722 5107307 : if (gc_needed(av,2))
1723 : {
1724 0 : if(DEBUGMEM>1) pari_warn(warnmem,"Babai[1], a=%ld", aa);
1725 0 : gc_lll(av,3,&G,&B,&U);
1726 : }
1727 : /* Step2: compute the GSO for stage kappa */
1728 5107294 : emax2mu = emaxmu; emaxmu = EX0;
1729 19056461 : for (j=aa; j<kappa; j++)
1730 : {
1731 : dpe_t g;
1732 13949151 : affidpe(gmael(G,kappa,j), &g);
1733 52301635 : for (k = zeros+1; k < j; k++)
1734 38352607 : dpe_submulz(&g, Dmael(mu,j,k), Dmael(r,kappa,k), &g);
1735 13949028 : affdpe(&g, Dmael(r,kappa,j));
1736 13949100 : dpe_divz(Dmael(r,kappa,j), Dmael(r,j,j), Dmael(mu,kappa,j));
1737 13949109 : emaxmu = maxss(emaxmu, Dmael(mu,kappa,j)->e);
1738 : }
1739 5107310 : if (emax3mu != EX0 && emax3mu <= emax2mu + 5) /* precision too low */
1740 0 : { *pG = G; *pB = B; *pU = U; return 1; }
1741 :
1742 18975377 : for (j=kappa-1; j>zeros; j--)
1743 14389948 : if (dpe_abscmp(Dmael(mu,kappa,j), eta) > 0) { go_on = 1; break; }
1744 :
1745 : /* Step3--5: compute the X_j's */
1746 5107313 : if (go_on)
1747 3029383 : for (j=kappa-1; j>zeros; j--)
1748 : {
1749 : pari_sp btop;
1750 2507495 : dpe_t *tmp = Dmael(mu,kappa,j);
1751 2507495 : if (tmp->e < 0) continue; /* (essentially) size-reduced */
1752 :
1753 1387086 : if (gc_needed(av,2))
1754 : {
1755 0 : if(DEBUGMEM>1) pari_warn(warnmem,"Babai[2], a=%ld, j=%ld", aa,j);
1756 0 : gc_lll(av,3,&G,&B,&U);
1757 : }
1758 : /* we consider separately the case |X| = 1 */
1759 1387086 : if (dpe_abssmall(tmp))
1760 : {
1761 920134 : if (tmp->d > 0) { /* in this case, X = 1 */
1762 2057073 : for (k=zeros+1; k<j; k++)
1763 1595753 : dpe_subz(Dmael(mu,kappa,k), Dmael(mu,j,k), Dmael(mu,kappa,k));
1764 3016693 : for (i=1; i<=n; i++)
1765 2555373 : subzi(&gmael(B,kappa,i), gmael(B,j,i));
1766 6969190 : for (i=1; i<=d; i++)
1767 6507875 : subzi(&gmael(U,kappa,i), gmael(U,j,i));
1768 461315 : btop = avma;
1769 461315 : ztmp = subii(gmael(G,j,j), shifti(gmael(G,kappa,j), 1));
1770 461316 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
1771 461315 : affii_or_copy_gc(btop, ztmp, &gmael(G,kappa,kappa));
1772 2858586 : for (i=1; i<=j; i++)
1773 2397269 : subzi(&gmael(G,kappa,i), gmael(G,j,i));
1774 2191132 : for (i=j+1; i<kappa; i++)
1775 1729814 : subzi(&gmael(G,kappa,i), gmael(G,i,j));
1776 2360408 : for (i=kappa+1; i<=maxG; i++)
1777 1899090 : subzi(&gmael(G,i,kappa), gmael(G,i,j));
1778 : } else { /* otherwise X = -1 */
1779 2035060 : for (k=zeros+1; k<j; k++)
1780 1576242 : dpe_addz(Dmael(mu,kappa,k), Dmael(mu,j,k), Dmael(mu,kappa,k));
1781 3011610 : for (i=1; i<=n; i++)
1782 2552792 : addzi(&gmael(B,kappa,i),gmael(B,j,i));
1783 6841902 : for (i=1; i<=d; i++)
1784 6383084 : addzi(&gmael(U,kappa,i),gmael(U,j,i));
1785 458818 : btop = avma;
1786 458818 : ztmp = addii(gmael(G,j,j), shifti(gmael(G,kappa,j), 1));
1787 458817 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
1788 458819 : affii_or_copy_gc(btop, ztmp, &gmael(G,kappa,kappa));
1789 2769267 : for (i=1; i<=j; i++)
1790 2310450 : addzi(&gmael(G,kappa,i), gmael(G,j,i));
1791 2194819 : for (i=j+1; i<kappa; i++)
1792 1736002 : addzi(&gmael(G,kappa,i), gmael(G,i,j));
1793 2313048 : for (i=kappa+1; i<=maxG; i++)
1794 1854233 : addzi(&gmael(G,i,kappa), gmael(G,i,j));
1795 : }
1796 920133 : continue;
1797 : }
1798 : /* we have |X| >= 2 */
1799 466953 : if (tmp->e < BITS_IN_LONG-1)
1800 : {
1801 447959 : if (tmp->d > 0)
1802 : {
1803 247057 : ulong xx = (ulong) pari_rint(ldexp(tmp->d, tmp->e)); /* X fits in an ulong */
1804 658813 : for (k=zeros+1; k<j; k++)
1805 411756 : dpe_submuluz(Dmael(mu,kappa,k), Dmael(mu,j,k), xx, Dmael(mu,kappa,k));
1806 722104 : for (i=1; i<=n; i++)
1807 475047 : submulziu(&gmael(B,kappa,i), gmael(B,j,i), xx);
1808 3106445 : for (i=1; i<=d; i++)
1809 2859382 : submulziu(&gmael(U,kappa,i), gmael(U,j,i), xx);
1810 247063 : btop = avma;
1811 247063 : ztmp = submuliu2n(mulii(gmael(G,j,j), sqru(xx)), gmael(G,kappa,j), xx, 1);
1812 247058 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
1813 247056 : affii_or_copy_gc(btop, ztmp, &gmael(G,kappa,kappa));
1814 1313365 : for (i=1; i<=j; i++)
1815 1066309 : submulziu(&gmael(G,kappa,i), gmael(G,j,i), xx);
1816 807535 : for (i=j+1; i<kappa; i++)
1817 560482 : submulziu(&gmael(G,kappa,i), gmael(G,i,j), xx);
1818 1204506 : for (i=kappa+1; i<=maxG; i++)
1819 957453 : submulziu(&gmael(G,i,kappa), gmael(G,i,j), xx);
1820 : }
1821 : else
1822 : {
1823 200902 : ulong xx = (ulong) pari_rint(ldexp(-tmp->d, tmp->e)); /* X fits in an ulong */
1824 543458 : for (k=zeros+1; k<j; k++)
1825 342556 : dpe_addmuluz(Dmael(mu,kappa,k), Dmael(mu,j,k), xx, Dmael(mu,kappa,k));
1826 687338 : for (i=1; i<=n; i++)
1827 486436 : addmulziu(&gmael(B,kappa,i), gmael(B,j,i), xx);
1828 2358660 : for (i=1; i<=d; i++)
1829 2157765 : addmulziu(&gmael(U,kappa,i), gmael(U,j,i), xx);
1830 200895 : btop = avma;
1831 200895 : ztmp = addmuliu2n(mulii(gmael(G,j,j), sqru(xx)), gmael(G,kappa,j), xx, 1);
1832 200902 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
1833 200902 : affii_or_copy_gc(btop, ztmp, &gmael(G,kappa,kappa));
1834 989752 : for (i=1; i<=j; i++)
1835 788850 : addmulziu(&gmael(G,kappa,i), gmael(G,j,i), xx);
1836 661917 : for (i=j+1; i<kappa; i++)
1837 461015 : addmulziu(&gmael(G,kappa,i), gmael(G,i,j), xx);
1838 882912 : for (i=kappa+1; i<=maxG; i++)
1839 682010 : addmulziu(&gmael(G,i,kappa), gmael(G,i,j), xx);
1840 : }
1841 : }
1842 : else
1843 : {
1844 18994 : long e = tmp->e - BITS_IN_LONG + 1;
1845 18994 : if (tmp->d > 0)
1846 : {
1847 9396 : ulong xx = (ulong) pari_rint(ldexp(tmp->d, BITS_IN_LONG - 1));
1848 31333 : for (k=zeros+1; k<j; k++)
1849 : {
1850 : dpe_t x;
1851 21937 : dpe_muluz(Dmael(mu,j,k), xx, &x);
1852 21937 : x.e += e;
1853 21937 : dpe_subz(Dmael(mu,kappa,k), &x, Dmael(mu,kappa,k));
1854 : }
1855 124323 : for (i=1; i<=n; i++)
1856 114927 : submulzu2n(&gmael(B,kappa,i), gmael(B,j,i), xx, e);
1857 87036 : for (i=1; i<=d; i++)
1858 77640 : submulzu2n(&gmael(U,kappa,i), gmael(U,j,i), xx, e);
1859 9396 : btop = avma;
1860 9396 : ztmp = submuliu2n(mulshift(gmael(G,j,j), sqru(xx), 2*e),
1861 9396 : gmael(G,kappa,j), xx, e+1);
1862 9396 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
1863 9396 : affii_or_copy_gc(btop, ztmp, &gmael(G,kappa,kappa));
1864 40927 : for (i=1; i<=j; i++)
1865 31531 : submulzu2n(&gmael(G,kappa,i), gmael(G,j,i), xx, e);
1866 47343 : for ( ; i<kappa; i++)
1867 37947 : submulzu2n(&gmael(G,kappa,i), gmael(G,i,j), xx, e);
1868 10308 : for (i=kappa+1; i<=maxG; i++)
1869 912 : submulzu2n(&gmael(G,i,kappa), gmael(G,i,j), xx, e);
1870 : } else
1871 : {
1872 9598 : ulong xx = (ulong) pari_rint(ldexp(-tmp->d, BITS_IN_LONG - 1));
1873 32153 : for (k=zeros+1; k<j; k++)
1874 : {
1875 : dpe_t x;
1876 22547 : dpe_muluz(Dmael(mu,j,k), xx, &x);
1877 22547 : x.e += e;
1878 22547 : dpe_addz(Dmael(mu,kappa,k), &x, Dmael(mu,kappa,k));
1879 : }
1880 128490 : for (i=1; i<=n; i++)
1881 118884 : addmulzu2n(&gmael(B,kappa,i), gmael(B,j,i), xx, e);
1882 89277 : for (i=1; i<=d; i++)
1883 79671 : addmulzu2n(&gmael(U,kappa,i), gmael(U,j,i), xx, e);
1884 9606 : btop = avma;
1885 9606 : ztmp = addmuliu2n(mulshift(gmael(G,j,j), sqru(xx), 2*e),
1886 9606 : gmael(G,kappa,j), xx, e+1);
1887 9606 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
1888 9606 : affii_or_copy_gc(btop, ztmp, &gmael(G,kappa,kappa));
1889 41957 : for (i=1; i<=j; i++)
1890 32351 : addmulzu2n(&gmael(G,kappa,i), gmael(G,j,i), xx, e);
1891 48921 : for ( ; i<kappa; i++)
1892 39315 : addmulzu2n(&gmael(G,kappa,i), gmael(G,i,j), xx, e);
1893 10353 : for (i=kappa+1; i<=maxG; i++)
1894 747 : addmulzu2n(&gmael(G,i,kappa), gmael(G,i,j), xx, e);
1895 : }
1896 : }
1897 : }
1898 5107317 : if (!go_on) break; /* Anything happened? */
1899 521891 : aa = zeros+1;
1900 : }
1901 :
1902 4585426 : affidpe(gmael(G,kappa,kappa), Del(s,zeros+1));
1903 : /* the last s[kappa-1]=r[kappa][kappa] is computed only if kappa increases */
1904 13463455 : for (k=zeros+1; k<=kappa-2; k++)
1905 8878050 : dpe_submulz(Del(s,k), Dmael(mu,kappa,k), Dmael(r,kappa,k), Del(s,k+1));
1906 4585405 : *pG = G; *pB = B; *pU = U; return 0;
1907 : }
1908 :
1909 : /* G integral Gram matrix, LLL-reduces (G,B,U) in place [apply base change
1910 : * transforms to B and U]. If (keepfirst), never swap with first vector.
1911 : * If G = NULL, we compute the Gram matrix incrementally.
1912 : * Return -1 on failure, else zeros = dim Kernel (>= 0) */
1913 : static long
1914 1568454 : fplll_dpe(GEN *pG, GEN *pB, GEN *pU, GEN *pr, double DELTA, double ETA,
1915 : long keepfirst)
1916 : {
1917 : pari_sp av;
1918 1568454 : GEN Gtmp, alpha, G = *pG, B = *pB, U = *pU;
1919 1568454 : long d, maxG, kappa, kappa2, i, j, zeros, kappamax, incgram = !G, cnt = 0;
1920 : dpe_t delta, eta, **mu, **r, *s;
1921 1568454 : affdbldpe(DELTA,&delta);
1922 1568453 : affdbldpe(ETA,&eta);
1923 :
1924 1568454 : if (incgram)
1925 : { /* incremental Gram matrix */
1926 1508221 : maxG = 2; d = lg(B)-1;
1927 1508221 : G = zeromatcopy(d, d);
1928 : }
1929 : else
1930 60233 : maxG = d = lg(G)-1;
1931 :
1932 1568454 : mu = cget_dpemat(d+1);
1933 1568451 : r = cget_dpemat(d+1);
1934 1568442 : s = cget_dpevec(d+1);
1935 7305566 : for (j = 1; j <= d; j++)
1936 : {
1937 5737117 : mu[j]= cget_dpevec(d+1);
1938 5737115 : r[j] = cget_dpevec(d+1);
1939 : }
1940 1568449 : Gtmp = cgetg(d+1, t_VEC);
1941 1568447 : alpha = cgetg(d+1, t_VECSMALL);
1942 1568444 : av = avma;
1943 :
1944 : /* Step2: Initializing the main loop */
1945 1568444 : kappamax = 1;
1946 1568444 : i = 1;
1947 : do {
1948 1945328 : if (incgram) gmael(G,i,i) = ZV_dotsquare(gel(B,i));
1949 1945299 : affidpe(gmael(G,i,i), Dmael(r,i,i));
1950 1945303 : } while (!signe(gmael(G,i,i)) && ++i <= d);
1951 1568419 : zeros = i-1; /* all basis vectors b_i with i <= zeros are zero vectors */
1952 1568419 : kappa = i;
1953 6928617 : for (i=zeros+1; i<=d; i++) alpha[i]=1;
1954 :
1955 6153873 : while (++kappa <= d)
1956 : {
1957 4585426 : if (kappa > kappamax)
1958 : {
1959 3791783 : if (DEBUGLEVEL>=4) err_printf("K%ld ",kappa);
1960 3791783 : kappamax = kappa;
1961 3791783 : if (incgram)
1962 : {
1963 15907713 : for (i=zeros+1; i<=kappa; i++)
1964 12315451 : gmael(G,kappa,i) = ZV_dotproduct(gel(B,kappa), gel(B,i));
1965 3592262 : maxG = kappamax;
1966 : }
1967 : }
1968 : /* Step3: Call to the Babai algorithm, mu,r,s updated in place */
1969 4585086 : if (Babai_dpe(av, kappa, &G,&B,&U, mu,r,s, alpha[kappa], zeros, maxG, &eta))
1970 0 : { *pG = incgram? NULL: G; *pB = B; *pU = U; return -1; }
1971 9071705 : if ((keepfirst && kappa == 2) ||
1972 4486269 : dpe_cmpmul(Dmael(r,kappa-1,kappa-1), &delta, Del(s,kappa-1)) <= 0)
1973 : { /* Step4: Success of Lovasz's condition */
1974 4259880 : alpha[kappa] = kappa;
1975 4259880 : dpe_submulz(Del(s,kappa-1), Dmael(mu,kappa,kappa-1), Dmael(r,kappa,kappa-1), Dmael(r,kappa,kappa));
1976 4259875 : continue;
1977 : }
1978 : /* Step5: Find the right insertion index kappa, kappa2 = initial kappa */
1979 325556 : if (DEBUGLEVEL>=4 && kappa==kappamax && Del(s,kappa-1)->d)
1980 0 : if (++cnt > 20) { cnt = 0; err_printf("(%ld) ", Del(s,1)->e-1); }
1981 325556 : kappa2 = kappa;
1982 : do {
1983 828802 : kappa--;
1984 828802 : if (kappa < zeros+2 + (keepfirst ? 1: 0)) break;
1985 707168 : } while (dpe_cmpmul(Dmael(r,kappa-1,kappa-1), &delta, Del(s,kappa-1)) >= 0);
1986 325556 : update_alpha(alpha, kappa, kappa2, kappamax);
1987 :
1988 : /* Step6: Update the mu's and r's */
1989 325556 : dperotate(mu, kappa2, kappa);
1990 325556 : dperotate(r, kappa2, kappa);
1991 325556 : affdpe(Del(s,kappa), Dmael(r,kappa,kappa));
1992 :
1993 : /* Step7: Update G, B, U */
1994 325556 : if (U) rotate(U, kappa2, kappa);
1995 325556 : if (B) rotate(B, kappa2, kappa);
1996 325556 : rotateG(G,kappa2,kappa, maxG, Gtmp);
1997 :
1998 : /* Step8: Prepare the next loop iteration */
1999 325556 : if (kappa == zeros+1 && !signe(gmael(G,kappa,kappa)))
2000 : {
2001 35161 : zeros++; kappa++;
2002 35161 : affidpe(gmael(G,kappa,kappa), Dmael(r,kappa,kappa));
2003 : }
2004 : }
2005 1568447 : if (pr) *pr = dpeM_diagonal_shallow(r,d);
2006 1568447 : *pG = G; *pB = B; *pU = U; return zeros; /* success */
2007 : }
2008 :
2009 :
2010 : /************************** PROVED version (t_INT) *************************/
2011 :
2012 : /* Babai's Nearest Plane algorithm (iterative).
2013 : * Size-reduces b_kappa using mu_{i,j} and r_{i,j} for j<=i <kappa
2014 : * Update B[,kappa]; compute mu_{kappa,j}, r_{kappa,j} for j<=kappa and s[kappa]
2015 : * mu, r, s updated in place (affrr). Return 1 on failure, else 0. */
2016 : static int
2017 0 : Babai(pari_sp av, long kappa, GEN *pG, GEN *pB, GEN *pU, GEN mu, GEN r, GEN s,
2018 : long a, long zeros, long maxG, GEN eta, long prec)
2019 : {
2020 0 : GEN G = *pG, B = *pB, U = *pU, ztmp;
2021 0 : long k, aa = a > zeros? a: zeros+1;
2022 0 : const long n = B? nbrows(B): 0, d = U ? lg(U)-1: 0, bit = prec2nbits(prec);
2023 0 : long emaxmu = EX0, emax2mu = EX0;
2024 : /* N.B: we set d = 0 (resp. n = 0) to avoid updating U (resp. B) */
2025 :
2026 0 : for (;;) {
2027 0 : int go_on = 0;
2028 0 : long i, j, emax3mu = emax2mu;
2029 :
2030 0 : if (gc_needed(av,2))
2031 : {
2032 0 : if(DEBUGMEM>1) pari_warn(warnmem,"Babai[1], a=%ld", aa);
2033 0 : gc_lll(av,3,&G,&B,&U);
2034 : }
2035 : /* Step2: compute the GSO for stage kappa */
2036 0 : emax2mu = emaxmu; emaxmu = EX0;
2037 0 : for (j=aa; j<kappa; j++)
2038 : {
2039 0 : pari_sp btop = avma;
2040 0 : GEN g = gmael(G,kappa,j);
2041 0 : for (k = zeros+1; k < j; k++)
2042 0 : g = mpsub(g, mulrr(gmael(mu,j,k), gmael(r,kappa,k)));
2043 0 : mpaff(g, gmael(r,kappa,j));
2044 0 : affrr(divrr(gmael(r,kappa,j), gmael(r,j,j)), gmael(mu,kappa,j));
2045 0 : emaxmu = maxss(emaxmu, expo(gmael(mu,kappa,j)));
2046 0 : set_avma(btop);
2047 : }
2048 0 : if (emax3mu != EX0 && emax3mu <= emax2mu + 5) /* precision too low */
2049 0 : { *pG = G; *pB = B; *pU = U; return 1; }
2050 :
2051 0 : for (j=kappa-1; j>zeros; j--)
2052 0 : if (abscmprr(gmael(mu,kappa,j), eta) > 0) { go_on = 1; break; }
2053 :
2054 : /* Step3--5: compute the X_j's */
2055 0 : if (go_on)
2056 0 : for (j=kappa-1; j>zeros; j--)
2057 : {
2058 : pari_sp btop;
2059 0 : GEN tmp = gmael(mu,kappa,j);
2060 0 : if (absrsmall(tmp)) continue; /* size-reduced */
2061 :
2062 0 : if (gc_needed(av,2))
2063 : {
2064 0 : if(DEBUGMEM>1) pari_warn(warnmem,"Babai[2], a=%ld, j=%ld", aa,j);
2065 0 : gc_lll(av,3,&G,&B,&U);
2066 : }
2067 0 : btop = avma;
2068 : /* we consider separately the case |X| = 1 */
2069 0 : if (absrsmall2(tmp))
2070 : {
2071 0 : if (signe(tmp) > 0) { /* in this case, X = 1 */
2072 0 : for (k=zeros+1; k<j; k++)
2073 0 : affrr(subrr(gmael(mu,kappa,k), gmael(mu,j,k)), gmael(mu,kappa,k));
2074 0 : set_avma(btop);
2075 0 : for (i=1; i<=n; i++)
2076 0 : gmael(B,kappa,i) = subii(gmael(B,kappa,i), gmael(B,j,i));
2077 0 : for (i=1; i<=d; i++)
2078 0 : gmael(U,kappa,i) = subii(gmael(U,kappa,i), gmael(U,j,i));
2079 0 : btop = avma;
2080 0 : ztmp = subii(gmael(G,j,j), shifti(gmael(G,kappa,j), 1));
2081 0 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
2082 0 : gmael(G,kappa,kappa) = gerepileuptoint(btop, ztmp);
2083 0 : for (i=1; i<=j; i++)
2084 0 : gmael(G,kappa,i) = subii(gmael(G,kappa,i), gmael(G,j,i));
2085 0 : for (i=j+1; i<kappa; i++)
2086 0 : gmael(G,kappa,i) = subii(gmael(G,kappa,i), gmael(G,i,j));
2087 0 : for (i=kappa+1; i<=maxG; i++)
2088 0 : gmael(G,i,kappa) = subii(gmael(G,i,kappa), gmael(G,i,j));
2089 : } else { /* otherwise X = -1 */
2090 0 : for (k=zeros+1; k<j; k++)
2091 0 : affrr(addrr(gmael(mu,kappa,k), gmael(mu,j,k)), gmael(mu,kappa,k));
2092 0 : set_avma(btop);
2093 0 : for (i=1; i<=n; i++)
2094 0 : gmael(B,kappa,i) = addii(gmael(B,kappa,i),gmael(B,j,i));
2095 0 : for (i=1; i<=d; i++)
2096 0 : gmael(U,kappa,i) = addii(gmael(U,kappa,i),gmael(U,j,i));
2097 0 : btop = avma;
2098 0 : ztmp = addii(gmael(G,j,j), shifti(gmael(G,kappa,j), 1));
2099 0 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
2100 0 : gmael(G,kappa,kappa) = gerepileuptoint(btop, ztmp);
2101 0 : for (i=1; i<=j; i++)
2102 0 : gmael(G,kappa,i) = addii(gmael(G,kappa,i), gmael(G,j,i));
2103 0 : for (i=j+1; i<kappa; i++)
2104 0 : gmael(G,kappa,i) = addii(gmael(G,kappa,i), gmael(G,i,j));
2105 0 : for (i=kappa+1; i<=maxG; i++)
2106 0 : gmael(G,i,kappa) = addii(gmael(G,i,kappa), gmael(G,i,j));
2107 : }
2108 0 : continue;
2109 : }
2110 : /* we have |X| >= 2 */
2111 0 : if (expo(tmp) < BITS_IN_LONG)
2112 : {
2113 0 : ulong xx = roundr_safe(tmp)[2]; /* X fits in an ulong */
2114 0 : if (signe(tmp) > 0) /* = xx */
2115 : {
2116 0 : for (k=zeros+1; k<j; k++)
2117 0 : affrr(subrr(gmael(mu,kappa,k), mulur(xx, gmael(mu,j,k))),
2118 0 : gmael(mu,kappa,k));
2119 0 : set_avma(btop);
2120 0 : for (i=1; i<=n; i++)
2121 0 : gmael(B,kappa,i) = submuliu_inplace(gmael(B,kappa,i), gmael(B,j,i), xx);
2122 0 : for (i=1; i<=d; i++)
2123 0 : gmael(U,kappa,i) = submuliu_inplace(gmael(U,kappa,i), gmael(U,j,i), xx);
2124 0 : btop = avma;
2125 0 : ztmp = submuliu2n(mulii(gmael(G,j,j), sqru(xx)), gmael(G,kappa,j), xx, 1);
2126 0 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
2127 0 : gmael(G,kappa,kappa) = gerepileuptoint(btop, ztmp);
2128 0 : for (i=1; i<=j; i++)
2129 0 : gmael(G,kappa,i) = submuliu_inplace(gmael(G,kappa,i), gmael(G,j,i), xx);
2130 0 : for (i=j+1; i<kappa; i++)
2131 0 : gmael(G,kappa,i) = submuliu_inplace(gmael(G,kappa,i), gmael(G,i,j), xx);
2132 0 : for (i=kappa+1; i<=maxG; i++)
2133 0 : gmael(G,i,kappa) = submuliu_inplace(gmael(G,i,kappa), gmael(G,i,j), xx);
2134 : }
2135 : else /* = -xx */
2136 : {
2137 0 : for (k=zeros+1; k<j; k++)
2138 0 : affrr(addrr(gmael(mu,kappa,k), mulur(xx, gmael(mu,j,k))),
2139 0 : gmael(mu,kappa,k));
2140 0 : set_avma(btop);
2141 0 : for (i=1; i<=n; i++)
2142 0 : gmael(B,kappa,i) = addmuliu_inplace(gmael(B,kappa,i), gmael(B,j,i), xx);
2143 0 : for (i=1; i<=d; i++)
2144 0 : gmael(U,kappa,i) = addmuliu_inplace(gmael(U,kappa,i), gmael(U,j,i), xx);
2145 0 : btop = avma;
2146 0 : ztmp = addmuliu2n(mulii(gmael(G,j,j), sqru(xx)), gmael(G,kappa,j), xx, 1);
2147 0 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
2148 0 : gmael(G,kappa,kappa) = gerepileuptoint(btop, ztmp);
2149 0 : for (i=1; i<=j; i++)
2150 0 : gmael(G,kappa,i) = addmuliu_inplace(gmael(G,kappa,i), gmael(G,j,i), xx);
2151 0 : for (i=j+1; i<kappa; i++)
2152 0 : gmael(G,kappa,i) = addmuliu_inplace(gmael(G,kappa,i), gmael(G,i,j), xx);
2153 0 : for (i=kappa+1; i<=maxG; i++)
2154 0 : gmael(G,i,kappa) = addmuliu_inplace(gmael(G,i,kappa), gmael(G,i,j), xx);
2155 : }
2156 : }
2157 : else
2158 : {
2159 : long e;
2160 0 : GEN X = truncexpo(tmp, bit, &e); /* tmp ~ X * 2^e */
2161 0 : btop = avma;
2162 0 : for (k=zeros+1; k<j; k++)
2163 : {
2164 0 : GEN x = mulir(X, gmael(mu,j,k));
2165 0 : if (e) shiftr_inplace(x, e);
2166 0 : affrr(subrr(gmael(mu,kappa,k), x), gmael(mu,kappa,k));
2167 : }
2168 0 : set_avma(btop);
2169 0 : for (i=1; i<=n; i++)
2170 0 : gmael(B,kappa,i) = submulshift(gmael(B,kappa,i), gmael(B,j,i), X, e);
2171 0 : for (i=1; i<=d; i++)
2172 0 : gmael(U,kappa,i) = submulshift(gmael(U,kappa,i), gmael(U,j,i), X, e);
2173 0 : btop = avma;
2174 0 : ztmp = submulshift(mulshift(gmael(G,j,j), sqri(X), 2*e),
2175 0 : gmael(G,kappa,j), X, e+1);
2176 0 : ztmp = addii(gmael(G,kappa,kappa), ztmp);
2177 0 : gmael(G,kappa,kappa) = gerepileuptoint(btop, ztmp);
2178 0 : for (i=1; i<=j; i++)
2179 0 : gmael(G,kappa,i) = submulshift(gmael(G,kappa,i), gmael(G,j,i), X, e);
2180 0 : for ( ; i<kappa; i++)
2181 0 : gmael(G,kappa,i) = submulshift(gmael(G,kappa,i), gmael(G,i,j), X, e);
2182 0 : for (i=kappa+1; i<=maxG; i++)
2183 0 : gmael(G,i,kappa) = submulshift(gmael(G,i,kappa), gmael(G,i,j), X, e);
2184 : }
2185 : }
2186 0 : if (!go_on) break; /* Anything happened? */
2187 0 : aa = zeros+1;
2188 : }
2189 :
2190 0 : affir(gmael(G,kappa,kappa), gel(s,zeros+1));
2191 : /* the last s[kappa-1]=r[kappa][kappa] is computed only if kappa increases */
2192 0 : av = avma;
2193 0 : for (k=zeros+1; k<=kappa-2; k++)
2194 0 : affrr(subrr(gel(s,k), mulrr(gmael(mu,kappa,k), gmael(r,kappa,k))),
2195 0 : gel(s,k+1));
2196 0 : *pG = G; *pB = B; *pU = U; return gc_bool(av, 0);
2197 : }
2198 :
2199 : /* G integral Gram matrix, LLL-reduces (G,B,U) in place [apply base change
2200 : * transforms to B and U]. If (keepfirst), never swap with first vector.
2201 : * If G = NULL, we compute the Gram matrix incrementally.
2202 : * Return -1 on failure, else zeros = dim Kernel (>= 0) */
2203 : static long
2204 0 : fplll(GEN *pG, GEN *pB, GEN *pU, GEN *pr, double DELTA, double ETA,
2205 : long keepfirst, long prec)
2206 : {
2207 : pari_sp av, av2;
2208 0 : GEN mu, r, s, tmp, Gtmp, alpha, G = *pG, B = *pB, U = *pU;
2209 0 : GEN delta = dbltor(DELTA), eta = dbltor(ETA);
2210 0 : long d, maxG, kappa, kappa2, i, j, zeros, kappamax, incgram = !G, cnt = 0;
2211 :
2212 0 : if (incgram)
2213 : { /* incremental Gram matrix */
2214 0 : maxG = 2; d = lg(B)-1;
2215 0 : G = zeromatcopy(d, d);
2216 : }
2217 : else
2218 0 : maxG = d = lg(G)-1;
2219 :
2220 0 : mu = cgetg(d+1, t_MAT);
2221 0 : r = cgetg(d+1, t_MAT);
2222 0 : s = cgetg(d+1, t_VEC);
2223 0 : for (j = 1; j <= d; j++)
2224 : {
2225 0 : GEN M = cgetg(d+1, t_COL), R = cgetg(d+1, t_COL);
2226 0 : gel(mu,j)= M;
2227 0 : gel(r,j) = R;
2228 0 : gel(s,j) = cgetr(prec);
2229 0 : for (i = 1; i <= d; i++)
2230 : {
2231 0 : gel(R,i) = cgetr(prec);
2232 0 : gel(M,i) = cgetr(prec);
2233 : }
2234 : }
2235 0 : Gtmp = cgetg(d+1, t_VEC);
2236 0 : alpha = cgetg(d+1, t_VECSMALL);
2237 0 : av = avma;
2238 :
2239 : /* Step2: Initializing the main loop */
2240 0 : kappamax = 1;
2241 0 : i = 1;
2242 : do {
2243 0 : if (incgram) gmael(G,i,i) = ZV_dotsquare(gel(B,i));
2244 0 : affir(gmael(G,i,i), gmael(r,i,i));
2245 0 : } while (!signe(gmael(G,i,i)) && ++i <= d);
2246 0 : zeros = i-1; /* all basis vectors b_i with i <= zeros are zero vectors */
2247 0 : kappa = i;
2248 0 : for (i=zeros+1; i<=d; i++) alpha[i]=1;
2249 :
2250 0 : while (++kappa <= d)
2251 : {
2252 0 : if (kappa > kappamax)
2253 : {
2254 0 : if (DEBUGLEVEL>=4) err_printf("K%ld ",kappa);
2255 0 : kappamax = kappa;
2256 0 : if (incgram)
2257 : {
2258 0 : for (i=zeros+1; i<=kappa; i++)
2259 0 : gmael(G,kappa,i) = ZV_dotproduct(gel(B,kappa), gel(B,i));
2260 0 : maxG = kappamax;
2261 : }
2262 : }
2263 : /* Step3: Call to the Babai algorithm, mu,r,s updated in place */
2264 0 : if (Babai(av, kappa, &G,&B,&U, mu,r,s, alpha[kappa], zeros, maxG, eta, prec))
2265 0 : { *pG = incgram? NULL: G; *pB = B; *pU = U; return -1; }
2266 0 : av2 = avma;
2267 0 : if ((keepfirst && kappa == 2) ||
2268 0 : cmprr(mulrr(gmael(r,kappa-1,kappa-1), delta), gel(s,kappa-1)) <= 0)
2269 : { /* Step4: Success of Lovasz's condition */
2270 0 : alpha[kappa] = kappa;
2271 0 : tmp = mulrr(gmael(mu,kappa,kappa-1), gmael(r,kappa,kappa-1));
2272 0 : affrr(subrr(gel(s,kappa-1), tmp), gmael(r,kappa,kappa));
2273 0 : set_avma(av2); continue;
2274 : }
2275 : /* Step5: Find the right insertion index kappa, kappa2 = initial kappa */
2276 0 : if (DEBUGLEVEL>=4 && kappa==kappamax && signe(gel(s,kappa-1)))
2277 0 : if (++cnt > 20) { cnt = 0; err_printf("(%ld) ", expo(gel(s,1))); }
2278 0 : kappa2 = kappa;
2279 : do {
2280 0 : kappa--;
2281 0 : if (kappa < zeros+2 + (keepfirst ? 1: 0)) break;
2282 0 : tmp = mulrr(gmael(r,kappa-1,kappa-1), delta);
2283 0 : } while (cmprr(gel(s,kappa-1), tmp) <= 0);
2284 0 : set_avma(av2);
2285 0 : update_alpha(alpha, kappa, kappa2, kappamax);
2286 :
2287 : /* Step6: Update the mu's and r's */
2288 0 : rotate(mu, kappa2, kappa);
2289 0 : rotate(r, kappa2, kappa);
2290 0 : affrr(gel(s,kappa), gmael(r,kappa,kappa));
2291 :
2292 : /* Step7: Update G, B, U */
2293 0 : if (U) rotate(U, kappa2, kappa);
2294 0 : if (B) rotate(B, kappa2, kappa);
2295 0 : rotateG(G,kappa2,kappa, maxG, Gtmp);
2296 :
2297 : /* Step8: Prepare the next loop iteration */
2298 0 : if (kappa == zeros+1 && !signe(gmael(G,kappa,kappa)))
2299 : {
2300 0 : zeros++; kappa++;
2301 0 : affir(gmael(G,kappa,kappa), gmael(r,kappa,kappa));
2302 : }
2303 : }
2304 0 : if (pr) *pr = RgM_diagonal_shallow(r);
2305 0 : *pG = G; *pB = B; *pU = U; return zeros; /* success */
2306 : }
2307 :
2308 : /* do not support LLL_KER, LLL_ALL, LLL_KEEP_FIRST */
2309 : static GEN
2310 4700352 : ZM2_lll_norms(GEN x, long flag, GEN *pN)
2311 : {
2312 : GEN a,b,c,d;
2313 : GEN G, U;
2314 4700352 : if (flag & LLL_GRAM)
2315 7355 : G = x;
2316 : else
2317 4692997 : G = gram_matrix(x);
2318 4700330 : a = gcoeff(G,1,1); b = shifti(gcoeff(G,1,2),1); c = gcoeff(G,2,2);
2319 4700282 : d = qfb_disc3(a,b,c);
2320 4700294 : if (signe(d)>=0) return NULL;
2321 4699909 : G = redimagsl2(mkqfb(a,b,c,d),&U);
2322 4699969 : if (pN) (void) RgM_gram_schmidt(G, pN);
2323 4699969 : if (flag & LLL_INPLACE) return ZM2_mul(x,U);
2324 4699969 : return U;
2325 : }
2326 :
2327 : static void
2328 617216 : fplll_flatter(GEN *pG, GEN *pB, GEN *pU, long rank, long flag)
2329 : {
2330 617216 : if (!*pG)
2331 : {
2332 616255 : GEN T = ZM_flatter_rank(*pB, rank, flag);
2333 616257 : if (*pU)
2334 : {
2335 602263 : *pU = ZM_mul(*pU, T);
2336 602261 : *pB = ZM_mul(*pB, T);
2337 13994 : } else *pB = T;
2338 : } else
2339 : {
2340 961 : GEN T, G = *pG;
2341 961 : long i, j, l = lg(G);
2342 7207 : for (i = 1; i < l; i++)
2343 43383 : for(j = 1; j < i; j++)
2344 37137 : gmael(G,j,i) = gmael(G,i,j);
2345 961 : T = ZM_flattergram_rank(G, rank, flag);
2346 961 : if (*pU) *pU = ZM_mul(*pU, T);
2347 961 : *pG = qf_ZM_apply(*pG, T);
2348 : }
2349 617217 : }
2350 :
2351 : static GEN
2352 1082619 : get_gramschmidt(GEN M, long rank)
2353 : {
2354 : GEN B, Q, L;
2355 1082619 : long r = lg(M)-1, bitprec = 3*r + 30;
2356 1082619 : long prec = nbits2prec64(bitprec);
2357 1082618 : if (rank < r) M = vconcat(gshift(M,1), matid(r));
2358 1082618 : if (!QR_init(RgM_gtofp(M, prec), &B, &Q, &L, prec) || !gsisinv(L)) return NULL;
2359 467411 : return L;
2360 : }
2361 :
2362 : static GEN
2363 44232 : get_gaussred(GEN M, long rank)
2364 : {
2365 44232 : pari_sp ltop = avma;
2366 44232 : long r = lg(M)-1, bitprec = 3*r + 30, prec = nbits2prec64(bitprec);
2367 : GEN R;
2368 44232 : if (rank < r) M = RgM_Rg_add(gshift(M, 1), gen_1);
2369 44232 : R = RgM_Cholesky(RgM_gtofp(M, prec), prec);
2370 44232 : if (!R) return NULL;
2371 43271 : return gerepilecopy(ltop, R);
2372 : }
2373 :
2374 : /* Assume x a ZM, if pN != NULL, set it to Gram-Schmidt (squared) norms
2375 : * The following modes are supported:
2376 : * - flag & LLL_INPLACE: x a lattice basis, return x*U
2377 : * - flag & LLL_GRAM: x a Gram matrix / else x a lattice basis; return
2378 : * LLL base change matrix U [LLL_IM]
2379 : * kernel basis [LLL_KER, nonreduced]
2380 : * both [LLL_ALL] */
2381 : GEN
2382 6953722 : ZM_lll_norms(GEN x, double DELTA, long flag, GEN *pN)
2383 : {
2384 6953722 : pari_sp av = avma;
2385 6953722 : const double ETA = 0.51;
2386 6953722 : const long keepfirst = flag & LLL_KEEP_FIRST;
2387 6953722 : long p, zeros = -1, n = lg(x)-1, is_upper, is_lower, useflatter = 0, rank;
2388 6953722 : GEN G, B, U, L = NULL;
2389 : pari_timer T;
2390 6953722 : long thre[]={31783,34393,20894,22525,13533,1928,672,671,
2391 : 422,506,315,313,222,205,167,154,139,138,
2392 : 110,120,98,94,81,75,74,64,74,74,
2393 : 79,96,112,111,105,104,96,86,84,78,75,70,66,62,62,57,56,47,45,52,50,44,48,42,36,35,35,34,40,33,34,32,36,31,
2394 : 38,38,40,38,38,37,35,31,34,36,34,32,34,32,28,27,25,31,25,27,28,26,25,21,21,25,25,22,21,24,24,22,21,23,22,22,22,22,21,24,21,22,19,20,19,20,19,19,19,18,19,18,18,20,19,20,18,19,18,21,18,20,18,18};
2395 6953722 : long thsn[]={23280,30486,50077,44136,78724,15690,1801,1611,
2396 : 981,1359,978,1042,815,866,788,775,726,712,
2397 : 626,613,548,564,474,481,504,447,453,508,
2398 : 705,794,1008,946,767,898,886,763,842,757,
2399 : 725,774,639,655,705,627,635,704,511,613,
2400 : 583,595,568,640,541,640,567,540,577,584,
2401 : 546,509,526,572,637,746,772,743,743,742,800,708,832,768,707,692,692,768,696,635,709,694,768,719,655,569,590,644,685,623,627,720,633,636,602,635,575,631,642,647,632,656,573,511,688,640,528,616,511,559,601,620,635,688,608,768,658,582,644,704,555,673,600,601,641,661,601,670};
2402 6953722 : if (n <= 1) return lll_trivial(x, flag);
2403 6844298 : if (nbrows(x)==0)
2404 : {
2405 15162 : if (flag & LLL_KER) return matid(n);
2406 15162 : if (flag & (LLL_INPLACE|LLL_IM)) return cgetg(1,t_MAT);
2407 0 : retmkvec2(matid(n), cgetg(1,t_MAT));
2408 : }
2409 6829301 : if (n==2 && nbrows(x)==2 && (flag&LLL_IM) && !keepfirst)
2410 : {
2411 4700353 : U = ZM2_lll_norms(x, flag, pN);
2412 4700355 : if (U) return U;
2413 : }
2414 2129327 : if (flag & LLL_GRAM)
2415 60234 : { G = x; B = NULL; U = matid(n); is_upper = 0; is_lower = 0; }
2416 : else
2417 : {
2418 2069093 : G = NULL; B = x; U = (flag & LLL_INPLACE)? NULL: matid(n);
2419 2069089 : is_upper = (flag & LLL_UPPER) || ZM_is_upper(B);
2420 2069089 : is_lower = !B || is_upper || keepfirst ? 0: ZM_is_lower(B);
2421 2069083 : if (is_lower) L = RgM_flip(B);
2422 : }
2423 2129316 : rank = (flag&LLL_NOFLATTER) ? 0: ZM_rank(x);
2424 2129316 : if (n > 2 && !(flag&LLL_NOFLATTER))
2425 1733027 : {
2426 1688795 : GEN R = B ? (is_upper ? B : (is_lower ? L : get_gramschmidt(B, rank)))
2427 3421806 : : get_gaussred(G, rank);
2428 1733014 : if (R)
2429 : {
2430 1116857 : long spr = spread(R), sz = mpexpo(gsupnorm(R, DEFAULTPREC)), thr;
2431 1116870 : if (DEBUGLEVEL>=5) err_printf("LLL: dim %ld, size %ld, spread %ld\n",n, sz, spr);
2432 1116870 : if ((is_upper && ZM_is_knapsack(B)) || (is_lower && ZM_is_knapsack(L)))
2433 92442 : thr = thsn[minss(n-3,numberof(thsn)-1)];
2434 : else
2435 : {
2436 1024428 : thr = thre[minss(n-3,numberof(thre)-1)];
2437 1024428 : if (n>=10) sz = spr;
2438 : }
2439 1116870 : useflatter = sz >= thr;
2440 : } else
2441 616157 : useflatter = 1;
2442 396290 : } else useflatter = 0;
2443 2129317 : if(DEBUGLEVEL>=4) timer_start(&T);
2444 2129317 : if (useflatter)
2445 : {
2446 617216 : if (is_lower)
2447 : {
2448 0 : fplll_flatter(&G, &L, &U, rank, flag | LLL_UPPER);
2449 0 : B = RgM_flop(L);
2450 0 : if (U) U = RgM_flop(U);
2451 : }
2452 : else
2453 617216 : fplll_flatter(&G, &B, &U, rank, flag | (is_upper? LLL_UPPER:0));
2454 617217 : if (DEBUGLEVEL>=4 && !(flag & LLL_NOCERTIFY))
2455 0 : timer_printf(&T, "FLATTER");
2456 : }
2457 2129317 : if (!(flag & LLL_GRAM))
2458 : {
2459 : long t;
2460 2069084 : B = gcopy(B);
2461 2069086 : if(DEBUGLEVEL>=4)
2462 0 : err_printf("Entering L^2 (double): dim %ld, LLL-parameters (%.3f,%.3f)\n",
2463 : n, DELTA,ETA);
2464 2069086 : zeros = fplll_fast(&B, &U, DELTA, ETA, keepfirst);
2465 2069093 : if (DEBUGLEVEL>=4) timer_printf(&T, zeros < 0? "LLL (failed)": "LLL");
2466 2073204 : for (p = DEFAULTPREC, t = 0; zeros < 0 && t < 1 ; p += EXTRAPREC64, t++)
2467 : {
2468 4111 : if (DEBUGLEVEL>=4)
2469 0 : err_printf("Entering L^2 (heuristic): LLL-parameters (%.3f,%.3f), prec = %d/%d\n", DELTA, ETA, p, p);
2470 4111 : zeros = fplll_heuristic(&B, &U, DELTA, ETA, keepfirst, p, p);
2471 4111 : gc_lll(av, 2, &B, &U);
2472 4111 : if (DEBUGLEVEL>=4) timer_printf(&T, zeros < 0? "LLL (failed)": "LLL");
2473 : }
2474 : } else
2475 60233 : G = gcopy(G);
2476 2129326 : if (zeros < 0 || !(flag & LLL_NOCERTIFY))
2477 : {
2478 1568456 : if(DEBUGLEVEL>=4)
2479 0 : err_printf("Entering L^2 (dpe): LLL-parameters (%.3f,%.3f)\n", DELTA,ETA);
2480 1568456 : zeros = fplll_dpe(&G, &B, &U, pN, DELTA, ETA, keepfirst);
2481 1568447 : if (DEBUGLEVEL>=4) timer_printf(&T, zeros < 0? "LLL (failed)": "LLL");
2482 1568448 : if (zeros < 0)
2483 0 : for (p = DEFAULTPREC;; p += EXTRAPREC64)
2484 : {
2485 0 : if (DEBUGLEVEL>=4)
2486 0 : err_printf("Entering L^2: LLL-parameters (%.3f,%.3f), prec = %d\n",
2487 : DELTA,ETA, p);
2488 0 : zeros = fplll(&G, &B, &U, pN, DELTA, ETA, keepfirst, p);
2489 0 : if (DEBUGLEVEL>=4) timer_printf(&T, zeros < 0? "LLL (failed)": "LLL");
2490 0 : if (zeros >= 0) break;
2491 0 : gc_lll(av, 3, &G, &B, &U);
2492 : }
2493 : }
2494 2129318 : return lll_finish(U? U: B, zeros, flag);
2495 : }
2496 :
2497 : /********************************************************************/
2498 : /** **/
2499 : /** LLL OVER K[X] **/
2500 : /** **/
2501 : /********************************************************************/
2502 : static int
2503 504 : pslg(GEN x)
2504 : {
2505 : long tx;
2506 504 : if (gequal0(x)) return 2;
2507 448 : tx = typ(x); return is_scalar_t(tx)? 3: lg(x);
2508 : }
2509 :
2510 : static int
2511 196 : REDgen(long k, long l, GEN h, GEN L, GEN B)
2512 : {
2513 196 : GEN q, u = gcoeff(L,k,l);
2514 : long i;
2515 :
2516 196 : if (pslg(u) < pslg(B)) return 0;
2517 :
2518 140 : q = gneg(gdeuc(u,B));
2519 140 : gel(h,k) = gadd(gel(h,k), gmul(q,gel(h,l)));
2520 140 : for (i=1; i<l; i++) gcoeff(L,k,i) = gadd(gcoeff(L,k,i), gmul(q,gcoeff(L,l,i)));
2521 140 : gcoeff(L,k,l) = gadd(gcoeff(L,k,l), gmul(q,B)); return 1;
2522 : }
2523 :
2524 : static int
2525 196 : do_SWAPgen(GEN h, GEN L, GEN B, long k, GEN fl, int *flc)
2526 : {
2527 : GEN p1, la, la2, Bk;
2528 : long ps1, ps2, i, j, lx;
2529 :
2530 196 : if (!fl[k-1]) return 0;
2531 :
2532 140 : la = gcoeff(L,k,k-1); la2 = gsqr(la);
2533 140 : Bk = gel(B,k);
2534 140 : if (fl[k])
2535 : {
2536 56 : GEN q = gadd(la2, gmul(gel(B,k-1),gel(B,k+1)));
2537 56 : ps1 = pslg(gsqr(Bk));
2538 56 : ps2 = pslg(q);
2539 56 : if (ps1 <= ps2 && (ps1 < ps2 || !*flc)) return 0;
2540 28 : *flc = (ps1 != ps2);
2541 28 : gel(B,k) = gdiv(q, Bk);
2542 : }
2543 :
2544 112 : swap(gel(h,k-1), gel(h,k)); lx = lg(L);
2545 112 : for (j=1; j<k-1; j++) swap(gcoeff(L,k-1,j), gcoeff(L,k,j));
2546 112 : if (fl[k])
2547 : {
2548 28 : for (i=k+1; i<lx; i++)
2549 : {
2550 0 : GEN t = gcoeff(L,i,k);
2551 0 : p1 = gsub(gmul(gel(B,k+1),gcoeff(L,i,k-1)), gmul(la,t));
2552 0 : gcoeff(L,i,k) = gdiv(p1, Bk);
2553 0 : p1 = gadd(gmul(la,gcoeff(L,i,k-1)), gmul(gel(B,k-1),t));
2554 0 : gcoeff(L,i,k-1) = gdiv(p1, Bk);
2555 : }
2556 : }
2557 84 : else if (!gequal0(la))
2558 : {
2559 28 : p1 = gdiv(la2, Bk);
2560 28 : gel(B,k+1) = gel(B,k) = p1;
2561 28 : for (i=k+2; i<=lx; i++) gel(B,i) = gdiv(gmul(p1,gel(B,i)),Bk);
2562 28 : for (i=k+1; i<lx; i++)
2563 0 : gcoeff(L,i,k-1) = gdiv(gmul(la,gcoeff(L,i,k-1)), Bk);
2564 28 : for (j=k+1; j<lx-1; j++)
2565 0 : for (i=j+1; i<lx; i++)
2566 0 : gcoeff(L,i,j) = gdiv(gmul(p1,gcoeff(L,i,j)), Bk);
2567 : }
2568 : else
2569 : {
2570 56 : gcoeff(L,k,k-1) = gen_0;
2571 56 : for (i=k+1; i<lx; i++)
2572 : {
2573 0 : gcoeff(L,i,k) = gcoeff(L,i,k-1);
2574 0 : gcoeff(L,i,k-1) = gen_0;
2575 : }
2576 56 : gel(B,k) = gel(B,k-1); fl[k] = 1; fl[k-1] = 0;
2577 : }
2578 112 : return 1;
2579 : }
2580 :
2581 : static void
2582 168 : incrementalGSgen(GEN x, GEN L, GEN B, long k, GEN fl)
2583 : {
2584 168 : GEN u = NULL; /* gcc -Wall */
2585 : long i, j;
2586 420 : for (j = 1; j <= k; j++)
2587 252 : if (j==k || fl[j])
2588 : {
2589 252 : u = gcoeff(x,k,j);
2590 252 : if (!is_extscalar_t(typ(u))) pari_err_TYPE("incrementalGSgen",u);
2591 336 : for (i=1; i<j; i++)
2592 84 : if (fl[i])
2593 : {
2594 84 : u = gsub(gmul(gel(B,i+1),u), gmul(gcoeff(L,k,i),gcoeff(L,j,i)));
2595 84 : u = gdiv(u, gel(B,i));
2596 : }
2597 252 : gcoeff(L,k,j) = u;
2598 : }
2599 168 : if (gequal0(u)) gel(B,k+1) = gel(B,k);
2600 : else
2601 : {
2602 112 : gel(B,k+1) = gcoeff(L,k,k); gcoeff(L,k,k) = gen_1; fl[k] = 1;
2603 : }
2604 168 : }
2605 :
2606 : static GEN
2607 168 : lllgramallgen(GEN x, long flag)
2608 : {
2609 168 : long lx = lg(x), i, j, k, l, n;
2610 : pari_sp av;
2611 : GEN B, L, h, fl;
2612 : int flc;
2613 :
2614 168 : n = lx-1; if (n<=1) return lll_trivial(x,flag);
2615 84 : if (lgcols(x) != lx) pari_err_DIM("lllgramallgen");
2616 :
2617 84 : fl = cgetg(lx, t_VECSMALL);
2618 :
2619 84 : av = avma;
2620 84 : B = scalarcol_shallow(gen_1, lx);
2621 84 : L = cgetg(lx,t_MAT);
2622 252 : for (j=1; j<lx; j++) { gel(L,j) = zerocol(n); fl[j] = 0; }
2623 :
2624 84 : h = matid(n);
2625 252 : for (i=1; i<lx; i++)
2626 168 : incrementalGSgen(x, L, B, i, fl);
2627 84 : flc = 0;
2628 84 : for(k=2;;)
2629 : {
2630 196 : if (REDgen(k, k-1, h, L, gel(B,k))) flc = 1;
2631 196 : if (do_SWAPgen(h, L, B, k, fl, &flc)) { if (k > 2) k--; }
2632 : else
2633 : {
2634 84 : for (l=k-2; l>=1; l--)
2635 0 : if (REDgen(k, l, h, L, gel(B,l+1))) flc = 1;
2636 84 : if (++k > n) break;
2637 : }
2638 112 : if (gc_needed(av,1))
2639 : {
2640 0 : if(DEBUGMEM>1) pari_warn(warnmem,"lllgramallgen");
2641 0 : gerepileall(av,3,&B,&L,&h);
2642 : }
2643 : }
2644 140 : k=1; while (k<lx && !fl[k]) k++;
2645 84 : return lll_finish(h,k-1,flag);
2646 : }
2647 :
2648 : static GEN
2649 168 : lllallgen(GEN x, long flag)
2650 : {
2651 168 : pari_sp av = avma;
2652 168 : if (!(flag & LLL_GRAM)) x = gram_matrix(x);
2653 84 : else if (!RgM_is_square_mat(x)) pari_err_DIM("qflllgram");
2654 168 : return gerepilecopy(av, lllgramallgen(x, flag));
2655 : }
2656 : GEN
2657 42 : lllgen(GEN x) { return lllallgen(x, LLL_IM); }
2658 : GEN
2659 42 : lllkerimgen(GEN x) { return lllallgen(x, LLL_ALL); }
2660 : GEN
2661 42 : lllgramgen(GEN x) { return lllallgen(x, LLL_IM|LLL_GRAM); }
2662 : GEN
2663 42 : lllgramkerimgen(GEN x) { return lllallgen(x, LLL_ALL|LLL_GRAM); }
2664 :
2665 : static GEN
2666 36699 : lllall(GEN x, long flag)
2667 36699 : { pari_sp av = avma; return gerepilecopy(av, ZM_lll(x, LLLDFT, flag)); }
2668 : GEN
2669 183 : lllint(GEN x) { return lllall(x, LLL_IM); }
2670 : GEN
2671 35 : lllkerim(GEN x) { return lllall(x, LLL_ALL); }
2672 : GEN
2673 36439 : lllgramint(GEN x)
2674 36439 : { if (!RgM_is_square_mat(x)) pari_err_DIM("qflllgram");
2675 36439 : return lllall(x, LLL_IM | LLL_GRAM); }
2676 : GEN
2677 35 : lllgramkerim(GEN x)
2678 35 : { if (!RgM_is_square_mat(x)) pari_err_DIM("qflllgram");
2679 35 : return lllall(x, LLL_ALL | LLL_GRAM); }
2680 :
2681 : GEN
2682 5268192 : lllfp(GEN x, double D, long flag)
2683 : {
2684 5268192 : long n = lg(x)-1;
2685 5268192 : pari_sp av = avma;
2686 : GEN h;
2687 5268192 : if (n <= 1) return lll_trivial(x,flag);
2688 4607805 : if (flag & LLL_GRAM)
2689 : {
2690 9270 : if (!RgM_is_square_mat(x)) pari_err_DIM("qflllgram");
2691 9256 : if (isinexact(x))
2692 : {
2693 9165 : x = RgM_Cholesky(x, gprecision(x));
2694 9165 : if (!x) return NULL;
2695 9165 : flag &= ~LLL_GRAM;
2696 : }
2697 : }
2698 4607791 : h = ZM_lll(RgM_rescale_to_int(x), D, flag);
2699 4607742 : return gerepilecopy(av, h);
2700 : }
2701 :
2702 : GEN
2703 9089 : lllgram(GEN x) { return lllfp(x,LLLDFT,LLL_GRAM|LLL_IM); }
2704 : GEN
2705 1174956 : lll(GEN x) { return lllfp(x,LLLDFT,LLL_IM); }
2706 :
2707 : static GEN
2708 63 : qflllgram(GEN x)
2709 : {
2710 63 : GEN T = lllgram(x);
2711 42 : if (!T) pari_err_PREC("qflllgram");
2712 42 : return T;
2713 : }
2714 :
2715 : GEN
2716 301 : qflll0(GEN x, long flag)
2717 : {
2718 301 : if (typ(x) != t_MAT) pari_err_TYPE("qflll",x);
2719 301 : switch(flag)
2720 : {
2721 49 : case 0: return lll(x);
2722 63 : case 1: return lllfp(x, LLLDFT, LLL_IM | LLL_NOFLATTER);
2723 49 : case 2: RgM_check_ZM(x,"qflll"); return lllintpartial(x);
2724 7 : case 3: RgM_check_ZM(x,"qflll"); return lllall(x, LLL_INPLACE);
2725 49 : case 4: RgM_check_ZM(x,"qflll"); return lllkerim(x);
2726 42 : case 5: return lllkerimgen(x);
2727 42 : case 8: return lllgen(x);
2728 0 : default: pari_err_FLAG("qflll");
2729 : }
2730 : return NULL; /* LCOV_EXCL_LINE */
2731 : }
2732 :
2733 : GEN
2734 245 : qflllgram0(GEN x, long flag)
2735 : {
2736 245 : if (typ(x) != t_MAT) pari_err_TYPE("qflllgram",x);
2737 245 : switch(flag)
2738 : {
2739 63 : case 0: return qflllgram(x);
2740 49 : case 1: return lllfp(x, LLLDFT, LLL_IM | LLL_GRAM | LLL_NOFLATTER);
2741 49 : case 4: RgM_check_ZM(x,"qflllgram"); return lllgramkerim(x);
2742 42 : case 5: return lllgramkerimgen(x);
2743 42 : case 8: return lllgramgen(x);
2744 0 : default: pari_err_FLAG("qflllgram");
2745 : }
2746 : return NULL; /* LCOV_EXCL_LINE */
2747 : }
2748 :
2749 : /********************************************************************/
2750 : /** **/
2751 : /** INTEGRAL KERNEL (LLL REDUCED) **/
2752 : /** **/
2753 : /********************************************************************/
2754 : static GEN
2755 70 : kerint0(GEN M)
2756 : {
2757 : /* return ZM_lll(M, LLLDFT, LLL_KER); */
2758 70 : GEN U, H = ZM_hnflll(M,&U,1);
2759 70 : long d = lg(M)-lg(H);
2760 70 : if (!d) return cgetg(1, t_MAT);
2761 70 : return ZM_lll(vecslice(U,1,d), LLLDFT, LLL_INPLACE);
2762 : }
2763 : GEN
2764 42 : kerint(GEN M)
2765 : {
2766 42 : pari_sp av = avma;
2767 42 : return gerepilecopy(av, kerint0(M));
2768 : }
2769 : /* OBSOLETE: use kerint */
2770 : GEN
2771 28 : matkerint0(GEN M, long flag)
2772 : {
2773 28 : pari_sp av = avma;
2774 28 : if (typ(M) != t_MAT) pari_err_TYPE("matkerint",M);
2775 28 : M = Q_primpart(M);
2776 28 : RgM_check_ZM(M, "kerint");
2777 28 : switch(flag)
2778 : {
2779 28 : case 0:
2780 28 : case 1: return gerepilecopy(av, kerint0(M));
2781 0 : default: pari_err_FLAG("matkerint");
2782 : }
2783 : return NULL; /* LCOV_EXCL_LINE */
2784 : }
|