Code coverage tests

This page documents the degree to which the PARI/GP source code is tested by our public test suite, distributed with the source distribution in directory src/test/. This is measured by the gcov utility; we then process gcov output using the lcov frond-end.

We test a few variants depending on Configure flags on the pari.math.u-bordeaux.fr machine (x86_64 architecture), and agregate them in the final report:

The target is to exceed 90% coverage for all mathematical modules (given that branches depending on DEBUGLEVEL or DEBUGMEM are not covered). This script is run to produce the results below.

LCOV - code coverage report
Current view: top level - kernel/none - mp.c (source / functions) Hit Total Coverage
Test: PARI/GP v2.14.0 lcov report (development 27713-af4b496856) Lines: 1127 1153 97.7 %
Date: 2022-05-21 10:32:40 Functions: 70 70 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #line 2 "../src/kernel/none/mp.c"
       2             : /* Copyright (C) 2000-2003 The PARI group.
       3             : 
       4             : This file is part of the PARI/GP package.
       5             : 
       6             : PARI/GP is free software; you can redistribute it and/or modify it under the
       7             : terms of the GNU General Public License as published by the Free Software
       8             : Foundation; either version 2 of the License, or (at your option) any later
       9             : version. It is distributed in the hope that it will be useful, but WITHOUT
      10             : ANY WARRANTY WHATSOEVER.
      11             : 
      12             : Check the License for details. You should have received a copy of it, along
      13             : with the package; see the file 'COPYING'. If not, write to the Free Software
      14             : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
      15             : 
      16             : /***********************************************************************/
      17             : /**                                                                   **/
      18             : /**                       MULTIPRECISION KERNEL                       **/
      19             : /**                                                                   **/
      20             : /***********************************************************************/
      21             : #include "pari.h"
      22             : #include "paripriv.h"
      23             : #include "../src/kernel/none/tune-gen.h"
      24             : 
      25             : void
      26         746 : pari_kernel_init(void) { }
      27             : void
      28         744 : pari_kernel_close(void) { }
      29             : const char *
      30           2 : pari_kernel_version(void) { return ""; }
      31             : 
      32             : /* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't
      33             :  * GENs but pairs (long *a, long na) representing a list of digits (in basis
      34             :  * BITS_IN_LONG) : a[0], ..., a[na-1]. [ In ordre to facilitate splitting: no
      35             :  * need to reintroduce codewords ] */
      36             : 
      37             : #define LIMBS(x)  ((x)+2)
      38             : #define NLIMBS(x) (lgefint(x)-2)
      39             : 
      40             : /* Normalize a nonnegative integer */
      41             : GEN
      42  1202821269 : int_normalize(GEN x, long known_zero_words)
      43             : {
      44  1202821269 :   long i, lx = lgefint(x);
      45             :   GEN x0;
      46  1202821269 :   if (lx == 2) { x[1] = evalsigne(0) | evallgefint(2); return x; }
      47  1202821269 :   if (!known_zero_words && x[2]) return x;
      48  7277118234 :   for (i = 2+known_zero_words; i < lx; i++)
      49  7166968422 :     if (x[i]) break;
      50   777178215 :   x0 = x; i -= 2; x += i;
      51   777178215 :   if (x0 == (GEN)avma) set_avma((pari_sp)x);
      52   518422251 :   else stackdummy((pari_sp)(x0+i), (pari_sp)x0);
      53   777178215 :   lx -= i;
      54   777178215 :   x[0] = evaltyp(t_INT) | evallg(lx);
      55   777178215 :   if (lx == 2) x[1] = evalsigne(0) | evallgefint(lx);
      56   667028403 :   else         x[1] = evalsigne(1) | evallgefint(lx);
      57   777178215 :   return x;
      58             : }
      59             : 
      60             : /***********************************************************************/
      61             : /**                                                                   **/
      62             : /**                      ADDITION / SUBTRACTION                       **/
      63             : /**                                                                   **/
      64             : /***********************************************************************/
      65             : 
      66             : GEN
      67     2238555 : setloop(GEN a)
      68             : {
      69     2238555 :   pari_sp av = avma;
      70     2238555 :   (void)cgetg(lgefint(a) + 3, t_VECSMALL);
      71     2238555 :   return icopy_avma(a, av); /* two cells of extra space before a */
      72             : }
      73             : 
      74             : /* we had a = setloop(?), then some incloops. Reset a to b */
      75             : GEN
      76      128238 : resetloop(GEN a, GEN b) {
      77      128238 :   long lb = lgefint(b);
      78      128238 :   a += lgefint(a) - lb;
      79      128238 :   a[0] = evaltyp(t_INT) | evallg(lb);
      80      128238 :   affii(b, a); return a;
      81             : }
      82             : 
      83             : /* assume a > 0, initialized by setloop. Do a++ */
      84             : static GEN
      85    31598274 : incpos(GEN a)
      86             : {
      87    31598274 :   long i, l = lgefint(a);
      88    31598277 :   for (i=l-1; i>1; i--)
      89    31598274 :     if (++a[i]) return a;
      90           3 :   l++; a--; /* use extra cell */
      91           3 :   a[0]=evaltyp(t_INT) | _evallg(l);
      92           3 :   a[1]=evalsigne(1) | evallgefint(l);
      93           3 :   a[2]=1; return a;
      94             : }
      95             : 
      96             : /* assume a < 0, initialized by setloop. Do a++ */
      97             : static GEN
      98       25341 : incneg(GEN a)
      99             : {
     100       25341 :   long i, l = lgefint(a)-1;
     101       25341 :   if (uel(a,l)--)
     102             :   {
     103       25338 :     if (l == 2 && !a[2])
     104             :     {
     105        1203 :       a++; /* save one cell */
     106        1203 :       a[0] = evaltyp(t_INT) | _evallg(2);
     107        1203 :       a[1] = evalsigne(0) | evallgefint(2);
     108             :     }
     109       25338 :     return a;
     110             :   }
     111           3 :   for (i = l-1;; i--) /* finishes since a[2] != 0 */
     112           3 :     if (uel(a,i)--) break;
     113           3 :   if (!a[2])
     114             :   {
     115           3 :     a++; /* save one cell */
     116           3 :     a[0] = evaltyp(t_INT) | _evallg(l);
     117           3 :     a[1] = evalsigne(-1) | evallgefint(l);
     118             :   }
     119           3 :   return a;
     120             : }
     121             : 
     122             : /* assume a initialized by setloop. Do a++ */
     123             : GEN
     124    31872654 : incloop(GEN a)
     125             : {
     126    31872654 :   switch(signe(a))
     127             :   {
     128      249039 :     case 0: a--; /* use extra cell */
     129      249039 :       a[0]=evaltyp(t_INT) | _evallg(3);
     130      249039 :       a[1]=evalsigne(1) | evallgefint(3);
     131      249039 :       a[2]=1; return a;
     132       25341 :     case -1: return incneg(a);
     133    31598274 :     default: return incpos(a);
     134             :   }
     135             : }
     136             : 
     137             : INLINE GEN
     138  1668159624 : adduispec(ulong s, GEN x, long nx)
     139             : {
     140  1668159624 :   GEN xd, zd = (GEN)avma;
     141             :   long lz;
     142             : 
     143  1668159624 :   if (nx == 1) return adduu(s, uel(x,0));
     144   179063355 :   lz = nx+3; (void)new_chunk(lz);
     145   179063355 :   xd = x + nx;
     146   179063355 :   *--zd = (ulong)*--xd + s;
     147   179063355 :   if ((ulong)*zd < s)
     148             :     for(;;)
     149             :     {
     150    23482746 :       if (xd == x) { *--zd = 1; break; } /* enlarge z */
     151    19783569 :       *--zd = ((ulong)*--xd) + 1;
     152    19783569 :       if (*zd) { lz--; break; }
     153             :     }
     154   162382677 :   else lz--;
     155  1386842688 :   while (xd > x) *--zd = *--xd;
     156   179063355 :   *--zd = evalsigne(1) | evallgefint(lz);
     157   179063355 :   *--zd = evaltyp(t_INT) | evallg(lz);
     158   179063355 :   return gc_const((pari_sp)zd, zd);
     159             : }
     160             : 
     161             : GEN
     162   267273180 : adduispec_offset(ulong s, GEN x, long offset, long nx)
     163             : {
     164   267273180 :   GEN xd = x+lgefint(x)-nx-offset;
     165   351955224 :   while (nx && *xd==0) {xd++; nx--;}
     166   267273180 :   if (!nx) return utoi(s);
     167   239042610 :   return adduispec(s,xd,nx);
     168             : }
     169             : 
     170             : static GEN
     171  5840317479 : addiispec(GEN x, GEN y, long nx, long ny)
     172             : {
     173             :   GEN xd, yd, zd;
     174  5840317479 :   long lz, i = -2;
     175             :   LOCAL_OVERFLOW;
     176             : 
     177  5840317479 :   if (nx < ny) swapspec(x,y, nx,ny);
     178  5840317479 :   if (ny == 1) return adduispec(*y,x,nx);
     179  4477227543 :   zd = (GEN)avma;
     180  4477227543 :   lz = nx+3; (void)new_chunk(lz);
     181  4477227543 :   xd = x + nx;
     182  4477227543 :   yd = y + ny;
     183  4477227543 :   zd[-1] = addll(xd[-1], yd[-1]);
     184             : #ifdef addllx8
     185  5913988630 :   for (  ; i-8 > -ny; i-=8)
     186  4421579449 :     addllx8(xd+i, yd+i, zd+i, overflow);
     187             : #endif
     188 95610076510 :   for (  ; i >= -ny; i--) zd[i] = addllx(xd[i], yd[i]);
     189  4477227543 :   if (overflow)
     190             :     for(;;)
     191             :     {
     192  1206767154 :       if (i < -nx) { zd[i] = 1; i--; break; } /* enlarge z */
     193   767858895 :       zd[i] = uel(xd,i) + 1;
     194   767858895 :       if (zd[i]) { i--; lz--; break; }
     195   110906382 :       i--;
     196             :     }
     197  3381366771 :   else lz--;
     198 40973073693 :   for (; i >= -nx; i--) zd[i] = xd[i];
     199  4477227543 :   zd += i+1;
     200  4477227543 :   *--zd = evalsigne(1) | evallgefint(lz);
     201  4477227543 :   *--zd = evaltyp(t_INT) | evallg(lz);
     202  4477227543 :   return gc_const((pari_sp)zd, zd);
     203             : }
     204             : 
     205             : /* assume x >= s */
     206             : INLINE GEN
     207  1575062526 : subiuspec(GEN x, ulong s, long nx)
     208             : {
     209  1575062526 :   GEN xd, zd = (GEN)avma;
     210             :   long lz;
     211             :   LOCAL_OVERFLOW;
     212             : 
     213  1575062526 :   if (nx == 1) return utoi(x[0] - s);
     214             : 
     215   380489607 :   lz = nx+2; (void)new_chunk(lz);
     216   380489607 :   xd = x + nx;
     217   380489607 :   *--zd = subll(*--xd, s);
     218   380489607 :   if (overflow)
     219             :     for(;;)
     220             :     {
     221    26470491 :       *--zd = ((ulong)*--xd) - 1;
     222    70024722 :       if (*xd) break;
     223             :     }
     224   380489607 :   if (xd == x)
     225    69004668 :     while (*zd == 0) { zd++; lz--; } /* shorten z */
     226             :   else
     227 13762161712 :     do  *--zd = *--xd; while (xd > x);
     228   380489607 :   *--zd = evalsigne(1) | evallgefint(lz);
     229   380489607 :   *--zd = evaltyp(t_INT) | evallg(lz);
     230   380489607 :   return gc_const((pari_sp)zd, zd);
     231             : }
     232             : 
     233             : /* assume x > y */
     234             : static GEN
     235  4687364895 : subiispec(GEN x, GEN y, long nx, long ny)
     236             : {
     237             :   GEN xd,yd,zd;
     238  4687364895 :   long lz, i = -2;
     239             :   LOCAL_OVERFLOW;
     240             : 
     241  4687364895 :   if (ny==1) return subiuspec(x,*y,nx);
     242  3392600046 :   zd = (GEN)avma;
     243  3392600046 :   lz = nx+2; (void)new_chunk(lz);
     244  3392600046 :   xd = x + nx;
     245  3392600046 :   yd = y + ny;
     246  3392600046 :   zd[-1] = subll(xd[-1], yd[-1]);
     247             : #ifdef subllx8
     248  6901829969 :   for (  ; i-8 > -ny; i-=8)
     249  5770963288 :     subllx8(xd+i, yd+i, zd+i, overflow);
     250             : #endif
     251 >11107*10^7 :   for (  ; i >= -ny; i--) zd[i] = subllx(xd[i], yd[i]);
     252  3392600046 :   if (overflow)
     253             :     for(;;)
     254             :     {
     255  1818253584 :       zd[i] = uel(xd,i) - 1;
     256  2579506970 :       if (xd[i--]) break;
     257             :     }
     258  3392600046 :   if (i>=-nx)
     259 12952393423 :     for (; i >= -nx; i--) zd[i] = xd[i];
     260             :   else
     261  4145231596 :     while (zd[i+1] == 0) { i++; lz--; } /* shorten z */
     262  3392600046 :   zd += i+1;
     263  3392600046 :   *--zd = evalsigne(1) | evallgefint(lz);
     264  3392600046 :   *--zd = evaltyp(t_INT) | evallg(lz);
     265  3392600046 :   return gc_const((pari_sp)zd, zd);
     266             : }
     267             : 
     268             : static void
     269   353531471 : roundr_up_ip(GEN x, long l)
     270             : {
     271   353531471 :   long i = l;
     272             :   for(;;)
     273             :   {
     274   354277820 :     if (++uel(x,--i)) break;
     275      964005 :     if (i == 2) { x[2] = (long)HIGHBIT; shiftr_inplace(x, 1); break; }
     276             :   }
     277   353531471 : }
     278             : 
     279             : void
     280   371742363 : affir(GEN x, GEN y)
     281             : {
     282   371742363 :   const long s = signe(x), ly = lg(y);
     283             :   long lx, sh, i;
     284             : 
     285   371742363 :   if (!s)
     286             :   {
     287    53802870 :     y[1] = evalexpo(-prec2nbits(ly));
     288    53802870 :     return;
     289             :   }
     290             : 
     291   317939493 :   lx = lgefint(x); sh = bfffo(x[2]);
     292   317939493 :   y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
     293   317939493 :   if (sh) {
     294   314772996 :     if (lx <= ly)
     295             :     {
     296   543881733 :       for (i=lx; i<ly; i++) y[i]=0;
     297   184093308 :       shift_left(y,x,2,lx-1, 0,sh);
     298   184093308 :       return;
     299             :     }
     300   130679688 :     shift_left(y,x,2,ly-1, x[ly],sh);
     301             :     /* lx > ly: round properly */
     302   130679688 :     if ((uel(x,ly)<<sh) & HIGHBIT) roundr_up_ip(y, ly);
     303             :   }
     304             :   else {
     305     3166497 :     if (lx <= ly)
     306             :     {
     307     2985840 :       for (i=2; i<lx; i++) y[i]=x[i];
     308     1876275 :       for (   ; i<ly; i++) y[i]=0;
     309      762369 :       return;
     310             :     }
     311     5445024 :     for (i=2; i<ly; i++) y[i]=x[i];
     312             :     /* lx > ly: round properly */
     313     2404128 :     if (uel(x,ly) & HIGHBIT) roundr_up_ip(y, ly);
     314             :   }
     315             : }
     316             : 
     317             : INLINE GEN
     318  1910952795 : shiftispec(GEN x, long nx, long n)
     319             : {
     320             :   long ny, i, m;
     321             :   GEN y, yd;
     322  1910952795 :   if (!n)  return icopyspec(x, nx);
     323             : 
     324  1744378095 :   if (n > 0)
     325             :   {
     326   986187324 :     GEN z = (GEN)avma;
     327   986187324 :     long d = dvmdsBIL(n, &m);
     328             : 
     329   986187324 :     ny = nx+d; y = new_chunk(ny + 2); yd = y + 2;
     330 38102137554 :     for ( ; d; d--) *--z = 0;
     331  5691676992 :     if (!m) for (i=0; i<nx; i++) yd[i]=x[i];
     332             :     else
     333             :     {
     334   915597561 :       const ulong sh = BITS_IN_LONG - m;
     335   915597561 :       shift_left(yd,x, 0,nx-1, 0,m);
     336   915597561 :       i = uel(x,0) >> sh;
     337             :       /* Extend y on the left? */
     338   915597561 :       if (i) { ny++; y = new_chunk(1); y[2] = i; }
     339             :     }
     340             :   }
     341             :   else
     342             :   {
     343   758190771 :     ny = nx - dvmdsBIL(-n, &m);
     344   758190771 :     if (ny<1) return gen_0;
     345   757000719 :     y = new_chunk(ny + 2); yd = y + 2;
     346   757000719 :     if (m) {
     347   244327110 :       shift_right(yd,x, 0,ny, 0,m);
     348   244327110 :       if (yd[0] == 0)
     349             :       {
     350    35953482 :         if (ny==1) return gc_const((pari_sp)(y+3), gen_0);
     351    29186061 :         ny--; set_avma((pari_sp)(++y));
     352             :       }
     353             :     } else {
     354 22381698303 :       for (i=0; i<ny; i++) yd[i]=x[i];
     355             :     }
     356             :   }
     357  1736420622 :   y[1] = evalsigne(1)|evallgefint(ny + 2);
     358  1736420622 :   y[0] = evaltyp(t_INT)|evallg(ny + 2); return y;
     359             : }
     360             : 
     361             : GEN
     362    49868100 : mantissa2nr(GEN x, long n)
     363             : { /*This is a kludge since x is not an integer*/
     364    49868100 :   long s = signe(x);
     365             :   GEN y;
     366             : 
     367    49868100 :   if(s == 0) return gen_0;
     368    49867185 :   y = shiftispec(x + 2, lg(x) - 2, n);
     369    49867185 :   if (signe(y)) setsigne(y, s);
     370    49867185 :   return y;
     371             : }
     372             : 
     373             : GEN
     374     2013009 : truncr(GEN x)
     375             : {
     376             :   long d,e,i,s,m;
     377             :   GEN y;
     378             : 
     379     2013009 :   if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gen_0;
     380     1012881 :   d = nbits2lg(e+1); m = remsBIL(e);
     381     1012881 :   if (d > lg(x)) pari_err_PREC( "truncr (precision loss in truncation)");
     382             : 
     383     1012878 :   y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
     384     1012878 :   if (++m == BITS_IN_LONG)
     385         459 :     for (i=2; i<d; i++) y[i]=x[i];
     386             :   else
     387     1012662 :     shift_right(y,x, 2,d,0, BITS_IN_LONG - m);
     388     1012878 :   return y;
     389             : }
     390             : 
     391             : /* integral part */
     392             : GEN
     393     2139846 : floorr(GEN x)
     394             : {
     395             :   long d,e,i,lx,m;
     396             :   GEN y;
     397             : 
     398     2139846 :   if (signe(x) >= 0) return truncr(x);
     399      753099 :   if ((e=expo(x)) < 0) return gen_m1;
     400      264555 :   d = nbits2lg(e+1); m = remsBIL(e);
     401      264555 :   lx=lg(x); if (d>lx) pari_err_PREC( "floorr (precision loss in truncation)");
     402      264555 :   y = new_chunk(d);
     403      264555 :   if (++m == BITS_IN_LONG)
     404             :   {
     405        3252 :     for (i=2; i<d; i++) y[i]=x[i];
     406        3240 :     i=d; while (i<lx && !x[i]) i++;
     407        1596 :     if (i==lx) goto END;
     408             :   }
     409             :   else
     410             :   {
     411      262959 :     shift_right(y,x, 2,d,0, BITS_IN_LONG - m);
     412      262959 :     if (uel(x,d-1)<<m == 0)
     413             :     {
     414      360330 :       i=d; while (i<lx && !x[i]) i++;
     415       83100 :       if (i==lx) goto END;
     416             :     }
     417             :   }
     418             :   /* set y:=y+1 */
     419      211527 :   for (i=d-1; i>=2; i--) { uel(y,i)++; if (y[i]) goto END; }
     420           0 :   y=new_chunk(1); y[2]=1; d++;
     421      264555 : END:
     422      264555 :   y[1] = evalsigne(-1) | evallgefint(d);
     423      264555 :   y[0] = evaltyp(t_INT) | evallg(d); return y;
     424             : }
     425             : 
     426             : INLINE int
     427  5079133491 : cmpiispec(GEN x, GEN y, long lx, long ly)
     428             : {
     429             :   long i;
     430  5079133491 :   if (lx < ly) return -1;
     431  4793589981 :   if (lx > ly) return  1;
     432  4245405439 :   i = 0; while (i<lx && x[i]==y[i]) i++;
     433  3793666057 :   if (i==lx) return 0;
     434  3614650870 :   return (uel(x,i) > uel(y,i))? 1: -1;
     435             : }
     436             : 
     437             : INLINE int
     438   185485812 : equaliispec(GEN x, GEN y, long lx, long ly)
     439             : {
     440             :   long i;
     441   185485812 :   if (lx != ly) return 0;
     442   338517867 :   i = ly-1; while (i>=0 && x[i]==y[i]) i--;
     443   185419293 :   return i < 0;
     444             : }
     445             : 
     446             : /***********************************************************************/
     447             : /**                                                                   **/
     448             : /**                          MULTIPLICATION                           **/
     449             : /**                                                                   **/
     450             : /***********************************************************************/
     451             : /* assume ny > 0 */
     452             : INLINE GEN
     453  3564511182 : muluispec(ulong x, GEN y, long ny)
     454             : {
     455  3564511182 :   GEN yd, z = (GEN)avma;
     456  3564511182 :   long lz = ny+3;
     457             :   LOCAL_HIREMAINDER;
     458             : 
     459  3564511182 :   (void)new_chunk(lz);
     460  3564511182 :   yd = y + ny; *--z = mulll(x, *--yd);
     461 24395946618 :   while (yd > y) *--z = addmul(x,*--yd);
     462  3564511182 :   if (hiremainder) *--z = hiremainder; else lz--;
     463  3564511182 :   *--z = evalsigne(1) | evallgefint(lz);
     464  3564511182 :   *--z = evaltyp(t_INT) | evallg(lz);
     465  3564511182 :   return gc_const((pari_sp)z, z);
     466             : }
     467             : 
     468             : /* a + b*|Y| */
     469             : GEN
     470      593064 : addumului(ulong a, ulong b, GEN Y)
     471             : {
     472             :   GEN yd,y,z;
     473             :   long ny,lz;
     474             :   LOCAL_HIREMAINDER;
     475             :   LOCAL_OVERFLOW;
     476             : 
     477      593064 :   if (!b || !signe(Y)) return utoi(a);
     478             : 
     479      590241 :   y = LIMBS(Y); z = (GEN)avma;
     480      590241 :   ny = NLIMBS(Y);
     481      590241 :   lz = ny+3;
     482             : 
     483      590241 :   (void)new_chunk(lz);
     484      590241 :   yd = y + ny; *--z = addll(a, mulll(b, *--yd));
     485      590241 :   if (overflow) hiremainder++; /* can't overflow */
     486      590241 :   while (yd > y) *--z = addmul(b,*--yd);
     487      590241 :   if (hiremainder) *--z = hiremainder; else lz--;
     488      590241 :   *--z = evalsigne(1) | evallgefint(lz);
     489      590241 :   *--z = evaltyp(t_INT) | evallg(lz);
     490      590241 :   return gc_const((pari_sp)z, z);
     491             : }
     492             : 
     493             : /***********************************************************************/
     494             : /**                                                                   **/
     495             : /**                          DIVISION                                 **/
     496             : /**                                                                   **/
     497             : /***********************************************************************/
     498             : 
     499             : ulong
     500  1317055011 : umodiu(GEN y, ulong x)
     501             : {
     502  1317055011 :   long sy=signe(y),ly,i;
     503             :   ulong xi;
     504             :   LOCAL_HIREMAINDER;
     505             : 
     506  1317055011 :   if (!x) pari_err_INV("umodiu",gen_0);
     507  1317055011 :   if (!sy) return 0;
     508  1060278729 :   ly = lgefint(y);
     509  1060278729 :   if (x <= uel(y,2))
     510             :   {
     511   330590706 :     hiremainder=0;
     512   330590706 :     if (ly==3)
     513             :     {
     514   306387015 :       hiremainder=uel(y,2)%x;
     515   306387015 :       if (!hiremainder) return 0;
     516   258302613 :       return (sy > 0)? hiremainder: x - hiremainder;
     517             :     }
     518             :   }
     519             :   else
     520             :   {
     521   729688023 :     if (ly==3) return (sy > 0)? uel(y,2): x - uel(y,2);
     522    92149977 :     hiremainder=uel(y,2); ly--; y++;
     523             :   }
     524   116353668 :   xi = get_Fl_red(x);
     525   534742608 :   for (i=2; i<ly; i++) (void)divll_pre(y[i],x,xi);
     526   116353668 :   if (!hiremainder) return 0;
     527   111056367 :   return (sy > 0)? hiremainder: x - hiremainder;
     528             : }
     529             : 
     530             : /* return |y| \/ x */
     531             : GEN
     532   313337847 : absdiviu_rem(GEN y, ulong x, ulong *rem)
     533             : {
     534             :   long ly,i;
     535             :   GEN z;
     536             :   ulong xi;
     537             :   LOCAL_HIREMAINDER;
     538             : 
     539   313337847 :   if (!x) pari_err_INV("absdiviu_rem",gen_0);
     540   313337847 :   if (!signe(y)) { *rem = 0; return gen_0; }
     541             : 
     542   299546850 :   ly = lgefint(y);
     543   299546850 :   if (x <= uel(y,2))
     544             :   {
     545   236874363 :     hiremainder=0;
     546   236874363 :     if (ly==3)
     547             :     {
     548   123261060 :       z = cgetipos(3);
     549   123261060 :       z[2] = divll(uel(y,2),x);
     550   123261060 :       *rem = hiremainder; return z;
     551             :     }
     552             :   }
     553             :   else
     554             :   {
     555    62672487 :     if (ly==3) { *rem = uel(y,2); return gen_0; }
     556    44539827 :     hiremainder = uel(y,2); ly--; y++;
     557             :   }
     558   158153130 :   xi = get_Fl_red(x);
     559   158153130 :   z = cgetipos(ly);
     560  1300526394 :   for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x,xi);
     561   158153130 :   *rem = hiremainder; return z;
     562             : }
     563             : 
     564             : GEN
     565    65816778 : divis_rem(GEN y, long x, long *rem)
     566             : {
     567    65816778 :   long sy=signe(y),ly,s,i;
     568             :   GEN z;
     569             :   ulong xi;
     570             :   LOCAL_HIREMAINDER;
     571             : 
     572    65816778 :   if (!x) pari_err_INV("divis_rem",gen_0);
     573    65816778 :   if (!sy) { *rem=0; return gen_0; }
     574    48366783 :   if (x<0) { s = -sy; x = -x; } else s = sy;
     575             : 
     576    48366783 :   ly = lgefint(y);
     577    48366783 :   if ((ulong)x <= uel(y,2))
     578             :   {
     579    33614742 :     hiremainder=0;
     580    33614742 :     if (ly==3)
     581             :     {
     582    33343968 :       z = cgeti(3); z[1] = evallgefint(3) | evalsigne(s);
     583    33343968 :       z[2] = divll(uel(y,2),x);
     584    33343968 :       if (sy<0) hiremainder = - ((long)hiremainder);
     585    33343968 :       *rem = (long)hiremainder; return z;
     586             :     }
     587             :   }
     588             :   else
     589             :   {
     590    14752041 :     if (ly==3) { *rem = itos(y); return gen_0; }
     591      233418 :     hiremainder = uel(y,2); ly--; y++;
     592             :   }
     593      504192 :   xi = get_Fl_red(x);
     594      504192 :   z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
     595     3523173 :   for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x,xi);
     596      504192 :   if (sy<0) hiremainder = - ((long)hiremainder);
     597      504192 :   *rem = (long)hiremainder; return z;
     598             : }
     599             : 
     600             : GEN
     601      673254 : divis(GEN y, long x)
     602             : {
     603      673254 :   long sy=signe(y),ly,s,i;
     604             :   ulong xi;
     605             :   GEN z;
     606             :   LOCAL_HIREMAINDER;
     607             : 
     608      673254 :   if (!x) pari_err_INV("divis",gen_0);
     609      673254 :   if (!sy) return gen_0;
     610      673221 :   if (x<0) { s = -sy; x = -x; } else s = sy;
     611             : 
     612      673221 :   ly = lgefint(y);
     613      673221 :   if ((ulong)x <= uel(y,2))
     614             :   {
     615      663381 :     hiremainder=0;
     616      663381 :     if (ly==3)
     617             :     {
     618      597111 :       z = cgeti(3); z[1] = evallgefint(3) | evalsigne(s);
     619      597111 :       z[2] = divll(y[2],x);
     620      597111 :       return z;
     621             :     }
     622             :   }
     623             :   else
     624             :   {
     625        9840 :     if (ly==3) return gen_0;
     626        9840 :     hiremainder=y[2]; ly--; y++;
     627             :   }
     628       76110 :   xi = get_Fl_red(x);
     629       76110 :   z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
     630      576426 :   for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x, xi);
     631       76110 :   return z;
     632             : }
     633             : 
     634             : GEN
     635    92725950 : divrr(GEN x, GEN y)
     636             : {
     637    92725950 :   long sx=signe(x), sy=signe(y), lx,ly,lr,e,i,j;
     638             :   ulong y0,y1;
     639             :   GEN r, r1;
     640             : 
     641    92725950 :   if (!sy) pari_err_INV("divrr",y);
     642    92725950 :   e = expo(x) - expo(y);
     643    92725950 :   if (!sx) return real_0_bit(e);
     644    92446965 :   if (sy<0) sx = -sx;
     645             : 
     646    92446965 :   lx=lg(x); ly=lg(y);
     647    92446965 :   if (ly==3)
     648             :   {
     649    17448747 :     ulong k = x[2], l = (lx>3)? x[3]: 0;
     650             :     LOCAL_HIREMAINDER;
     651    17448747 :     if (k < uel(y,2)) e--;
     652             :     else
     653             :     {
     654     6228054 :       l >>= 1; if (k&1) l |= HIGHBIT;
     655     6228054 :       k >>= 1;
     656             :     }
     657    17448747 :     hiremainder = k; k = divll(l,y[2]);
     658    17448747 :     if (hiremainder > (uel(y,2) >> 1) && !++k) { k = HIGHBIT; e++; }
     659    17448747 :     r = cgetr(3);
     660    17448747 :     r[1] = evalsigne(sx) | evalexpo(e);
     661    17448747 :     r[2] = k; return r;
     662             :   }
     663             : 
     664    74998218 :   lr = minss(lx,ly); r = new_chunk(lr);
     665    74998218 :   r1 = r-1;
     666   535149324 :   r1[1] = 0; for (i=2; i<lr; i++) r1[i]=x[i];
     667    74998218 :   r1[lr] = (lx>ly)? x[lr]: 0;
     668    74998218 :   y0 = y[2]; y1 = y[3];
     669   610147542 :   for (i=0; i<lr-1; i++)
     670             :   { /* r1 = r + (i-1), OK up to r1[2] (accesses at most r[lr]) */
     671             :     ulong k, qp;
     672             :     LOCAL_HIREMAINDER;
     673             :     LOCAL_OVERFLOW;
     674             : 
     675   535149324 :     if (uel(r1,1) == y0) { qp = ULONG_MAX; k = addll(y0,r1[2]); }
     676             :     else
     677             :     {
     678   533636472 :       if (uel(r1,1) > y0) /* can't happen if i=0 */
     679             :       {
     680           0 :         GEN y1 = y+1;
     681           0 :         j = lr-i; r1[j] = subll(r1[j],y1[j]);
     682           0 :         for (j--; j>0; j--) r1[j] = subllx(r1[j],y1[j]);
     683           0 :         j=i; do uel(r,--j)++; while (j && !uel(r,j));
     684             :       }
     685   533636472 :       hiremainder = r1[1]; overflow = 0;
     686   533636472 :       qp = divll(r1[2],y0); k = hiremainder;
     687             :     }
     688   535149324 :     j = lr-i+1;
     689   535149324 :     if (!overflow)
     690             :     {
     691             :       long k3, k4;
     692   533876550 :       k3 = mulll(qp,y1);
     693   533876550 :       if (j == 3) /* i = lr - 2 maximal, r1[3] undefined -> 0 */
     694    74916333 :         k4 = subll(hiremainder,k);
     695             :       else
     696             :       {
     697   458960217 :         k3 = subll(k3, r1[3]);
     698   458960217 :         k4 = subllx(hiremainder,k);
     699             :       }
     700   694496230 :       while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
     701             :     }
     702   535149324 :     if (j<ly) (void)mulll(qp,y[j]); else { hiremainder = 0 ; j = ly; }
     703  3693433581 :     for (j--; j>1; j--)
     704             :     {
     705  3158284257 :       r1[j] = subll(r1[j], addmul(qp,y[j]));
     706  3158284257 :       hiremainder += overflow;
     707             :     }
     708   535149324 :     if (uel(r1,1) != hiremainder)
     709             :     {
     710      536991 :       if (uel(r1,1) < hiremainder)
     711             :       {
     712      536991 :         qp--;
     713      536991 :         j = lr-i-(lr-i>=ly); r1[j] = addll(r1[j], y[j]);
     714     3020025 :         for (j--; j>1; j--) r1[j] = addllx(r1[j], y[j]);
     715             :       }
     716             :       else
     717             :       {
     718           0 :         r1[1] -= hiremainder;
     719           0 :         while (r1[1])
     720             :         {
     721           0 :           qp++; if (!qp) { j=i; do uel(r,--j)++; while (j && !r[j]); }
     722           0 :           j = lr-i-(lr-i>=ly); r1[j] = subll(r1[j],y[j]);
     723           0 :           for (j--; j>1; j--) r1[j] = subllx(r1[j],y[j]);
     724           0 :           r1[1] -= overflow;
     725             :         }
     726             :       }
     727             :     }
     728   535149324 :     *++r1 = qp;
     729             :   }
     730             :   /* i = lr-1 */
     731             :   /* round correctly */
     732    74998218 :   if (uel(r1,1) > (y0>>1))
     733             :   {
     734    35947986 :     j=i; do uel(r,--j)++; while (j && !r[j]);
     735             :   }
     736   535149324 :   r1 = r-1; for (j=i; j>=2; j--) r[j]=r1[j];
     737    74998218 :   if (r[0] == 0) e--;
     738    39383196 :   else if (r[0] == 1) { shift_right(r,r, 2,lr, 1,1); }
     739             :   else { /* possible only when rounding up to 0x2 0x0 ... */
     740           0 :     r[2] = (long)HIGHBIT; e++;
     741             :   }
     742    74998218 :   r[0] = evaltyp(t_REAL)|evallg(lr);
     743    74998218 :   r[1] = evalsigne(sx) | evalexpo(e);
     744    74998218 :   return r;
     745             : }
     746             : 
     747             : GEN
     748    88771803 : divri(GEN x, GEN y)
     749             : {
     750    88771803 :   long lx, s = signe(y);
     751             :   pari_sp av;
     752             :   GEN z;
     753             : 
     754    88771803 :   if (!s) pari_err_INV("divri",y);
     755    88771803 :   if (!signe(x)) return real_0_bit(expo(x) - expi(y));
     756    88609554 :   if (!is_bigint(y)) {
     757    77347704 :     GEN z = divru(x, y[2]);
     758    77347704 :     if (s < 0) togglesign(z);
     759    77347704 :     return z;
     760             :   }
     761    11261850 :   lx = lg(x); z = cgetr(lx); av = avma;
     762    11261850 :   affrr(divrr(x, itor(y, lx+1)), z);
     763    11261850 :   return gc_const(av, z);
     764             : }
     765             : 
     766             : /* Integer division x / y: such that sign(r) = sign(x)
     767             :  *   if z = ONLY_REM return remainder, otherwise return quotient
     768             :  *   if z != NULL set *z to remainder
     769             :  *   *z is the last object on stack (and thus can be disposed of with cgiv
     770             :  *   instead of gerepile)
     771             :  * If *z is zero, we put gen_0 here and no copy.
     772             :  * space needed: lx + ly */
     773             : GEN
     774  1198110708 : dvmdii(GEN x, GEN y, GEN *z)
     775             : {
     776  1198110708 :   long sx = signe(x), sy = signe(y);
     777  1198110708 :   long lx, ly = lgefint(y), lz, i, j, sh, lq, lr;
     778             :   pari_sp av;
     779             :   ulong y0,y0i,y1, *xd,*rd,*qd;
     780             :   GEN q, r, r1;
     781             : 
     782  1198110708 :   if (!sx)
     783             :   {
     784    42530403 :     if (ly < 3) pari_err_INV("dvmdii",gen_0);
     785    42530400 :     if (!z || z == ONLY_REM) return gen_0;
     786    16368942 :     *z=gen_0; return gen_0;
     787             :   }
     788  1155580305 :   if (ly <= 3)
     789             :   {
     790             :     ulong rem;
     791   562753872 :     if (ly < 3) pari_err_INV("dvmdii",gen_0);
     792   562753872 :     if (z == ONLY_REM)
     793             :     {
     794   443685366 :       rem = umodiu(x,uel(y,2));
     795   443685366 :       if (!rem) return gen_0;
     796   401066283 :       return (sx < 0)? utoineg(uel(y,2) - rem): utoipos(rem);
     797             :     }
     798   119068506 :     q = absdiviu_rem(x, uel(y,2), &rem);
     799   119068506 :     if (sx != sy) togglesign(q);
     800   119068506 :     if (!z) return q;
     801   116108091 :     if (!rem) *z = gen_0;
     802    46289514 :     else *z = sx < 0? utoineg(rem): utoipos(rem);
     803   116108091 :     return q;
     804             :   }
     805   592826433 :   lx=lgefint(x);
     806   592826433 :   lz=lx-ly;
     807   592826433 :   if (lz <= 0)
     808             :   {
     809   273998880 :     if (lz == 0)
     810             :     {
     811   249873510 :       for (i=2; i<lx; i++)
     812   249337683 :         if (x[i] != y[i])
     813             :         {
     814   233808939 :           if (uel(x,i) > uel(y,i)) goto DIVIDE;
     815    55915533 :           goto TRIVIAL;
     816             :         }
     817      535827 :       if (z == ONLY_REM) return gen_0;
     818       59049 :       if (z) *z = gen_0;
     819       59049 :       if (sx < 0) sy = -sy;
     820       59049 :       return stoi(sy);
     821             :     }
     822    39654114 : TRIVIAL:
     823    95569647 :     if (z == ONLY_REM) return icopy(x);
     824     1334826 :     if (z) *z = icopy(x);
     825     1334826 :     return gen_0;
     826             :   }
     827   318827553 : DIVIDE: /* quotient is nonzero */
     828   496720959 :   av=avma; if (sx<0) sy = -sy;
     829   496720959 :   r1 = new_chunk(lx); sh = bfffo(y[2]);
     830   496720959 :   if (sh)
     831             :   { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
     832   487782720 :     const ulong m = BITS_IN_LONG - sh;
     833   487782720 :     r = new_chunk(ly);
     834   487782720 :     shift_left(r, y,2,ly-1, 0,sh); y = r;
     835   487782720 :     shift_left(r1,x,2,lx-1, 0,sh);
     836   487782720 :     r1[1] = uel(x,2) >> m;
     837             :   }
     838             :   else
     839             :   {
     840    99150738 :     r1[1] = 0; for (j=2; j<lx; j++) r1[j] = x[j];
     841             :   }
     842   496720959 :   x = r1;
     843   496720959 :   y0 = y[2]; y0i = get_Fl_red(y0);
     844   496720959 :   y1 = y[3];
     845  2454278748 :   for (i=0; i<=lz; i++)
     846             :   { /* r1 = x + i */
     847             :     ulong k, qp;
     848             :     LOCAL_HIREMAINDER;
     849             :     LOCAL_OVERFLOW;
     850             : 
     851  1957557789 :     if (uel(r1,1) == y0)
     852             :     {
     853       22377 :       qp = ULONG_MAX; k = addll(y0,r1[2]);
     854             :     }
     855             :     else
     856             :     {
     857  1957535412 :       hiremainder = r1[1]; overflow = 0;
     858  1957535412 :       qp = divll_pre(r1[2],y0,y0i); k = hiremainder;
     859             :     }
     860  1957557789 :     if (!overflow)
     861             :     {
     862  1957536468 :       long k3 = subll(mulll(qp,y1), r1[3]);
     863  1957536468 :       long k4 = subllx(hiremainder,k);
     864  2363244425 :       while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
     865             :     }
     866  1957557789 :     hiremainder = 0; j = ly;
     867 >34007*10^7 :     for (j--; j>1; j--)
     868             :     {
     869 >33811*10^7 :       r1[j] = subll(r1[j], addmul(qp,y[j]));
     870 >33811*10^7 :       hiremainder += overflow;
     871             :     }
     872  1957557789 :     if (uel(r1,1) < hiremainder)
     873             :     {
     874     5675537 :       qp--;
     875     5675537 :       j = ly-1; r1[j] = addll(r1[j],y[j]);
     876    51914685 :       for (j--; j>1; j--) r1[j] = addllx(r1[j],y[j]);
     877             :     }
     878  1957557789 :     *++r1 = qp;
     879             :   }
     880             : 
     881   496720959 :   lq = lz+2;
     882   496720959 :   if (!z)
     883             :   {
     884     2252721 :     qd = (ulong*)av;
     885     2252721 :     xd = (ulong*)(x + lq);
     886     2252721 :     if (x[1]) { lz++; lq++; }
     887    31044003 :     while (lz--) *--qd = *--xd;
     888     2252721 :     *--qd = evalsigne(sy) | evallgefint(lq);
     889     2252721 :     *--qd = evaltyp(t_INT) | evallg(lq);
     890     2252721 :     return gc_const((pari_sp)qd, (GEN)qd);
     891             :   }
     892             : 
     893   596583723 :   j=lq; while (j<lx && !x[j]) j++;
     894   494468238 :   lz = lx-j;
     895   494468238 :   if (z == ONLY_REM)
     896             :   {
     897   249929214 :     if (lz==0) return gc_const(av, gen_0);
     898   241559133 :     rd = (ulong*)av; lr = lz+2;
     899   241559133 :     xd = (ulong*)(x + lx);
     900   280967823 :     if (!sh) while (lz--) *--rd = *--xd;
     901             :     else
     902             :     { /* shift remainder right by sh bits */
     903   233658813 :       const ulong shl = BITS_IN_LONG - sh;
     904             :       ulong l;
     905   233658813 :       xd--;
     906  1108562931 :       while (--lz) /* fill r[3..] */
     907             :       {
     908   874904118 :         l = *xd >> sh;
     909   874904118 :         *--rd = l | (*--xd << shl);
     910             :       }
     911   233658813 :       l = *xd >> sh;
     912   233658813 :       if (l) *--rd = l; else lr--;
     913             :     }
     914   241559133 :     *--rd = evalsigne(sx) | evallgefint(lr);
     915   241559133 :     *--rd = evaltyp(t_INT) | evallg(lr);
     916   241559133 :     return gc_const((pari_sp)rd, (GEN)rd);
     917             :   }
     918             : 
     919   244539024 :   lr = lz+2;
     920   244539024 :   rd = NULL; /* gcc -Wall */
     921   244539024 :   if (lz)
     922             :   { /* non zero remainder: initialize rd */
     923   242209737 :     xd = (ulong*)(x + lx);
     924   242209737 :     if (!sh)
     925             :     {
     926      882465 :       rd = (ulong*)avma; (void)new_chunk(lr);
     927     3706905 :       while (lz--) *--rd = *--xd;
     928             :     }
     929             :     else
     930             :     { /* shift remainder right by sh bits */
     931   241327272 :       const ulong shl = BITS_IN_LONG - sh;
     932             :       ulong l;
     933   241327272 :       rd = (ulong*)x; /* overwrite shifted y */
     934   241327272 :       xd--;
     935   955334874 :       while (--lz)
     936             :       {
     937   714007602 :         l = *xd >> sh;
     938   714007602 :         *--rd = l | (*--xd << shl);
     939             :       }
     940   241327272 :       l = *xd >> sh;
     941   241327272 :       if (l) *--rd = l; else lr--;
     942             :     }
     943   242209737 :     *--rd = evalsigne(sx) | evallgefint(lr);
     944   242209737 :     *--rd = evaltyp(t_INT) | evallg(lr);
     945   242209737 :     rd += lr;
     946             :   }
     947   244539024 :   qd = (ulong*)av;
     948   244539024 :   xd = (ulong*)(x + lq);
     949   244539024 :   if (x[1]) lq++;
     950   808585773 :   j = lq-2; while (j--) *--qd = *--xd;
     951   244539024 :   *--qd = evalsigne(sy) | evallgefint(lq);
     952   244539024 :   *--qd = evaltyp(t_INT) | evallg(lq);
     953   244539024 :   q = (GEN)qd;
     954   244539024 :   if (lr==2) *z = gen_0;
     955             :   else
     956             :   { /* rd has been properly initialized: we had lz > 0 */
     957  1543187644 :     while (lr--) *--qd = *--rd;
     958   242209737 :     *z = (GEN)qd;
     959             :   }
     960   244539024 :   return gc_const((pari_sp)qd, q);
     961             : }
     962             : 
     963             : /* Montgomery reduction.
     964             :  * N has k words, assume T >= 0 has less than 2k.
     965             :  * Return res := T / B^k mod N, where B = 2^BIL
     966             :  * such that 0 <= res < T/B^k + N  and  res has less than k words */
     967             : GEN
     968    28607937 : red_montgomery(GEN T, GEN N, ulong inv)
     969             : {
     970             :   pari_sp av;
     971             :   GEN Te, Td, Ne, Nd, scratch;
     972    28607937 :   ulong i, j, m, t, d, k = NLIMBS(N);
     973             :   int carry;
     974             :   LOCAL_HIREMAINDER;
     975             :   LOCAL_OVERFLOW;
     976             : 
     977    28607937 :   if (k == 0) return gen_0;
     978    28607937 :   d = NLIMBS(T); /* <= 2*k */
     979    28607937 :   if (d == 0) return gen_0;
     980             : #ifdef DEBUG
     981             :   if (d > 2*k) pari_err_BUG("red_montgomery");
     982             : #endif
     983    28607928 :   if (k == 1)
     984             :   { /* as below, special cased for efficiency */
     985       66303 :     ulong n = uel(N,2);
     986       66303 :     if (d == 1) {
     987       66231 :       hiremainder = uel(T,2);
     988       66231 :       m = hiremainder * inv;
     989       66231 :       (void)addmul(m, n); /* t + m*n = 0 */
     990       66231 :       return utoi(hiremainder);
     991             :     } else { /* d = 2 */
     992          72 :       hiremainder = uel(T,3);
     993          72 :       m = hiremainder * inv;
     994          72 :       (void)addmul(m, n); /* t + m*n = 0 */
     995          72 :       t = addll(hiremainder, uel(T,2));
     996          72 :       if (overflow) t -= n; /* t > n doesn't fit in 1 word */
     997          72 :       return utoi(t);
     998             :     }
     999             :   }
    1000             :   /* assume k >= 2 */
    1001    28541625 :   av = avma; scratch = new_chunk(k<<1); /* >= k + 2: result fits */
    1002             : 
    1003             :   /* copy T to scratch space (pad with zeroes to 2k words) */
    1004    28541625 :   Td = (GEN)av;
    1005    28541625 :   Te = T + (d+2);
    1006   685128636 :   for (i=0; i < d     ; i++) *--Td = *--Te;
    1007    52074348 :   for (   ; i < (k<<1); i++) *--Td = 0;
    1008             : 
    1009    28541625 :   Te = (GEN)av; /* 1 beyond end of current T mantissa (in scratch) */
    1010    28541625 :   Ne = N + k+2; /* 1 beyond end of N mantissa */
    1011             : 
    1012    28541625 :   carry = 0;
    1013   368601492 :   for (i=0; i<k; i++) /* set T := T/B nod N, k times */
    1014             :   {
    1015   340059867 :     Td = Te; /* one beyond end of (new) T mantissa */
    1016   340059867 :     Nd = Ne;
    1017   340059867 :     hiremainder = *--Td;
    1018   340059867 :     m = hiremainder * inv; /* solve T + m N = O(B) */
    1019             : 
    1020             :     /* set T := (T + mN) / B */
    1021   340059867 :     Te = Td;
    1022   340059867 :     (void)addmul(m, *--Nd); /* = 0 */
    1023  6108522099 :     for (j=1; j<k; j++)
    1024             :     {
    1025  5768462232 :       t = addll(addmul(m, *--Nd), *--Td);
    1026  5768462232 :       *Td = t;
    1027  5768462232 :       hiremainder += overflow;
    1028             :     }
    1029   340059867 :     t = addll(hiremainder, *--Td); *Td = t + carry;
    1030   340059867 :     carry = (overflow || (carry && *Td == 0));
    1031             :   }
    1032    28541625 :   if (carry)
    1033             :   { /* Td > N overflows (k+1 words), set Td := Td - N */
    1034      354465 :     Td = Te;
    1035      354465 :     Nd = Ne;
    1036      354465 :     t = subll(*--Td, *--Nd); *Td = t;
    1037     6746328 :     while (Td > scratch) { t = subllx(*--Td, *--Nd); *Td = t; }
    1038             :   }
    1039             : 
    1040             :   /* copy result */
    1041    28541625 :   Td = (GEN)av;
    1042    32333385 :   while (*scratch == 0 && Te > scratch) scratch++; /* strip leading 0s */
    1043   364809732 :   while (Te > scratch) *--Td = *--Te;
    1044    28541625 :   k = (GEN)av - Td; if (!k) return gc_const(av, gen_0);
    1045    28541625 :   k += 2;
    1046    28541625 :   *--Td = evalsigne(1) | evallgefint(k);
    1047    28541625 :   *--Td = evaltyp(t_INT) | evallg(k);
    1048             : #ifdef DEBUG
    1049             : {
    1050             :   long l = NLIMBS(N), s = BITS_IN_LONG*l;
    1051             :   GEN R = int2n(s);
    1052             :   GEN res = remii(mulii(T, Fp_inv(R, N)), N);
    1053             :   if (k > lgefint(N)
    1054             :     || !equalii(remii(Td,N),res)
    1055             :     || cmpii(Td, addii(shifti(T, -s), N)) >= 0) pari_err_BUG("red_montgomery");
    1056             : }
    1057             : #endif
    1058    28541625 :   return gc_const((pari_sp)Td, Td);
    1059             : }
    1060             : 
    1061             : /* EXACT INTEGER DIVISION */
    1062             : 
    1063             : /* assume xy>0, the division is exact and y is odd. Destroy x */
    1064             : static GEN
    1065    33506139 : diviuexact_i(GEN x, ulong y)
    1066             : {
    1067             :   long i, lz, lx;
    1068             :   ulong q, yinv;
    1069             :   GEN z, z0, x0, x0min;
    1070             : 
    1071    33506139 :   if (y == 1) return icopy(x);
    1072    28198938 :   lx = lgefint(x);
    1073    28198938 :   if (lx == 3)
    1074             :   {
    1075      756585 :     q = uel(x,2) / y;
    1076      756585 :     if (!q) pari_err_OP("exact division", x, utoi(y));
    1077      756585 :     return utoipos(q);
    1078             :   }
    1079    27442353 :   yinv = invmod2BIL(y);
    1080    27442353 :   lz = (y <= uel(x,2)) ? lx : lx-1;
    1081    27442353 :   z = new_chunk(lz);
    1082    27442353 :   z0 = z + lz;
    1083    27442353 :   x0 = x + lx; x0min = x + lx-lz+2;
    1084             : 
    1085    89642124 :   while (x0 > x0min)
    1086             :   {
    1087    62199771 :     *--z0 = q = yinv*uel(--x0,0); /* i-th quotient */
    1088    62199771 :     if (!q) continue;
    1089             :     /* x := x - q * y */
    1090             :     { /* update neither lowest word (could set it to 0) nor highest ones */
    1091    61703166 :       GEN x1 = x0 - 1;
    1092             :       LOCAL_HIREMAINDER;
    1093    61703166 :       (void)mulll(q,y);
    1094    61703166 :       if (hiremainder)
    1095             :       {
    1096    51309312 :         if (uel(x1,0) < hiremainder)
    1097             :         {
    1098      106293 :           uel(x1,0) -= hiremainder;
    1099      108237 :           do uel(--x1,0)--; while (uel(x1,0) == ULONG_MAX);
    1100             :         }
    1101             :         else
    1102    51203019 :           uel(x1,0) -= hiremainder;
    1103             :       }
    1104             :     }
    1105             :   }
    1106    27442353 :   i=2; while(!z[i]) i++;
    1107    27442353 :   z += i-2; lz -= i-2;
    1108    27442353 :   z[0] = evaltyp(t_INT)|evallg(lz);
    1109    27442353 :   z[1] = evalsigne(1)|evallg(lz);
    1110    27442353 :   if (lz == 2) pari_err_OP("exact division", x, utoi(y));
    1111    27442353 :   return gc_const((pari_sp)z, z);
    1112             : }
    1113             : 
    1114             : /* assume y != 0 and the division is exact */
    1115             : GEN
    1116    20507766 : diviuexact(GEN x, ulong y)
    1117             : {
    1118             :   pari_sp av;
    1119    20507766 :   long lx, vy, s = signe(x);
    1120             :   GEN z;
    1121             : 
    1122    20507766 :   if (!s) return gen_0;
    1123    19670802 :   if (y == 1) return icopy(x);
    1124    16693401 :   lx = lgefint(x);
    1125    16693401 :   if (lx == 3) {
    1126    13115886 :     ulong q = uel(x,2) / y;
    1127    13115886 :     if (!q) pari_err_OP("exact division", x, utoi(y));
    1128    13115886 :     return (s > 0)? utoipos(q): utoineg(q);
    1129             :   }
    1130     3577515 :   av = avma; (void)new_chunk(lx); vy = vals(y);
    1131     3577515 :   if (vy) {
    1132     1426551 :     y >>= vy;
    1133     1426551 :     if (y == 1) { set_avma(av); return shifti(x, -vy); }
    1134      726873 :     x = shifti(x, -vy);
    1135      726873 :     if (lx == 3) {
    1136           0 :       ulong q = uel(x,2) / y;
    1137           0 :       set_avma(av);
    1138           0 :       if (!q) pari_err_OP("exact division", x, utoi(y));
    1139           0 :       return (s > 0)? utoipos(q): utoineg(q);
    1140             :     }
    1141     2150964 :   } else x = icopy(x);
    1142     2877837 :   set_avma(av);
    1143     2877837 :   z = diviuexact_i(x, y);
    1144     2877837 :   setsigne(z, s); return z;
    1145             : }
    1146             : 
    1147             : /* Find z such that x=y*z, knowing that y | x (unchecked)
    1148             :  * Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = 1/y0 mod B.
    1149             :  *    Set x := (x - z0 y) / B, updating only relevant words, and repeat */
    1150             : GEN
    1151   302512200 : diviiexact(GEN x, GEN y)
    1152             : {
    1153   302512200 :   long lx, ly, lz, vy, i, ii, sx = signe(x), sy = signe(y);
    1154             :   pari_sp av;
    1155             :   ulong y0inv,q;
    1156             :   GEN z;
    1157             : 
    1158   302512200 :   if (!sy) pari_err_INV("diviiexact",gen_0);
    1159   302512200 :   if (!sx) return gen_0;
    1160   241077732 :   lx = lgefint(x);
    1161   241077732 :   if (lx == 3) {
    1162   190001511 :     q = uel(x,2) / uel(y,2);
    1163   190001511 :     if (!q) pari_err_OP("exact division", x, y);
    1164   190001511 :     return (sx+sy) ? utoipos(q): utoineg(q);
    1165             :   }
    1166    51076221 :   vy = vali(y); av = avma;
    1167    51076221 :   (void)new_chunk(lx); /* enough room for z */
    1168    51076221 :   if (vy)
    1169             :   { /* make y odd */
    1170    22918695 :     y = shifti(y,-vy);
    1171    22918695 :     x = shifti(x,-vy); lx = lgefint(x);
    1172             :   }
    1173    28157526 :   else x = icopy(x); /* necessary because we destroy x */
    1174    51076221 :   set_avma(av); /* will erase our x,y when exiting */
    1175             :   /* now y is odd */
    1176    51076221 :   ly = lgefint(y);
    1177    51076221 :   if (ly == 3)
    1178             :   {
    1179    30628302 :     z = diviuexact_i(x,uel(y,2)); /* x != 0 */
    1180    30628302 :     setsigne(z, (sx+sy)? 1: -1); return z;
    1181             :   }
    1182    20447919 :   y0inv = invmod2BIL(y[ly-1]);
    1183    39820593 :   i=2; while (i<ly && y[i]==x[i]) i++;
    1184    20447919 :   lz = (i==ly || uel(y,i) < uel(x,i)) ? lx-ly+3 : lx-ly+2;
    1185    20447919 :   z = new_chunk(lz);
    1186             : 
    1187    20447919 :   y += ly - 1; /* now y[-i] = i-th word of y */
    1188   132369039 :   for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
    1189             :   {
    1190             :     long limj;
    1191             :     LOCAL_HIREMAINDER;
    1192             :     LOCAL_OVERFLOW;
    1193             : 
    1194   111921120 :     z[i] = q = y0inv*uel(x,ii); /* i-th quotient */
    1195   111921120 :     if (!q) continue;
    1196             : 
    1197             :     /* x := x - q * y */
    1198   111838173 :     (void)mulll(q,y[0]); limj = maxss(lx - lz, ii+3-ly);
    1199             :     { /* update neither lowest word (could set it to 0) nor highest ones */
    1200   111838173 :       GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;
    1201 14008343754 :       for (; x0 >= xlim; x0--, y0--)
    1202             :       {
    1203 13896505581 :         *x0 = subll(*x0, addmul(q,*y0));
    1204 13896505581 :         hiremainder += overflow;
    1205             :       }
    1206   111838173 :       if (hiremainder && limj != lx - lz)
    1207             :       {
    1208    51678024 :         if ((ulong)*x0 < hiremainder)
    1209             :         {
    1210      691428 :           *x0 -= hiremainder;
    1211      691428 :           do (*--x0)--; while ((ulong)*x0 == ULONG_MAX);
    1212             :         }
    1213             :         else
    1214    50986596 :           *x0 -= hiremainder;
    1215             :       }
    1216             :     }
    1217             :   }
    1218    20447919 :   i=2; while(!z[i]) i++;
    1219    20447919 :   z += i-2; lz -= (i-2);
    1220    20447919 :   z[0] = evaltyp(t_INT)|evallg(lz);
    1221    20447919 :   z[1] = evalsigne((sx+sy)? 1: -1) | evallg(lz);
    1222    20447919 :   if (lz == 2) pari_err_OP("exact division", x, y);
    1223    20447919 :   return gc_const((pari_sp)z, z);
    1224             : }
    1225             : 
    1226             : /* assume yz != and yz | x */
    1227             : GEN
    1228      147606 : diviuuexact(GEN x, ulong y, ulong z)
    1229             : {
    1230             :   long tmp[4];
    1231             :   ulong t;
    1232             :   LOCAL_HIREMAINDER;
    1233      147606 :   t = mulll(y, z);
    1234      147606 :   if (!hiremainder) return diviuexact(x, t);
    1235           0 :   tmp[0] = evaltyp(t_INT)|_evallg(4);
    1236           0 :   tmp[1] = evalsigne(1)|evallgefint(4);
    1237           0 :   tmp[2] = hiremainder;
    1238           0 :   tmp[3] = t;
    1239           0 :   return diviiexact(x, tmp);
    1240             : }
    1241             : 
    1242             : /********************************************************************/
    1243             : /**                                                                **/
    1244             : /**               INTEGER MULTIPLICATION (BASECASE)                **/
    1245             : /**                                                                **/
    1246             : /********************************************************************/
    1247             : /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
    1248             : INLINE GEN
    1249  4546164264 : muliispec_basecase(GEN x, GEN y, long nx, long ny)
    1250             : {
    1251             :   GEN z2e,z2d,yd,xd,ye,zd;
    1252             :   long p1,lz;
    1253             :   LOCAL_HIREMAINDER;
    1254             : 
    1255  4546164264 :   if (ny == 1) return muluispec((ulong)*y, x, nx);
    1256  1672749069 :   if (ny == 0) return gen_0;
    1257  1671696687 :   zd = (GEN)avma; lz = nx+ny+2;
    1258  1671696687 :   (void)new_chunk(lz);
    1259  1671696687 :   xd = x + nx;
    1260  1671696687 :   yd = y + ny;
    1261  1671696687 :   ye = yd; p1 = *--xd;
    1262             : 
    1263  1671696687 :   *--zd = mulll(p1, *--yd); z2e = zd;
    1264 17813653647 :   while (yd > y) *--zd = addmul(p1, *--yd);
    1265  1671696687 :   *--zd = hiremainder;
    1266             : 
    1267 19396430649 :   while (xd > x)
    1268             :   {
    1269             :     LOCAL_OVERFLOW;
    1270 17724733962 :     yd = ye; p1 = *--xd;
    1271             : 
    1272 17724733962 :     z2d = --z2e;
    1273 17724733962 :     *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
    1274 >25013*10^7 :     while (yd > y)
    1275             :     {
    1276 >23241*10^7 :       hiremainder += overflow;
    1277 >23241*10^7 :       *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
    1278             :     }
    1279 17724733962 :     *--zd = hiremainder + overflow;
    1280             :   }
    1281  1671696687 :   if (*zd == 0) { zd++; lz--; } /* normalize */
    1282  1671696687 :   *--zd = evalsigne(1) | evallgefint(lz);
    1283  1671696687 :   *--zd = evaltyp(t_INT) | evallg(lz);
    1284  1671696687 :   return gc_const((pari_sp)zd, zd);
    1285             : }
    1286             : 
    1287             : INLINE GEN
    1288   815639325 : sqrispec_basecase(GEN x, long nx)
    1289             : {
    1290             :   GEN z2e,z2d,yd,xd,zd,x0,z0;
    1291             :   long p1,lz;
    1292             :   LOCAL_HIREMAINDER;
    1293             :   LOCAL_OVERFLOW;
    1294             : 
    1295   815639325 :   if (nx == 1) return sqru((ulong)*x);
    1296   561382881 :   if (nx == 0) return gen_0;
    1297   180131589 :   zd = (GEN)avma; lz = (nx+1) << 1;
    1298   180131589 :   z0 = new_chunk(lz);
    1299   180131589 :   if (nx == 1)
    1300             :   {
    1301           0 :     *--zd = mulll(*x, *x);
    1302           0 :     *--zd = hiremainder; goto END;
    1303             :   }
    1304   180131589 :   xd = x + nx;
    1305             : 
    1306             :   /* compute double products --> zd */
    1307   180131589 :   p1 = *--xd; yd = xd; --zd;
    1308   180131589 :   *--zd = mulll(p1, *--yd); z2e = zd;
    1309  1092430764 :   while (yd > x) *--zd = addmul(p1, *--yd);
    1310   180131589 :   *--zd = hiremainder;
    1311             : 
    1312   180131589 :   x0 = x+1;
    1313  1092430764 :   while (xd > x0)
    1314             :   {
    1315             :     LOCAL_OVERFLOW;
    1316   912299175 :     p1 = *--xd; yd = xd;
    1317             : 
    1318   912299175 :     z2e -= 2; z2d = z2e;
    1319   912299175 :     *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
    1320  7632319386 :     while (yd > x)
    1321             :     {
    1322  6720020211 :       hiremainder += overflow;
    1323  6720020211 :       *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
    1324             :     }
    1325   912299175 :     *--zd = hiremainder + overflow;
    1326             :   }
    1327             :   /* multiply zd by 2 (put result in zd - 1) */
    1328   180131589 :   zd[-1] = ((*zd & HIGHBIT) != 0);
    1329   180131589 :   shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);
    1330             : 
    1331             :   /* add the squares */
    1332   180131589 :   xd = x + nx; zd = z0 + lz;
    1333   180131589 :   p1 = *--xd;
    1334   180131589 :   zd--; *zd = mulll(p1,p1);
    1335   180131589 :   zd--; *zd = addll(hiremainder, *zd);
    1336  1272562353 :   while (xd > x)
    1337             :   {
    1338  1092430764 :     p1 = *--xd;
    1339  1092430764 :     zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);
    1340  1092430764 :     zd--; *zd = addll(hiremainder + overflow, *zd);
    1341             :   }
    1342             : 
    1343   180131589 : END:
    1344   180131589 :   if (*zd == 0) { zd++; lz--; } /* normalize */
    1345   180131589 :   *--zd = evalsigne(1) | evallgefint(lz);
    1346   180131589 :   *--zd = evaltyp(t_INT) | evallg(lz);
    1347   180131589 :   return gc_const((pari_sp)zd, zd);
    1348             : }
    1349             : 
    1350             : /********************************************************************/
    1351             : /**                                                                **/
    1352             : /**               INTEGER MULTIPLICATION (FFT)                     **/
    1353             : /**                                                                **/
    1354             : /********************************************************************/
    1355             : 
    1356             : /*
    1357             :  Compute parameters for FFT:
    1358             :    len: result length
    1359             :    k: FFT depth.
    1360             :    n: number of blocks (2^k)
    1361             :    bs: block size
    1362             :    mod: Modulus is M=2^(BIL*mod)+1
    1363             :    ord: order of 2 in Z/MZ.
    1364             :  We must have:
    1365             :    bs*n >= l
    1366             :    2^(BIL*mod) > nb*2^(2*BIL*bs)
    1367             :    2^k | 2*BIL*mod
    1368             : */
    1369             : static void
    1370      200115 : mulliifft_params(long len, long *k, long *mod, long *bs, long *n, ulong *ord)
    1371             : {
    1372             :   long r;
    1373      200115 :   *k = expu((3*len)>>2)-3;
    1374             :   do {
    1375      200142 :     (*k)--;
    1376      200142 :     r  = *k-(TWOPOTBITS_IN_LONG+2);
    1377      200142 :     *n = 1L<<*k;
    1378      200142 :     *bs = (len+*n-1)>>*k;
    1379      200142 :     *mod= 2**bs+1;
    1380      200142 :     if (r>0)
    1381        8040 :       *mod=((*mod+(1L<<r)-1)>>r)<<r;
    1382      200142 :   } while(*mod>=3**bs);
    1383      200115 :   *ord= 4**mod*BITS_IN_LONG;
    1384      200115 : }
    1385             : 
    1386             : /* Zf_: arithmetic in ring Z/MZ where M= 2^(BITS_IN_LONG*mod)+1
    1387             :  * for some mod.
    1388             :  * Do not garbage collect.
    1389             :  */
    1390             : 
    1391             : static GEN
    1392   531880128 : Zf_add(GEN a, GEN b, GEN M)
    1393             : {
    1394   531880128 :   GEN y, z = addii(a,b);
    1395   531880128 :   long mod = lgefint(M)-3;
    1396   531880128 :   long l = NLIMBS(z);
    1397   531880128 :   if (l<=mod) return z;
    1398   209330898 :   y = subiu(z, 1);
    1399   209330898 :   if (NLIMBS(y)<=mod) return z;
    1400   209330898 :   return int_normalize(y,1);
    1401             : }
    1402             : 
    1403             : static GEN
    1404   544108317 : Zf_sub(GEN a, GEN b, GEN M)
    1405             : {
    1406   544108317 :   GEN z = subii(a,b);
    1407   544108317 :   return signe(z)>=0? z: addii(M,z);
    1408             : }
    1409             : 
    1410             : /* destroy z */
    1411             : static GEN
    1412  1136769312 : Zf_red_destroy(GEN z, GEN M)
    1413             : {
    1414  1136769312 :   long mod = lgefint(M)-3;
    1415  1136769312 :   long l = NLIMBS(z);
    1416             :   GEN y;
    1417  1136769312 :   if (l<=mod) return z;
    1418   507844245 :   y = shifti(z, -mod*BITS_IN_LONG);
    1419   507844245 :   z = int_normalize(z, NLIMBS(y));
    1420   507844245 :   y = Zf_red_destroy(y, M);
    1421   507844245 :   z = subii(z, y);
    1422   507844245 :   if (signe(z)<0) z = addii(z, M);
    1423   507844245 :   return z;
    1424             : }
    1425             : 
    1426             : INLINE GEN
    1427   586326795 : Zf_shift(GEN a, ulong s, GEN M) { return Zf_red_destroy(shifti(a, s), M); }
    1428             : 
    1429             : /*
    1430             :  Multiply by sqrt(2)^s
    1431             :  We use the formula sqrt(2)=z_8*(1-z_4)) && z_8=2^(ord/16) [2^(ord/4)+1]
    1432             : */
    1433             : 
    1434             : static GEN
    1435   531880128 : Zf_mulsqrt2(GEN a, ulong s, ulong ord, GEN M)
    1436             : {
    1437   531880128 :   ulong hord = ord>>1;
    1438   531880128 :   if (!signe(a)) return gen_0;
    1439   519272145 :   if (odd(s)) /* Multiply by 2^(s/2) */
    1440             :   {
    1441    12228189 :     GEN az8  = Zf_shift(a,   ord>>4, M);
    1442    12228189 :     GEN az83 = Zf_shift(az8, ord>>3, M);
    1443    12228189 :     a = Zf_sub(az8, az83, M);
    1444    12228189 :     s--;
    1445             :   }
    1446   519272145 :   if (s < hord)
    1447   383015268 :     return Zf_shift(a, s>>1, M);
    1448             :   else
    1449   136256877 :     return subii(M,Zf_shift(a, (s-hord)>>1, M));
    1450             : }
    1451             : 
    1452             : INLINE GEN
    1453      971520 : Zf_sqr(GEN a, GEN M) { return Zf_red_destroy(sqri(a), M); }
    1454             : 
    1455             : INLINE GEN
    1456    41626752 : Zf_mul(GEN a, GEN b, GEN M) { return Zf_red_destroy(mulii(a,b), M); }
    1457             : 
    1458             : /* In place, bit reversing FFT */
    1459             : static void
    1460    83827278 : muliifft_dit(ulong o, ulong ord, GEN M, GEN FFT, long d, long step)
    1461             : {
    1462    83827278 :   pari_sp av = avma;
    1463             :   long i;
    1464    83827278 :   ulong j, no = (o<<1)%ord;
    1465    83827278 :   long hstep=step>>1;
    1466   436786254 :   for (i = d+1, j = 0; i <= d+hstep; ++i, j =(j+o)%ord)
    1467             :   {
    1468   352958976 :     GEN a = Zf_add(gel(FFT,i), gel(FFT,i+hstep), M);
    1469   352958976 :     GEN b = Zf_mulsqrt2(Zf_sub(gel(FFT,i), gel(FFT,i+hstep), M), j, ord, M);
    1470   352958976 :     affii(a,gel(FFT,i));
    1471   352958976 :     affii(b,gel(FFT,i+hstep));
    1472   352958976 :     set_avma(av);
    1473             :   }
    1474    83827278 :   if (hstep>1)
    1475             :   {
    1476    41714766 :     muliifft_dit(no, ord, M, FFT, d, hstep);
    1477    41714766 :     muliifft_dit(no, ord, M, FFT, d+hstep, hstep);
    1478             :   }
    1479    83827278 : }
    1480             : 
    1481             : /* In place, bit reversed FFT, inverse of muliifft_dit */
    1482             : static void
    1483    42398157 : muliifft_dis(ulong o, ulong ord, GEN M, GEN FFT, long d, long step)
    1484             : {
    1485    42398157 :   pari_sp av = avma;
    1486             :   long i;
    1487    42398157 :   ulong j, no = (o<<1)%ord;
    1488    42398157 :   long hstep=step>>1;
    1489    42398157 :   if (hstep>1)
    1490             :   {
    1491    21099021 :     muliifft_dis(no, ord, M, FFT, d, hstep);
    1492    21099021 :     muliifft_dis(no, ord, M, FFT, d+hstep, hstep);
    1493             :   }
    1494   221319309 :   for (i = d+1, j = 0; i <= d+hstep; ++i, j =(j+o)%ord)
    1495             :   {
    1496   178921152 :     GEN z = Zf_mulsqrt2(gel(FFT,i+hstep), j, ord, M);
    1497   178921152 :     GEN a = Zf_add(gel(FFT,i), z, M);
    1498   178921152 :     GEN b = Zf_sub(gel(FFT,i), z, M);
    1499   178921152 :     affii(a,gel(FFT,i));
    1500   178921152 :     affii(b,gel(FFT,i+hstep));
    1501   178921152 :     set_avma(av);
    1502             :   }
    1503    42398157 : }
    1504             : 
    1505             : static GEN
    1506      397746 : muliifft_spliti(GEN a, long na, long bs, long n, long mod)
    1507             : {
    1508      397746 :   GEN ap = a+na-1;
    1509      397746 :   GEN c  = cgetg(n+1, t_VEC);
    1510             :   long i,j;
    1511    84622770 :   for(i=1;i<=n;i++)
    1512             :   {
    1513    84225024 :     GEN z = cgeti(mod+3);
    1514    84225024 :     if (na)
    1515             :     {
    1516    41588103 :       long m = minss(bs, na), v=0;
    1517    41588103 :       GEN zp, aa=ap-m+1;
    1518   265012707 :       while (!*aa && v<m) {aa++; v++;}
    1519    41588103 :       zp = z+m-v+1;
    1520  1054019319 :       for (j=v; j < m; j++)
    1521  1012431216 :         *zp-- = *ap--;
    1522    41588103 :       ap -= v; na -= m;
    1523    41588103 :       z[1] = evalsigne(m!=v) | evallgefint(m-v+2);
    1524             :     } else
    1525    42636921 :       z[1] = evalsigne(0) | evallgefint(2);
    1526    84225024 :     gel(c, i) = z;
    1527             :   }
    1528      397746 :   return c;
    1529             : }
    1530             : 
    1531             : static GEN
    1532      200115 : muliifft_unspliti(GEN V, long bs, long len)
    1533             : {
    1534      200115 :   long s, i, j, l = lg(V);
    1535      200115 :   GEN a = cgeti(len);
    1536      200115 :   a[1] = evalsigne(1)|evallgefint(len);
    1537  1251744714 :   for(i=2;i<len;i++)
    1538  1251544599 :     a[i] = 0;
    1539    42798387 :   for(i=1, s=0; i<l; i++, s+=bs)
    1540             :   {
    1541    42598272 :     GEN  u = gel(V,i);
    1542    42598272 :     if (signe(u))
    1543             :     {
    1544    41609877 :       GEN ap = int_W(a,s);
    1545    41609877 :       GEN up = int_LSW(u);
    1546    41609877 :       long lu = NLIMBS(u);
    1547             :       LOCAL_OVERFLOW;
    1548    41609877 :       *ap = addll(*ap, *up--); ap--;
    1549  2506934070 :       for (j=1; j<lu; j++)
    1550  2465324193 :        { *ap = addllx(*ap, *up--); ap--; }
    1551    41610021 :       while (overflow)
    1552         144 :        { *ap = addll(*ap, 1); ap--; }
    1553             :     }
    1554             :   }
    1555      200115 :   return int_normalize(a,0);
    1556             : }
    1557             : 
    1558             : static GEN
    1559        2484 : sqrispec_fft(GEN a, long na)
    1560             : {
    1561        2484 :   pari_sp av, ltop = avma;
    1562        2484 :   long len = 2*na;
    1563             :   long k, mod, bs, n;
    1564             :   GEN  FFT, M;
    1565             :   long i;
    1566             :   ulong o, ord;
    1567             : 
    1568        2484 :   mulliifft_params(len,&k,&mod,&bs,&n,&ord);
    1569        2484 :   o = ord>>k;
    1570        2484 :   M = int2n(mod*BITS_IN_LONG);
    1571        2484 :   M[2+mod] = 1;
    1572        2484 :   FFT = muliifft_spliti(a, na, bs, n, mod);
    1573        2484 :   muliifft_dit(o, ord, M, FFT, 0, n);
    1574        2484 :   av = avma;
    1575      974004 :   for(i=1; i<=n; i++)
    1576             :   {
    1577      971520 :     affii(Zf_sqr(gel(FFT,i), M), gel(FFT,i));
    1578      971520 :     set_avma(av);
    1579             :   }
    1580        2484 :   muliifft_dis(ord-o, ord, M, FFT, 0, n);
    1581      974004 :   for(i=1; i<=n; i++)
    1582             :   {
    1583      971520 :     affii(Zf_shift(gel(FFT,i), (ord>>1)-k, M), gel(FFT,i));
    1584      971520 :     set_avma(av);
    1585             :   }
    1586        2484 :   return gerepileuptoint(ltop, muliifft_unspliti(FFT,bs,2+len));
    1587             : }
    1588             : 
    1589             : static GEN
    1590      197631 : muliispec_fft(GEN a, GEN b, long na, long nb)
    1591             : {
    1592      197631 :   pari_sp av, av2, ltop = avma;
    1593      197631 :   long len = na+nb;
    1594             :   long k, mod, bs, n;
    1595             :   GEN FFT, FFTb, M;
    1596             :   long i;
    1597             :   ulong o, ord;
    1598             : 
    1599      197631 :   mulliifft_params(len,&k,&mod,&bs,&n,&ord);
    1600      197631 :   o = ord>>k;
    1601      197631 :   M = int2n(mod*BITS_IN_LONG);
    1602      197631 :   M[2+mod] = 1;
    1603      197631 :   FFT = muliifft_spliti(a, na, bs, n, mod);
    1604      197631 :   av=avma;
    1605      197631 :   muliifft_dit(o, ord, M, FFT, 0, n);
    1606      197631 :   FFTb = muliifft_spliti(b, nb, bs, n, mod);
    1607      197631 :   av2 = avma;
    1608      197631 :   muliifft_dit(o, ord, M, FFTb, 0, n);
    1609    41824383 :   for(i=1; i<=n; i++)
    1610             :   {
    1611    41626752 :     affii(Zf_mul(gel(FFT,i), gel(FFTb,i), M), gel(FFT,i));
    1612    41626752 :     set_avma(av2);
    1613             :   }
    1614      197631 :   set_avma(av);
    1615      197631 :   muliifft_dis(ord-o, ord, M, FFT, 0, n);
    1616    41824383 :   for(i=1; i<=n; i++)
    1617             :   {
    1618    41626752 :     affii(Zf_shift(gel(FFT,i),(ord>>1)-k,M), gel(FFT,i));
    1619    41626752 :     set_avma(av);
    1620             :   }
    1621      197631 :   return gerepileuptoint(ltop, muliifft_unspliti(FFT,bs,2+len));
    1622             : }
    1623             : 
    1624             : /********************************************************************/
    1625             : /**                                                                **/
    1626             : /**               INTEGER MULTIPLICATION (KARATSUBA)               **/
    1627             : /**                                                                **/
    1628             : /********************************************************************/
    1629             : 
    1630             : /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
    1631             : static GEN
    1632  1175903565 : addshiftw(GEN x, GEN y, long d)
    1633             : {
    1634  1175903565 :   GEN z,z0,y0,yd, zd = (GEN)avma;
    1635  1175903565 :   long a,lz,ly = lgefint(y);
    1636             : 
    1637  1175903565 :   z0 = new_chunk(d);
    1638  1175903565 :   a = ly-2; yd = y+ly;
    1639  1175903565 :   if (a >= d)
    1640             :   {
    1641 25790176539 :     y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
    1642  1172496069 :     a -= d;
    1643  1172496069 :     if (a)
    1644   987295233 :       z = addiispec(LIMBS(x), LIMBS(y), NLIMBS(x), a);
    1645             :     else
    1646   185200836 :       z = icopy(x);
    1647             :   }
    1648             :   else
    1649             :   {
    1650    16895031 :     y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
    1651    58945893 :     while (zd > z0) *--zd = 0;    /* complete with 0s */
    1652     3407496 :     z = icopy(x);
    1653             :   }
    1654  1175903565 :   lz = lgefint(z)+d;
    1655  1175903565 :   z[1] = evalsigne(1) | evallgefint(lz);
    1656  1175903565 :   z[0] = evaltyp(t_INT) | evallg(lz); return z;
    1657             : }
    1658             : 
    1659             : /* Fast product (Karatsuba) of integers. a and b are "special" GENs
    1660             :  * c,c0,c1,c2 are genuine GENs.
    1661             :  */
    1662             : GEN
    1663  5036870694 : muliispec(GEN a, GEN b, long na, long nb)
    1664             : {
    1665             :   GEN a0,c,c0;
    1666             :   long n0, n0a, i;
    1667             :   pari_sp av;
    1668             : 
    1669  5036870694 :   if (na < nb) swapspec(a,b, na,nb);
    1670  5036870694 :   if (nb < MULII_KARATSUBA_LIMIT) return muliispec_basecase(a,b,na,nb);
    1671   490706430 :   if (nb >= MULII_FFT_LIMIT)      return muliispec_fft(a,b,na,nb);
    1672   490508799 :   i=(na>>1); n0=na-i; na=i;
    1673   490508799 :   av=avma; a0=a+na; n0a=n0;
    1674   573593058 :   while (n0a && !*a0) { a0++; n0a--; }
    1675             : 
    1676   490508799 :   if (n0a && nb > n0)
    1677   484664712 :   { /* nb <= na <= n0 */
    1678             :     GEN b0,c1,c2;
    1679             :     long n0b;
    1680             : 
    1681   484664712 :     nb -= n0;
    1682   484664712 :     c = muliispec(a,b,na,nb);
    1683   484664712 :     b0 = b+nb; n0b = n0;
    1684   569442375 :     while (n0b && !*b0) { b0++; n0b--; }
    1685   484664712 :     if (n0b)
    1686             :     {
    1687   483764838 :       c0 = muliispec(a0,b0, n0a,n0b);
    1688             : 
    1689   483764838 :       c2 = addiispec(a0,a, n0a,na);
    1690   483764838 :       c1 = addiispec(b0,b, n0b,nb);
    1691   483764838 :       c1 = muliispec(LIMBS(c1),LIMBS(c2), NLIMBS(c1),NLIMBS(c2));
    1692   483764838 :       c2 = addiispec(LIMBS(c0),LIMBS(c),  NLIMBS(c0),NLIMBS(c));
    1693             : 
    1694   483764838 :       c1 = subiispec(LIMBS(c1),LIMBS(c2), NLIMBS(c1),NLIMBS(c2));
    1695             :     }
    1696             :     else
    1697             :     {
    1698      899874 :       c0 = gen_0;
    1699      899874 :       c1 = muliispec(a0,b, n0a,nb);
    1700             :     }
    1701   484664712 :     c = addshiftw(c,c1, n0);
    1702             :   }
    1703             :   else
    1704             :   {
    1705     5844087 :     c = muliispec(a,b,na,nb);
    1706     5844087 :     c0 = muliispec(a0,b,n0a,nb);
    1707             :   }
    1708   490508799 :   return gerepileuptoint(av, addshiftw(c,c0, n0));
    1709             : }
    1710             : GEN
    1711      164274 : muluui(ulong x, ulong y, GEN z)
    1712             : {
    1713      164274 :   long t, s = signe(z);
    1714             :   GEN r;
    1715             :   LOCAL_HIREMAINDER;
    1716             : 
    1717      164274 :   if (!x || !y || !signe(z)) return gen_0;
    1718      163977 :   t = mulll(x,y);
    1719      163977 :   if (!hiremainder)
    1720      163977 :     r = muluispec(t, z+2, lgefint(z)-2);
    1721             :   else
    1722             :   {
    1723             :     long tmp[2];
    1724           0 :     tmp[0] = hiremainder;
    1725           0 :     tmp[1] = t;
    1726           0 :     r = muliispec(z+2,tmp,lgefint(z)-2,2);
    1727             :   }
    1728      163977 :   setsigne(r,s); return r;
    1729             : }
    1730             : 
    1731             : #define sqrispec_mirror sqrispec
    1732             : #define muliispec_mirror muliispec
    1733             : 
    1734             : /* x % (2^n), assuming n >= 0 */
    1735             : GEN
    1736    28528866 : remi2n(GEN x, long n)
    1737             : {
    1738    28528866 :   long hi,l,k,lx,ly, sx = signe(x);
    1739             :   GEN z, xd, zd;
    1740             : 
    1741    28528866 :   if (!sx || !n) return gen_0;
    1742             : 
    1743    28508373 :   k = dvmdsBIL(n, &l);
    1744    28508373 :   lx = lgefint(x);
    1745    28508373 :   if (lx < k+3) return icopy(x);
    1746             : 
    1747    28395213 :   xd = x + (lx-k-1);
    1748             :   /* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words
    1749             :    *            ^--- initial xd  */
    1750    28395213 :   hi = ((ulong)*xd) & ((1UL<<l)-1); /* last l bits of # = top bits of result */
    1751    28395213 :   if (!hi)
    1752             :   { /* strip leading zeroes from result */
    1753     4499703 :     xd++; while (k && !*xd) { k--; xd++; }
    1754     4492527 :     if (!k) return gen_0;
    1755     4294395 :     ly = k+2; xd--;
    1756             :   }
    1757             :   else
    1758    23902686 :     ly = k+3;
    1759             : 
    1760    28197081 :   zd = z = cgeti(ly);
    1761    28197081 :   *++zd = evalsigne(sx) | evallgefint(ly);
    1762    28197081 :   if (hi) *++zd = hi;
    1763   110675208 :   for ( ;k; k--) *++zd = *++xd;
    1764    28197081 :   return z;
    1765             : }
    1766             : 
    1767             : GEN
    1768   823091544 : sqrispec(GEN a, long na)
    1769             : {
    1770             :   GEN a0,c;
    1771             :   long n0, n0a, i;
    1772             :   pari_sp av;
    1773             : 
    1774   823091544 :   if (na < SQRI_KARATSUBA_LIMIT) return sqrispec_basecase(a,na);
    1775     7452219 :   if (na >= SQRI_FFT_LIMIT) return sqrispec_fft(a,na);
    1776     7449735 :   i=(na>>1); n0=na-i; na=i;
    1777     7449735 :   av=avma; a0=a+na; n0a=n0;
    1778    10258680 :   while (n0a && !*a0) { a0++; n0a--; }
    1779     7449735 :   c = sqrispec(a,na);
    1780     7449735 :   if (n0a)
    1781             :   {
    1782     7441503 :     GEN t, c1, c0 = sqrispec(a0,n0a);
    1783             : #if 0
    1784             :     c1 = shifti(muliispec(a0,a, n0a,na),1);
    1785             : #else /* faster */
    1786     7441503 :     t = addiispec(a0,a,n0a,na);
    1787     7441503 :     t = sqrispec(LIMBS(t),NLIMBS(t));
    1788     7441503 :     c1= addiispec(LIMBS(c0),LIMBS(c), NLIMBS(c0), NLIMBS(c));
    1789     7441503 :     c1= subiispec(LIMBS(t),LIMBS(c1), NLIMBS(t), NLIMBS(c1));
    1790             : #endif
    1791     7441503 :     c = addshiftw(c,c1, n0);
    1792     7441503 :     c = addshiftw(c,c0, n0);
    1793             :   }
    1794             :   else
    1795        8232 :     c = addshiftw(c,gen_0,n0<<1);
    1796     7449735 :   return gerepileuptoint(av, c);
    1797             : }
    1798             : 
    1799             : /********************************************************************/
    1800             : /**                                                                **/
    1801             : /**                    KARATSUBA SQUARE ROOT                       **/
    1802             : /**      adapted from Paul Zimmermann's implementation of          **/
    1803             : /**      his algorithm in GMP (mpn_sqrtrem)                        **/
    1804             : /**                                                                **/
    1805             : /********************************************************************/
    1806             : 
    1807             : /* Square roots table */
    1808             : static const unsigned char approx_tab[192] = {
    1809             :   128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
    1810             :   143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
    1811             :   156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
    1812             :   169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
    1813             :   181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
    1814             :   192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
    1815             :   202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
    1816             :   212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
    1817             :   221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
    1818             :   230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
    1819             :   239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
    1820             :   247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
    1821             : };
    1822             : 
    1823             : /* N[0], assume N[0] >= 2^(BIL-2).
    1824             :  * Return r,s such that s^2 + r = N, 0 <= r <= 2s */
    1825             : static void
    1826    70805340 : p_sqrtu1(ulong *N, ulong *ps, ulong *pr)
    1827             : {
    1828    70805340 :   ulong prec, r, s, q, u, n0 = N[0];
    1829             : 
    1830    70805340 :   q = n0 >> (BITS_IN_LONG - 8);
    1831             :   /* 2^6 = 64 <= q < 256 = 2^8 */
    1832    70805340 :   s = approx_tab[q - 64];                                /* 128 <= s < 255 */
    1833    70805340 :   r = (n0 >> (BITS_IN_LONG - 16)) - s * s;                /* r <= 2*s */
    1834    70805340 :   if (r > (s << 1)) { r -= (s << 1) | 1; s++; }
    1835             : 
    1836             :   /* 8-bit approximation from the high 8-bits of N[0] */
    1837    70805340 :   prec = 8;
    1838    70805340 :   n0 <<= 2 * prec;
    1839   212416020 :   while (2 * prec < BITS_IN_LONG)
    1840             :   { /* invariant: s has prec bits, and r <= 2*s */
    1841   141610680 :     r = (r << prec) + (n0 >> (BITS_IN_LONG - prec));
    1842   141610680 :     n0 <<= prec;
    1843   141610680 :     u = 2 * s;
    1844   141610680 :     q = r / u; u = r - q * u;
    1845   141610680 :     s = (s << prec) + q;
    1846   141610680 :     u = (u << prec) + (n0 >> (BITS_IN_LONG - prec));
    1847   141610680 :     q = q * q;
    1848   141610680 :     r = u - q;
    1849   141610680 :     if (u < q) { s--; r += (s << 1) | 1; }
    1850   141610680 :     n0 <<= prec;
    1851   141610680 :     prec = 2 * prec;
    1852             :   }
    1853    70805340 :   *ps = s;
    1854    70805340 :   *pr = r;
    1855    70805340 : }
    1856             : 
    1857             : /* N[0..1], assume N[0] >= 2^(BIL-2).
    1858             :  * Return 1 if remainder overflows, 0 otherwise */
    1859             : static int
    1860    69129354 : p_sqrtu2(ulong *N, ulong *ps, ulong *pr)
    1861             : {
    1862    69129354 :   ulong cc, qhl, r, s, q, u, n1 = N[1];
    1863             :   LOCAL_OVERFLOW;
    1864             : 
    1865    69129354 :   p_sqrtu1(N, &s, &r); /* r <= 2s */
    1866   104010756 :   qhl = 0; while (r >= s) { qhl++; r -= s; }
    1867             :   /* now r < s < 2^(BIL/2) */
    1868    69129354 :   r = (r << BITS_IN_HALFULONG) | (n1 >> BITS_IN_HALFULONG);
    1869    69129354 :   u = s << 1;
    1870    69129354 :   q = r / u; u = r - q * u;
    1871    69129354 :   q += (qhl & 1) << (BITS_IN_HALFULONG - 1);
    1872    69129354 :   qhl >>= 1;
    1873             :   /* (initial r)<<(BIL/2) + n1>>(BIL/2) = (qhl<<(BIL/2) + q) * 2s + u */
    1874    69129354 :   s = ((s + qhl) << BITS_IN_HALFULONG) + q;
    1875    69129354 :   cc = u >> BITS_IN_HALFULONG;
    1876    69129354 :   r = (u << BITS_IN_HALFULONG) | (n1 & LOWMASK);
    1877    69129354 :   r = subll(r, q * q);
    1878    69129354 :   cc -= overflow + qhl;
    1879             :   /* now subtract 2*q*2^(BIL/2) + 2^BIL if qhl is set */
    1880    69129354 :   if ((long)cc < 0)
    1881             :   {
    1882    18139308 :     if (s) {
    1883    18118410 :       r = addll(r, s);
    1884    18118410 :       cc += overflow;
    1885    18118410 :       s--;
    1886             :     } else {
    1887       20898 :       cc++;
    1888       20898 :       s = ~0UL;
    1889             :     }
    1890    18139308 :     r = addll(r, s);
    1891    18139308 :     cc += overflow;
    1892             :   }
    1893    69129354 :   *ps = s;
    1894    69129354 :   *pr = r; return cc;
    1895             : }
    1896             : 
    1897             : static void
    1898    67915563 : xmpn_zero(GEN x, long n)
    1899             : {
    1900   579704925 :   while (--n >= 0) x[n]=0;
    1901    67915563 : }
    1902             : static void
    1903   842711925 : xmpn_copy(GEN z, GEN x, long n)
    1904             : {
    1905   842711925 :   long k = n;
    1906  3569550181 :   while (--k >= 0) z[k] = x[k];
    1907   842711925 : }
    1908             : /* a[0..la-1] * 2^(lb BIL) | b[0..lb-1] */
    1909             : static GEN
    1910   371677632 : catii(GEN a, long la, GEN b, long lb)
    1911             : {
    1912   371677632 :   long l = la + lb + 2;
    1913   371677632 :   GEN z = cgetipos(l);
    1914   371677632 :   xmpn_copy(LIMBS(z), a, la);
    1915   371677632 :   xmpn_copy(LIMBS(z) + la, b, lb);
    1916   371677632 :   return int_normalize(z, 0);
    1917             : }
    1918             : 
    1919             : /* sqrt n[0..1], assume n normalized */
    1920             : static GEN
    1921    68495211 : sqrtispec2(GEN n, GEN *pr)
    1922             : {
    1923             :   ulong s, r;
    1924    68495211 :   int hi = p_sqrtu2((ulong*)n, &s, &r);
    1925    68495211 :   GEN S = utoi(s);
    1926    68495211 :   *pr = hi? uutoi(1,r): utoi(r);
    1927    68495211 :   return S;
    1928             : }
    1929             : 
    1930             : /* sqrt n[0], _dont_ assume n normalized */
    1931             : static GEN
    1932     1675986 : sqrtispec1_sh(GEN n, GEN *pr)
    1933             : {
    1934             :   GEN S;
    1935     1675986 :   ulong r, s, u0 = uel(n,0);
    1936     1675986 :   int sh = bfffo(u0) & ~1UL;
    1937     1675986 :   if (sh) u0 <<= sh;
    1938     1675986 :   p_sqrtu1(&u0, &s, &r);
    1939             :   /* s^2 + r = u0, s < 2^(BIL/2). Rescale back:
    1940             :    * 2^(2k) n = S^2 + R
    1941             :    * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
    1942     1675986 :   if (sh) {
    1943     1412907 :     int k = sh >> 1;
    1944     1412907 :     ulong s0 = s & ((1L<<k) - 1);
    1945     1412907 :     r += s * (s0<<1);
    1946     1412907 :     s >>= k;
    1947     1412907 :     r >>= sh;
    1948             :   }
    1949     1675986 :   S = utoi(s);
    1950     1675986 :   if (pr) *pr = utoi(r);
    1951     1675986 :   return S;
    1952             : }
    1953             : 
    1954             : /* sqrt n[0..1], _dont_ assume n normalized */
    1955             : static GEN
    1956      634143 : sqrtispec2_sh(GEN n, GEN *pr)
    1957             : {
    1958             :   GEN S;
    1959      634143 :   ulong U[2], r, s, u0 = uel(n,0), u1 = uel(n,1);
    1960      634143 :   int hi, sh = bfffo(u0) & ~1UL;
    1961      634143 :   if (sh) {
    1962      605763 :     u0 = (u0 << sh) | (u1 >> (BITS_IN_LONG-sh));
    1963      605763 :     u1 <<= sh;
    1964             :   }
    1965      634143 :   U[0] = u0;
    1966      634143 :   U[1] = u1; hi = p_sqrtu2(U, &s, &r);
    1967             :   /* s^2 + R = u0|u1. Rescale back:
    1968             :    * 2^(2k) n = S^2 + R
    1969             :    * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
    1970      634143 :   if (sh) {
    1971      605763 :     int k = sh >> 1;
    1972      605763 :     ulong s0 = s & ((1L<<k) - 1);
    1973             :     LOCAL_HIREMAINDER;
    1974             :     LOCAL_OVERFLOW;
    1975      605763 :     r = addll(r, mulll(s, (s0<<1)));
    1976      605763 :     if (overflow) hiremainder++;
    1977      605763 :     hiremainder += hi; /* + 0 or 1 */
    1978      605763 :     s >>= k;
    1979      605763 :     r = (r>>sh) | (hiremainder << (BITS_IN_LONG-sh));
    1980      605763 :     hi = (hiremainder & (1L<<sh));
    1981             :   }
    1982      634143 :   S = utoi(s);
    1983      634143 :   if (pr) *pr = hi? uutoi(1,r): utoi(r);
    1984      634143 :   return S;
    1985             : }
    1986             : 
    1987             : /* Let N = N[0..2n-1]. Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S
    1988             :  * Assume N normalized */
    1989             : static GEN
    1990   254334027 : sqrtispec(GEN N, long n, GEN *r)
    1991             : {
    1992             :   GEN S, R, q, z, u;
    1993             :   long l, h;
    1994             : 
    1995   254334027 :   if (n == 1) return sqrtispec2(N, r);
    1996   185838816 :   l = n >> 1;
    1997   185838816 :   h = n - l; /* N = a3(h) | a2(h) | a1(l) | a0(l words) */
    1998   185838816 :   S = sqrtispec(N, h, &R); /* S^2 + R = a3|a2 */
    1999             : 
    2000   185838816 :   z = catii(LIMBS(R), NLIMBS(R), N + 2*h, l); /* = R | a1(l) */
    2001   185838816 :   q = dvmdii(z, shifti(S,1), &u);
    2002   185838816 :   z = catii(LIMBS(u), NLIMBS(u), N + n + h, l); /* = u | a0(l) */
    2003             : 
    2004   185838816 :   S = addshiftw(S, q, l);
    2005   185838816 :   R = subii(z, sqri(q));
    2006   185838816 :   if (signe(R) < 0)
    2007             :   {
    2008    30294807 :     GEN S2 = shifti(S,1);
    2009    30294807 :     R = addis(subiispec(LIMBS(S2),LIMBS(R), NLIMBS(S2),NLIMBS(R)), -1);
    2010    30294807 :     S = addis(S, -1);
    2011             :   }
    2012   185838816 :   *r = R; return S;
    2013             : }
    2014             : 
    2015             : /* Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S.
    2016             :  * As for dvmdii, R is last on stack and guaranteed to be gen_0 in case the
    2017             :  * remainder is 0. R = NULL is allowed. */
    2018             : GEN
    2019     2890320 : sqrtremi(GEN N, GEN *r)
    2020             : {
    2021             :   pari_sp av;
    2022     2890320 :   GEN S, R, n = N+2;
    2023     2890320 :   long k, l2, ln = NLIMBS(N);
    2024             :   int sh;
    2025             : 
    2026     2890320 :   if (ln <= 2)
    2027             :   {
    2028     2310672 :     if (ln == 2) return sqrtispec2_sh(n, r);
    2029     1676529 :     if (ln == 1) return sqrtispec1_sh(n, r);
    2030         543 :     if (r) *r = gen_0;
    2031         543 :     return gen_0;
    2032             :   }
    2033      579648 :   av = avma;
    2034      579648 :   sh = bfffo(n[0]) >> 1;
    2035      579648 :   l2 = (ln + 1) >> 1;
    2036      579648 :   if (sh || (ln & 1)) { /* normalize n, so that n[0] >= 2^BIL / 4 */
    2037      579297 :     GEN s0, t = new_chunk(ln + 1);
    2038      579297 :     t[ln] = 0;
    2039      579297 :     if (sh)
    2040      578724 :       shift_left(t, n, 0,ln-1, 0, sh << 1);
    2041             :     else
    2042         573 :       xmpn_copy(t, n, ln);
    2043      579297 :     S = sqrtispec(t, l2, &R); /* t normalized, 2 * l2 words */
    2044             :     /* Rescale back:
    2045             :      * 2^(2k) n = S^2 + R, k = sh + (ln & 1)*BIL/2
    2046             :      * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
    2047      579297 :     k = sh + (ln & 1) * (BITS_IN_LONG/2);
    2048      579297 :     s0 = remi2n(S, k);
    2049      579297 :     R = addii(shifti(R,-1), mulii(s0, S));
    2050      579297 :     R = shifti(R, 1 - (k<<1));
    2051      579297 :     S = shifti(S, -k);
    2052             :   }
    2053             :   else
    2054         351 :     S = sqrtispec(n, l2, &R);
    2055             : 
    2056      579648 :   if (!r) { set_avma((pari_sp)S); return gerepileuptoint(av, S); }
    2057      530889 :   gerepileall(av, 2, &S, &R); *r = R; return S;
    2058             : }
    2059             : 
    2060             : /* compute sqrt(|a|), assuming a != 0 */
    2061             : 
    2062             : #if 1
    2063             : GEN
    2064    67915563 : sqrtr_abs(GEN x)
    2065             : {
    2066    67915563 :   long l = realprec(x) - 2, e = expo(x), er = e>>1;
    2067    67915563 :   GEN b, c, res = cgetr(2 + l);
    2068    67915563 :   res[1] = evalsigne(1) | evalexpo(er);
    2069    67915563 :   if (e&1) {
    2070    31440525 :     b = new_chunk(l << 1);
    2071    31440525 :     xmpn_copy(b, x+2, l);
    2072    31440525 :     xmpn_zero(b + l,l);
    2073    31440525 :     b = sqrtispec(b, l, &c);
    2074    31440525 :     xmpn_copy(res+2, b+2, l);
    2075    31440525 :     if (cmpii(c, b) > 0) roundr_up_ip(res, l+2);
    2076             :   } else {
    2077             :     ulong u;
    2078    36475038 :     b = new_chunk(2 + (l << 1));
    2079    36475038 :     shift_left(b+1, x+2, 0,l-1, 0, BITS_IN_LONG-1);
    2080    36475038 :     b[0] = uel(x,2)>>1;
    2081    36475038 :     xmpn_zero(b + l+1,l+1);
    2082    36475038 :     b = sqrtispec(b, l+1, &c);
    2083    36475038 :     xmpn_copy(res+2, b+2, l);
    2084    36475038 :     u = uel(b,l+2);
    2085    36475038 :     if ( u&HIGHBIT || (u == ~HIGHBIT && cmpii(c,b) > 0))
    2086    17964164 :       roundr_up_ip(res, l+2);
    2087             :   }
    2088    67915563 :   return gc_const((pari_sp)res, res);
    2089             : }
    2090             : 
    2091             : #else /* use t_REAL: currently much slower (quadratic division) */
    2092             : 
    2093             : #ifdef LONG_IS_64BIT
    2094             : /* 64 bits of b = sqrt(a[0] * 2^64 + a[1])  [ up to 1ulp ] */
    2095             : static ulong
    2096             : sqrtu2(ulong *a)
    2097             : {
    2098             :   ulong c, b = dblmantissa( sqrt((double)a[0]) );
    2099             :   LOCAL_HIREMAINDER;
    2100             :   LOCAL_OVERFLOW;
    2101             : 
    2102             :   /* > 32 correct bits, 1 Newton iteration to reach 64 */
    2103             :   if (b <= a[0]) return HIGHBIT | (a[0] >> 1);
    2104             :   hiremainder = a[0]; c = divll(a[1], b);
    2105             :   return (addll(c, b) >> 1) | HIGHBIT;
    2106             : }
    2107             : /* 64 bits of sqrt(a[0] * 2^63) */
    2108             : static ulong
    2109             : sqrtu2_1(ulong *a)
    2110             : {
    2111             :   ulong t[2];
    2112             :   t[0] = (a[0] >> 1);
    2113             :   t[1] = (a[0] << (BITS_IN_LONG-1)) | (a[1] >> 1);
    2114             :   return sqrtu2(t);
    2115             : }
    2116             : #else
    2117             : /* 32 bits of sqrt(a[0] * 2^32) */
    2118             : static ulong
    2119             : sqrtu2(ulong *a)   { return dblmantissa( sqrt((double)a[0]) ); }
    2120             : /* 32 bits of sqrt(a[0] * 2^31) */
    2121             : static ulong
    2122             : sqrtu2_1(ulong *a) { return dblmantissa( sqrt(2. * a[0]) ); }
    2123             : #endif
    2124             : 
    2125             : GEN
    2126             : sqrtr_abs(GEN x)
    2127             : {
    2128             :   long l1, i, l = lg(x), ex = expo(x);
    2129             :   GEN a, t, y = cgetr(l);
    2130             :   pari_sp av, av0 = avma;
    2131             : 
    2132             :   a = rtor(x, l+1);
    2133             :   t = cgetr(l+1);
    2134             :   if (ex & 1) { /* odd exponent */
    2135             :     a[1] = evalsigne(1) | _evalexpo(1);
    2136             :     t[2] = (long)sqrtu2((ulong*)a + 2);
    2137             :   } else { /* even exponent */
    2138             :     a[1] = evalsigne(1) | _evalexpo(0);
    2139             :     t[2] = (long)sqrtu2_1((ulong*)a + 2);
    2140             :   }
    2141             :   t[1] = evalsigne(1) | _evalexpo(0);
    2142             :   for (i = 3; i <= l; i++) t[i] = 0;
    2143             : 
    2144             :   /* |x| = 2^(ex/2) a, t ~ sqrt(a) */
    2145             :   l--; l1 = 1; av = avma;
    2146             :   while (l1 < l) { /* let t := (t + a/t)/2 */
    2147             :     l1 <<= 1; if (l1 > l) l1 = l;
    2148             :     setlg(a, l1 + 2);
    2149             :     setlg(t, l1 + 2);
    2150             :     affrr(addrr(t, divrr(a,t)), t); shiftr_inplace(t, -1);
    2151             :     set_avma(av);
    2152             :   }
    2153             :   affrr(t,y); shiftr_inplace(y, (ex>>1));
    2154             :   return gc_const(av0, y);
    2155             : }
    2156             : 
    2157             : #endif
    2158             : 
    2159             : /*******************************************************************
    2160             :  *                                                                 *
    2161             :  *                           Base Conversion                       *
    2162             :  *                                                                 *
    2163             :  *******************************************************************/
    2164             : 
    2165             : static void
    2166      708297 : convi_dac(GEN x, ulong l, ulong *res)
    2167             : {
    2168      708297 :   pari_sp ltop=avma;
    2169             :   ulong m;
    2170             :   GEN x1,x2;
    2171      708297 :   if (l==1) { *res=itou(x); return; }
    2172      336108 :   m=l>>1;
    2173      336108 :   x1=dvmdii(x,powuu(1000000000UL,m),&x2);
    2174      336108 :   convi_dac(x1,l-m,res+m);
    2175      336108 :   convi_dac(x2,m,res);
    2176      336108 :   set_avma(ltop);
    2177             : }
    2178             : 
    2179             : /* convert integer --> base 10^9 [not memory clean] */
    2180             : ulong *
    2181      303604 : convi(GEN x, long *l)
    2182             : {
    2183      303604 :   long lz, lx = lgefint(x);
    2184             :   ulong *z;
    2185      303604 :   if (lx == 3 && uel(x,2) < 1000000000UL) {
    2186      267523 :     z = (ulong*)new_chunk(1);
    2187      267523 :     *z = x[2];
    2188      267523 :     *l = 1; return z+1;
    2189             :   }
    2190       36081 :   lz = 1 + (long)bit_accuracy_mul(lx, LOG10_2/9);
    2191       36081 :   z = (ulong*)new_chunk(lz);
    2192       36081 :   convi_dac(x,(ulong)lz,z);
    2193       65424 :   while (z[lz-1]==0) lz--;
    2194       36081 :   *l=lz; return z+lz;
    2195             : }
    2196             : 

Generated by: LCOV version 1.13