Code coverage tests

This page documents the degree to which the PARI/GP source code is tested by our public test suite, distributed with the source distribution in directory src/test/. This is measured by the gcov utility; we then process gcov output using the lcov frond-end.

We test a few variants depending on Configure flags on the pari.math.u-bordeaux.fr machine (x86_64 architecture), and agregate them in the final report:

The target is to exceed 90% coverage for all mathematical modules (given that branches depending on DEBUGLEVEL or DEBUGMEM are not covered). This script is run to produce the results below.

LCOV - code coverage report
Current view: top level - kernel/none - mp.c (source / functions) Hit Total Coverage
Test: PARI/GP v2.12.0 lcov report (development 23344-f0cc1b3f6) Lines: 1119 1151 97.2 %
Date: 2018-12-12 05:41:43 Functions: 69 69 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #line 2 "../src/kernel/none/mp.c"
       2             : /* Copyright (C) 2000-2003 The PARI group.
       3             : 
       4             : This file is part of the PARI/GP package.
       5             : 
       6             : PARI/GP is free software; you can redistribute it and/or modify it under the
       7             : terms of the GNU General Public License as published by the Free Software
       8             : Foundation. It is distributed in the hope that it will be useful, but WITHOUT
       9             : ANY WARRANTY WHATSOEVER.
      10             : 
      11             : Check the License for details. You should have received a copy of it, along
      12             : with the package; see the file 'COPYING'. If not, write to the Free Software
      13             : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
      14             : 
      15             : /***********************************************************************/
      16             : /**                                                                   **/
      17             : /**                       MULTIPRECISION KERNEL                       **/
      18             : /**                                                                   **/
      19             : /***********************************************************************/
      20             : #include "pari.h"
      21             : #include "paripriv.h"
      22             : #include "../src/kernel/none/tune-gen.h"
      23             : 
      24         659 : void pari_kernel_init(void) { }
      25         657 : void pari_kernel_close(void) { }
      26             : 
      27             : /* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't
      28             :  * GENs but pairs (long *a, long na) representing a list of digits (in basis
      29             :  * BITS_IN_LONG) : a[0], ..., a[na-1]. [ In ordre to facilitate splitting: no
      30             :  * need to reintroduce codewords ] */
      31             : 
      32             : #define LIMBS(x)  ((x)+2)
      33             : #define NLIMBS(x) (lgefint(x)-2)
      34             : 
      35             : /* Normalize a non-negative integer */
      36             : GEN
      37   175569780 : int_normalize(GEN x, long known_zero_words)
      38             : {
      39   175569780 :   long i, lx = lgefint(x);
      40             :   GEN x0;
      41   175569780 :   if (lx == 2) { x[1] = evalsigne(0) | evallgefint(2); return x; }
      42   175569780 :   if (!known_zero_words && x[2]) return x;
      43   734743572 :   for (i = 2+known_zero_words; i < lx; i++)
      44   713983413 :     if (x[i]) break;
      45    80195604 :   x0 = x; i -= 2; x += i;
      46    80195604 :   if (x0 == (GEN)avma) set_avma((pari_sp)x);
      47    33362520 :   else stackdummy((pari_sp)(x0+i), (pari_sp)x0);
      48    80195604 :   lx -= i;
      49    80195604 :   x[0] = evaltyp(t_INT) | evallg(lx);
      50    80195604 :   if (lx == 2) x[1] = evalsigne(0) | evallgefint(lx);
      51    59435445 :   else         x[1] = evalsigne(1) | evallgefint(lx);
      52    80195604 :   return x;
      53             : }
      54             : 
      55             : /***********************************************************************/
      56             : /**                                                                   **/
      57             : /**                      ADDITION / SUBTRACTION                       **/
      58             : /**                                                                   **/
      59             : /***********************************************************************/
      60             : 
      61             : GEN
      62     2235288 : setloop(GEN a)
      63             : {
      64     2235288 :   pari_sp av = avma;
      65     2235288 :   (void)cgetg(lgefint(a) + 3, t_VECSMALL);
      66     2235288 :   return icopy_avma(a, av); /* two cells of extra space before a */
      67             : }
      68             : 
      69             : /* we had a = setloop(?), then some incloops. Reset a to b */
      70             : GEN
      71      127959 : resetloop(GEN a, GEN b) {
      72      127959 :   long lb = lgefint(b);
      73      127959 :   a += lgefint(a) - lb;
      74      127959 :   a[0] = evaltyp(t_INT) | evallg(lb);
      75      127959 :   affii(b, a); return a;
      76             : }
      77             : 
      78             : /* assume a > 0, initialized by setloop. Do a++ */
      79             : static GEN
      80    20330145 : incpos(GEN a)
      81             : {
      82    20330145 :   long i, l = lgefint(a);
      83    20330148 :   for (i=l-1; i>1; i--)
      84    20330145 :     if (++a[i]) return a;
      85           3 :   l++; a--; /* use extra cell */
      86           3 :   a[0]=evaltyp(t_INT) | _evallg(l);
      87           3 :   a[1]=evalsigne(1) | evallgefint(l);
      88           3 :   a[2]=1; return a;
      89             : }
      90             : 
      91             : /* assume a < 0, initialized by setloop. Do a++ */
      92             : static GEN
      93        6054 : incneg(GEN a)
      94             : {
      95        6054 :   long i, l = lgefint(a)-1;
      96        6054 :   if (uel(a,l)--)
      97             :   {
      98        6051 :     if (l == 2 && !a[2])
      99             :     {
     100         435 :       a++; /* save one cell */
     101         435 :       a[0] = evaltyp(t_INT) | _evallg(2);
     102         435 :       a[1] = evalsigne(0) | evallgefint(2);
     103             :     }
     104        6051 :     return a;
     105             :   }
     106           3 :   for (i = l-1;; i--) /* finishes since a[2] != 0 */
     107           3 :     if (uel(a,i)--) break;
     108           3 :   if (!a[2])
     109             :   {
     110           3 :     a++; /* save one cell */
     111           3 :     a[0] = evaltyp(t_INT) | _evallg(l);
     112           3 :     a[1] = evalsigne(-1) | evallgefint(l);
     113             :   }
     114           3 :   return a;
     115             : }
     116             : 
     117             : /* assume a initialized by setloop. Do a++ */
     118             : GEN
     119    20584947 : incloop(GEN a)
     120             : {
     121    20584947 :   switch(signe(a))
     122             :   {
     123      248748 :     case 0: a--; /* use extra cell */
     124      248748 :       a[0]=evaltyp(t_INT) | _evallg(3);
     125      248748 :       a[1]=evalsigne(1) | evallgefint(3);
     126      248748 :       a[2]=1; return a;
     127        6054 :     case -1: return incneg(a);
     128    20330145 :     default: return incpos(a);
     129             :   }
     130             : }
     131             : 
     132             : INLINE GEN
     133   940379493 : adduispec(ulong s, GEN x, long nx)
     134             : {
     135   940379493 :   GEN xd, zd = (GEN)avma;
     136             :   long lz;
     137             : 
     138   940379493 :   if (nx == 1) return adduu(s, uel(x,0));
     139   113934627 :   lz = nx+3; (void)new_chunk(lz);
     140   113934627 :   xd = x + nx;
     141   113934627 :   *--zd = (ulong)*--xd + s;
     142   113934627 :   if ((ulong)*zd < s)
     143             :     for(;;)
     144             :     {
     145    21922938 :       if (xd == x) { *--zd = 1; break; } /* enlarge z */
     146    12419196 :       *--zd = ((ulong)*--xd) + 1;
     147    12419196 :       if (*zd) { lz--; break; }
     148             :     }
     149   103756845 :   else lz--;
     150   113934627 :   while (xd > x) *--zd = *--xd;
     151   113934627 :   *--zd = evalsigne(1) | evallgefint(lz);
     152   113934627 :   *--zd = evaltyp(t_INT) | evallg(lz);
     153   113934627 :   set_avma((pari_sp)zd); return zd;
     154             : }
     155             : 
     156             : GEN
     157   141327201 : adduispec_offset(ulong s, GEN x, long offset, long nx)
     158             : {
     159   141327201 :   GEN xd = x+lgefint(x)-nx-offset;
     160   141327201 :   while (nx && *xd==0) {xd++; nx--;}
     161   141327201 :   if (!nx) return utoi(s);
     162   130087608 :   return adduispec(s,xd,nx);
     163             : }
     164             : 
     165             : static GEN
     166  1834828814 : addiispec(GEN x, GEN y, long nx, long ny)
     167             : {
     168             :   GEN xd, yd, zd;
     169  1834828814 :   long lz, i = -2;
     170             :   LOCAL_OVERFLOW;
     171             : 
     172  1834828814 :   if (nx < ny) swapspec(x,y, nx,ny);
     173  1834828814 :   if (ny == 1) return adduispec(*y,x,nx);
     174  1070879331 :   zd = (GEN)avma;
     175  1070879331 :   lz = nx+3; (void)new_chunk(lz);
     176  1070879331 :   xd = x + nx;
     177  1070879331 :   yd = y + ny;
     178  1070879331 :   zd[-1] = addll(xd[-1], yd[-1]);
     179             : #ifdef addllx8
     180   692670166 :   for (  ; i-8 > -ny; i-=8)
     181   335710389 :     addllx8(xd+i, yd+i, zd+i, overflow);
     182             : #endif
     183  1070879331 :   for (  ; i >= -ny; i--) zd[i] = addllx(xd[i], yd[i]);
     184  1070879331 :   if (overflow)
     185             :     for(;;)
     186             :     {
     187   161968050 :       if (i < -nx) { zd[i] = 1; i--; break; } /* enlarge z */
     188    93426621 :       zd[i] = uel(xd,i) + 1;
     189    93426621 :       if (zd[i]) { i--; lz--; break; }
     190    31908318 :       i--;
     191             :     }
     192   972727917 :   else lz--;
     193  1070879331 :   for (; i >= -nx; i--) zd[i] = xd[i];
     194  1070879331 :   zd += i+1;
     195  1070879331 :   *--zd = evalsigne(1) | evallgefint(lz);
     196  1070879331 :   *--zd = evaltyp(t_INT) | evallg(lz);
     197  1070879331 :   set_avma((pari_sp)zd); return zd;
     198             : }
     199             : 
     200             : /* assume x >= s */
     201             : INLINE GEN
     202   698687699 : subiuspec(GEN x, ulong s, long nx)
     203             : {
     204   698687699 :   GEN xd, zd = (GEN)avma;
     205             :   long lz;
     206             :   LOCAL_OVERFLOW;
     207             : 
     208   698687699 :   if (nx == 1) return utoi(x[0] - s);
     209             : 
     210    84448089 :   lz = nx+2; (void)new_chunk(lz);
     211    84448089 :   xd = x + nx;
     212    84448089 :   *--zd = subll(*--xd, s);
     213    84448089 :   if (overflow)
     214             :     for(;;)
     215             :     {
     216    40156097 :       *--zd = ((ulong)*--xd) - 1;
     217    37167920 :       if (*xd) break;
     218             :     }
     219    84448089 :   if (xd == x)
     220    33620159 :     while (*zd == 0) { zd++; lz--; } /* shorten z */
     221             :   else
     222   630406880 :     do  *--zd = *--xd; while (xd > x);
     223    84448089 :   *--zd = evalsigne(1) | evallgefint(lz);
     224    84448089 :   *--zd = evaltyp(t_INT) | evallg(lz);
     225    84448089 :   set_avma((pari_sp)zd); return zd;
     226             : }
     227             : 
     228             : /* assume x > y */
     229             : static GEN
     230  1394209676 : subiispec(GEN x, GEN y, long nx, long ny)
     231             : {
     232             :   GEN xd,yd,zd;
     233  1394209676 :   long lz, i = -2;
     234             :   LOCAL_OVERFLOW;
     235             : 
     236  1394209676 :   if (ny==1) return subiuspec(x,*y,nx);
     237   721011294 :   zd = (GEN)avma;
     238   721011294 :   lz = nx+2; (void)new_chunk(lz);
     239   721011294 :   xd = x + nx;
     240   721011294 :   yd = y + ny;
     241   721011294 :   zd[-1] = subll(xd[-1], yd[-1]);
     242             : #ifdef subllx8
     243   513772249 :   for (  ; i-8 > -ny; i-=8)
     244   273435152 :     subllx8(xd+i, yd+i, zd+i, overflow);
     245             : #endif
     246   721011294 :   for (  ; i >= -ny; i--) zd[i] = subllx(xd[i], yd[i]);
     247   721011294 :   if (overflow)
     248             :     for(;;)
     249             :     {
     250   256342652 :       zd[i] = uel(xd,i) - 1;
     251   160756487 :       if (xd[i--]) break;
     252             :     }
     253   721011294 :   if (i>=-nx)
     254    83839832 :     for (; i >= -nx; i--) zd[i] = xd[i];
     255             :   else
     256   637171462 :     while (zd[i+1] == 0) { i++; lz--; } /* shorten z */
     257   721011294 :   zd += i+1;
     258   721011294 :   *--zd = evalsigne(1) | evallgefint(lz);
     259   721011294 :   *--zd = evaltyp(t_INT) | evallg(lz);
     260   721011294 :   set_avma((pari_sp)zd); return zd;
     261             : }
     262             : 
     263             : static void
     264   264899189 : roundr_up_ip(GEN x, long l)
     265             : {
     266   264899189 :   long i = l;
     267             :   for(;;)
     268             :   {
     269   267351383 :     if (++uel(x,--i)) break;
     270     1335228 :     if (i == 2) { x[2] = (long)HIGHBIT; shiftr_inplace(x, 1); break; }
     271             :   }
     272   264899189 : }
     273             : 
     274             : void
     275   111498441 : affir(GEN x, GEN y)
     276             : {
     277   111498441 :   const long s = signe(x), ly = lg(y);
     278             :   long lx, sh, i;
     279             : 
     280   111498441 :   if (!s)
     281             :   {
     282     3884973 :     y[1] = evalexpo(-prec2nbits(ly));
     283     3884973 :     return;
     284             :   }
     285             : 
     286   107613468 :   lx = lgefint(x); sh = bfffo(x[2]);
     287   107613468 :   y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
     288   107613468 :   if (sh) {
     289   106702785 :     if (lx <= ly)
     290             :     {
     291    68845761 :       for (i=lx; i<ly; i++) y[i]=0;
     292    68845761 :       shift_left(y,x,2,lx-1, 0,sh);
     293    68845761 :       return;
     294             :     }
     295    37857024 :     shift_left(y,x,2,ly-1, x[ly],sh);
     296             :     /* lx > ly: round properly */
     297    37857024 :     if ((uel(x,ly)<<sh) & HIGHBIT) roundr_up_ip(y, ly);
     298             :   }
     299             :   else {
     300      910683 :     if (lx <= ly)
     301             :     {
     302      492846 :       for (i=2; i<lx; i++) y[i]=x[i];
     303      492846 :       for (   ; i<ly; i++) y[i]=0;
     304      492846 :       return;
     305             :     }
     306      417837 :     for (i=2; i<ly; i++) y[i]=x[i];
     307             :     /* lx > ly: round properly */
     308      417837 :     if (uel(x,ly) & HIGHBIT) roundr_up_ip(y, ly);
     309             :   }
     310             : }
     311             : 
     312             : INLINE GEN
     313   427227572 : shiftispec(GEN x, long nx, long n)
     314             : {
     315             :   long ny, i, m;
     316             :   GEN y, yd;
     317   427227572 :   if (!n)  return icopyspec(x, nx);
     318             : 
     319   405326075 :   if (n > 0)
     320             :   {
     321   255354990 :     GEN z = (GEN)avma;
     322   255354990 :     long d = dvmdsBIL(n, &m);
     323             : 
     324   255354990 :     ny = nx+d; y = new_chunk(ny + 2); yd = y + 2;
     325   255354990 :     for ( ; d; d--) *--z = 0;
     326   255354990 :     if (!m) for (i=0; i<nx; i++) yd[i]=x[i];
     327             :     else
     328             :     {
     329   252263379 :       register const ulong sh = BITS_IN_LONG - m;
     330   252263379 :       shift_left(yd,x, 0,nx-1, 0,m);
     331   252263379 :       i = uel(x,0) >> sh;
     332             :       /* Extend y on the left? */
     333   252263379 :       if (i) { ny++; y = new_chunk(1); y[2] = i; }
     334             :     }
     335             :   }
     336             :   else
     337             :   {
     338   149971085 :     ny = nx - dvmdsBIL(-n, &m);
     339   149971085 :     if (ny<1) return gen_0;
     340   149735408 :     y = new_chunk(ny + 2); yd = y + 2;
     341   149735408 :     if (m) {
     342   126958157 :       shift_right(yd,x, 0,ny, 0,m);
     343   126958157 :       if (yd[0] == 0)
     344             :       {
     345     8522475 :         if (ny==1) { set_avma((pari_sp)(y+3)); return gen_0; }
     346     7453929 :         ny--; set_avma((pari_sp)(++y));
     347             :       }
     348             :     } else {
     349    22777251 :       for (i=0; i<ny; i++) yd[i]=x[i];
     350             :     }
     351             :   }
     352   404021852 :   y[1] = evalsigne(1)|evallgefint(ny + 2);
     353   404021852 :   y[0] = evaltyp(t_INT)|evallg(ny + 2); return y;
     354             : }
     355             : 
     356             : GEN
     357    12042798 : mantissa2nr(GEN x, long n)
     358             : { /*This is a kludge since x is not an integer*/
     359    12042798 :   long s = signe(x);
     360             :   GEN y;
     361             : 
     362    12042798 :   if(s == 0) return gen_0;
     363    12041928 :   y = shiftispec(x + 2, lg(x) - 2, n);
     364    12041928 :   if (signe(y)) setsigne(y, s);
     365    12041928 :   return y;
     366             : }
     367             : 
     368             : GEN
     369     1449012 : truncr(GEN x)
     370             : {
     371             :   long d,e,i,s,m;
     372             :   GEN y;
     373             : 
     374     1449012 :   if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gen_0;
     375     1169271 :   d = nbits2lg(e+1); m = remsBIL(e);
     376     1169271 :   if (d > lg(x)) pari_err_PREC( "truncr (precision loss in truncation)");
     377             : 
     378     1169271 :   y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
     379     1169271 :   if (++m == BITS_IN_LONG)
     380          90 :     for (i=2; i<d; i++) y[i]=x[i];
     381             :   else
     382     1169181 :     shift_right(y,x, 2,d,0, BITS_IN_LONG - m);
     383     1169271 :   return y;
     384             : }
     385             : 
     386             : /* integral part */
     387             : GEN
     388      556869 : floorr(GEN x)
     389             : {
     390             :   long d,e,i,lx,m;
     391             :   GEN y;
     392             : 
     393      556869 :   if (signe(x) >= 0) return truncr(x);
     394      183840 :   if ((e=expo(x)) < 0) return gen_m1;
     395       67965 :   d = nbits2lg(e+1); m = remsBIL(e);
     396       67965 :   lx=lg(x); if (d>lx) pari_err_PREC( "floorr (precision loss in truncation)");
     397       67965 :   y = new_chunk(d);
     398       67965 :   if (++m == BITS_IN_LONG)
     399             :   {
     400           6 :     for (i=2; i<d; i++) y[i]=x[i];
     401           6 :     i=d; while (i<lx && !x[i]) i++;
     402           6 :     if (i==lx) goto END;
     403             :   }
     404             :   else
     405             :   {
     406       67959 :     shift_right(y,x, 2,d,0, BITS_IN_LONG - m);
     407       67959 :     if (uel(x,d-1)<<m == 0)
     408             :     {
     409       16920 :       i=d; while (i<lx && !x[i]) i++;
     410       16920 :       if (i==lx) goto END;
     411             :     }
     412             :   }
     413             :   /* set y:=y+1 */
     414       59091 :   for (i=d-1; i>=2; i--) { uel(y,i)++; if (y[i]) goto END; }
     415           0 :   y=new_chunk(1); y[2]=1; d++;
     416             : END:
     417       67965 :   y[1] = evalsigne(-1) | evallgefint(d);
     418       67965 :   y[0] = evaltyp(t_INT) | evallg(d); return y;
     419             : }
     420             : 
     421             : INLINE int
     422  1821197897 : cmpiispec(GEN x, GEN y, long lx, long ly)
     423             : {
     424             :   long i;
     425  1821197897 :   if (lx < ly) return -1;
     426  1743633612 :   if (lx > ly) return  1;
     427  1612936846 :   i = 0; while (i<lx && x[i]==y[i]) i++;
     428  1612936846 :   if (i==lx) return 0;
     429  1514662824 :   return (uel(x,i) > uel(y,i))? 1: -1;
     430             : }
     431             : 
     432             : INLINE int
     433   115744276 : equaliispec(GEN x, GEN y, long lx, long ly)
     434             : {
     435             :   long i;
     436   115744276 :   if (lx != ly) return 0;
     437   115744243 :   i = ly-1; while (i>=0 && x[i]==y[i]) i--;
     438   115744243 :   return i < 0;
     439             : }
     440             : 
     441             : /***********************************************************************/
     442             : /**                                                                   **/
     443             : /**                          MULTIPLICATION                           **/
     444             : /**                                                                   **/
     445             : /***********************************************************************/
     446             : /* assume ny > 0 */
     447             : INLINE GEN
     448  1869055738 : muluispec(ulong x, GEN y, long ny)
     449             : {
     450  1869055738 :   GEN yd, z = (GEN)avma;
     451  1869055738 :   long lz = ny+3;
     452             :   LOCAL_HIREMAINDER;
     453             : 
     454  1869055738 :   (void)new_chunk(lz);
     455  1869055738 :   yd = y + ny; *--z = mulll(x, *--yd);
     456  1869055738 :   while (yd > y) *--z = addmul(x,*--yd);
     457  1869055738 :   if (hiremainder) *--z = hiremainder; else lz--;
     458  1869055738 :   *--z = evalsigne(1) | evallgefint(lz);
     459  1869055738 :   *--z = evaltyp(t_INT) | evallg(lz);
     460  1869055738 :   set_avma((pari_sp)z); return z;
     461             : }
     462             : 
     463             : /* a + b*|Y| */
     464             : GEN
     465      535077 : addumului(ulong a, ulong b, GEN Y)
     466             : {
     467             :   GEN yd,y,z;
     468             :   long ny,lz;
     469             :   LOCAL_HIREMAINDER;
     470             :   LOCAL_OVERFLOW;
     471             : 
     472      535077 :   if (!b || !signe(Y)) return utoi(a);
     473             : 
     474      535074 :   y = LIMBS(Y); z = (GEN)avma;
     475      535074 :   ny = NLIMBS(Y);
     476      535074 :   lz = ny+3;
     477             : 
     478      535074 :   (void)new_chunk(lz);
     479      535074 :   yd = y + ny; *--z = addll(a, mulll(b, *--yd));
     480      535074 :   if (overflow) hiremainder++; /* can't overflow */
     481      535074 :   while (yd > y) *--z = addmul(b,*--yd);
     482      535074 :   if (hiremainder) *--z = hiremainder; else lz--;
     483      535074 :   *--z = evalsigne(1) | evallgefint(lz);
     484      535074 :   *--z = evaltyp(t_INT) | evallg(lz);
     485      535074 :   set_avma((pari_sp)z); return z;
     486             : }
     487             : 
     488             : /***********************************************************************/
     489             : /**                                                                   **/
     490             : /**                          DIVISION                                 **/
     491             : /**                                                                   **/
     492             : /***********************************************************************/
     493             : 
     494             : ulong
     495  1044757225 : umodiu(GEN y, ulong x)
     496             : {
     497  1044757225 :   long sy=signe(y),ly,i;
     498             :   ulong xi;
     499             :   LOCAL_HIREMAINDER;
     500             : 
     501  1044757225 :   if (!x) pari_err_INV("umodiu",gen_0);
     502  1044757225 :   if (!sy) return 0;
     503   911868145 :   ly = lgefint(y);
     504   911868145 :   if (x <= uel(y,2))
     505             :   {
     506   356185772 :     hiremainder=0;
     507   356185772 :     if (ly==3)
     508             :     {
     509   342568654 :       hiremainder=uel(y,2)%x;
     510   342568654 :       if (!hiremainder) return 0;
     511   314848631 :       return (sy > 0)? hiremainder: x - hiremainder;
     512             :     }
     513             :   }
     514             :   else
     515             :   {
     516   555682373 :     if (ly==3) return (sy > 0)? uel(y,2): x - uel(y,2);
     517    28539587 :     hiremainder=uel(y,2); ly--; y++;
     518             :   }
     519    42156705 :   xi = get_Fl_red(x);
     520    42156705 :   for (i=2; i<ly; i++) (void)divll_pre(y[i],x,xi);
     521    42156705 :   if (!hiremainder) return 0;
     522    38288496 :   return (sy > 0)? hiremainder: x - hiremainder;
     523             : }
     524             : 
     525             : /* return |y| \/ x */
     526             : GEN
     527   115583811 : absdiviu_rem(GEN y, ulong x, ulong *rem)
     528             : {
     529             :   long ly,i;
     530             :   GEN z;
     531             :   ulong xi;
     532             :   LOCAL_HIREMAINDER;
     533             : 
     534   115583811 :   if (!x) pari_err_INV("absdiviu_rem",gen_0);
     535   115583811 :   if (!signe(y)) { *rem = 0; return gen_0; }
     536             : 
     537   115168284 :   ly = lgefint(y);
     538   115168284 :   if (x <= uel(y,2))
     539             :   {
     540    95858016 :     hiremainder=0;
     541    95858016 :     if (ly==3)
     542             :     {
     543    32810856 :       z = cgetipos(3);
     544    32810856 :       z[2] = divll(uel(y,2),x);
     545    32810856 :       *rem = hiremainder; return z;
     546             :     }
     547             :   }
     548             :   else
     549             :   {
     550    19310268 :     if (ly==3) { *rem = uel(y,2); return gen_0; }
     551    13828233 :     hiremainder = uel(y,2); ly--; y++;
     552             :   }
     553    76875393 :   xi = get_Fl_red(x);
     554    76875393 :   z = cgetipos(ly);
     555    76875393 :   for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x,xi);
     556    76875393 :   *rem = hiremainder; return z;
     557             : }
     558             : 
     559             : GEN
     560    35145213 : divis_rem(GEN y, long x, long *rem)
     561             : {
     562    35145213 :   long sy=signe(y),ly,s,i;
     563             :   GEN z;
     564             :   ulong xi;
     565             :   LOCAL_HIREMAINDER;
     566             : 
     567    35145213 :   if (!x) pari_err_INV("divis_rem",gen_0);
     568    35145213 :   if (!sy) { *rem=0; return gen_0; }
     569    26028744 :   if (x<0) { s = -sy; x = -x; } else s = sy;
     570             : 
     571    26028744 :   ly = lgefint(y);
     572    26028744 :   if ((ulong)x <= uel(y,2))
     573             :   {
     574    16679916 :     hiremainder=0;
     575    16679916 :     if (ly==3)
     576             :     {
     577    16347027 :       z = cgeti(3); z[1] = evallgefint(3) | evalsigne(s);
     578    16347027 :       z[2] = divll(uel(y,2),x);
     579    16347027 :       if (sy<0) hiremainder = - ((long)hiremainder);
     580    16347027 :       *rem = (long)hiremainder; return z;
     581             :     }
     582             :   }
     583             :   else
     584             :   {
     585     9348828 :     if (ly==3) { *rem = itos(y); return gen_0; }
     586       98415 :     hiremainder = uel(y,2); ly--; y++;
     587             :   }
     588      431304 :   xi = get_Fl_red(x);
     589      431304 :   z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
     590      431304 :   for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x,xi);
     591      431304 :   if (sy<0) hiremainder = - ((long)hiremainder);
     592      431304 :   *rem = (long)hiremainder; return z;
     593             : }
     594             : 
     595             : GEN
     596      869109 : divis(GEN y, long x)
     597             : {
     598      869109 :   long sy=signe(y),ly,s,i;
     599             :   ulong xi;
     600             :   GEN z;
     601             :   LOCAL_HIREMAINDER;
     602             : 
     603      869109 :   if (!x) pari_err_INV("divis",gen_0);
     604      869109 :   if (!sy) return gen_0;
     605      869109 :   if (x<0) { s = -sy; x = -x; } else s = sy;
     606             : 
     607      869109 :   ly = lgefint(y);
     608      869109 :   if ((ulong)x <= uel(y,2))
     609             :   {
     610      851073 :     hiremainder=0;
     611      851073 :     if (ly==3)
     612             :     {
     613      689940 :       z = cgeti(3); z[1] = evallgefint(3) | evalsigne(s);
     614      689940 :       z[2] = divll(y[2],x);
     615      689940 :       return z;
     616             :     }
     617             :   }
     618             :   else
     619             :   {
     620       18036 :     if (ly==3) return gen_0;
     621       18036 :     hiremainder=y[2]; ly--; y++;
     622             :   }
     623      179169 :   xi = get_Fl_red(x);
     624      179169 :   z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
     625      179169 :   for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x, xi);
     626      179169 :   return z;
     627             : }
     628             : 
     629             : GEN
     630    84176916 : divrr(GEN x, GEN y)
     631             : {
     632    84176916 :   long sx=signe(x), sy=signe(y), lx,ly,lr,e,i,j;
     633             :   ulong y0,y1;
     634             :   GEN r, r1;
     635             : 
     636    84176916 :   if (!x) pari_err_INV("divrr",y);
     637    84176916 :   e = expo(x) - expo(y);
     638    84176916 :   if (!sx) return real_0_bit(e);
     639    82970856 :   if (sy<0) sx = -sx;
     640             : 
     641    82970856 :   lx=lg(x); ly=lg(y);
     642    82970856 :   if (ly==3)
     643             :   {
     644    47606553 :     ulong k = x[2], l = (lx>3)? x[3]: 0;
     645             :     LOCAL_HIREMAINDER;
     646    47606553 :     if (k < uel(y,2)) e--;
     647             :     else
     648             :     {
     649    21775881 :       l >>= 1; if (k&1) l |= HIGHBIT;
     650    21775881 :       k >>= 1;
     651             :     }
     652    47606553 :     hiremainder = k; k = divll(l,y[2]);
     653    47606553 :     if (hiremainder & HIGHBIT)
     654             :     {
     655    13234683 :       k++;
     656    13234683 :       if (!k) { k = HIGHBIT; e++; }
     657             :     }
     658    47606553 :     r = cgetr(3);
     659    47606553 :     r[1] = evalsigne(sx) | evalexpo(e);
     660    47606553 :     r[2] = k; return r;
     661             :   }
     662             : 
     663    35364303 :   lr = minss(lx,ly); r = new_chunk(lr);
     664    35364303 :   r1 = r-1;
     665    35364303 :   r1[1] = 0; for (i=2; i<lr; i++) r1[i]=x[i];
     666    35364303 :   r1[lr] = (lx>ly)? x[lr]: 0;
     667    35364303 :   y0 = y[2]; y1 = y[3];
     668   279071199 :   for (i=0; i<lr-1; i++)
     669             :   { /* r1 = r + (i-1), OK up to r1[2] (accesses at most r[lr]) */
     670             :     ulong k, qp;
     671             :     LOCAL_HIREMAINDER;
     672             :     LOCAL_OVERFLOW;
     673             : 
     674   243706896 :     if (uel(r1,1) == y0)
     675             :     {
     676      831582 :       qp = ULONG_MAX; k = addll(y0,r1[2]);
     677             :     }
     678             :     else
     679             :     {
     680   242875314 :       if (uel(r1,1) > y0) /* can't happen if i=0 */
     681             :       {
     682           0 :         GEN y1 = y+1;
     683           0 :         j = lr-i; r1[j] = subll(r1[j],y1[j]);
     684           0 :         for (j--; j>0; j--) r1[j] = subllx(r1[j],y1[j]);
     685           0 :         j=i; do uel(r,--j)++; while (j && !uel(r,j));
     686             :       }
     687   242875314 :       hiremainder = r1[1]; overflow = 0;
     688   242875314 :       qp = divll(r1[2],y0); k = hiremainder;
     689             :     }
     690   243706896 :     j = lr-i+1;
     691   243706896 :     if (!overflow)
     692             :     {
     693             :       long k3, k4;
     694   242995602 :       k3 = mulll(qp,y1);
     695   242995602 :       if (j == 3) /* i = lr - 2 maximal, r1[3] undefined -> 0 */
     696    35335185 :         k4 = subll(hiremainder,k);
     697             :       else
     698             :       {
     699   207660417 :         k3 = subll(k3, r1[3]);
     700   207660417 :         k4 = subllx(hiremainder,k);
     701             :       }
     702   242995602 :       while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
     703             :     }
     704   243706896 :     if (j<ly) (void)mulll(qp,y[j]); else { hiremainder = 0 ; j = ly; }
     705  3440246817 :     for (j--; j>1; j--)
     706             :     {
     707  3196539921 :       r1[j] = subll(r1[j], addmul(qp,y[j]));
     708  3196539921 :       hiremainder += overflow;
     709             :     }
     710   243706896 :     if (uel(r1,1) != hiremainder)
     711             :     {
     712      242148 :       if (uel(r1,1) < hiremainder)
     713             :       {
     714      242148 :         qp--;
     715      242148 :         j = lr-i-(lr-i>=ly); r1[j] = addll(r1[j], y[j]);
     716      242148 :         for (j--; j>1; j--) r1[j] = addllx(r1[j], y[j]);
     717             :       }
     718             :       else
     719             :       {
     720           0 :         r1[1] -= hiremainder;
     721           0 :         while (r1[1])
     722             :         {
     723           0 :           qp++; if (!qp) { j=i; do uel(r,--j)++; while (j && !r[j]); }
     724           0 :           j = lr-i-(lr-i>=ly); r1[j] = subll(r1[j],y[j]);
     725           0 :           for (j--; j>1; j--) r1[j] = subllx(r1[j],y[j]);
     726           0 :           r1[1] -= overflow;
     727             :         }
     728             :       }
     729             :     }
     730   243706896 :     *++r1 = qp;
     731             :   }
     732             :   /* i = lr-1 */
     733             :   /* round correctly */
     734    35364303 :   if (uel(r1,1) > (y0>>1))
     735             :   {
     736    17358059 :     j=i; do uel(r,--j)++; while (j && !r[j]);
     737             :   }
     738    35364303 :   r1 = r-1; for (j=i; j>=2; j--) r[j]=r1[j];
     739    35364303 :   if (r[0] == 0) e--;
     740    14362899 :   else if (r[0] == 1) { shift_right(r,r, 2,lr, 1,1); }
     741             :   else { /* possible only when rounding up to 0x2 0x0 ... */
     742           0 :     r[2] = (long)HIGHBIT; e++;
     743             :   }
     744    35364303 :   r[0] = evaltyp(t_REAL)|evallg(lr);
     745    35364303 :   r[1] = evalsigne(sx) | evalexpo(e);
     746    35364303 :   return r;
     747             : }
     748             : 
     749             : GEN
     750    24227538 : divri(GEN x, GEN y)
     751             : {
     752    24227538 :   long lx, s = signe(y);
     753             :   pari_sp av;
     754             :   GEN z;
     755             : 
     756    24227538 :   if (!s) pari_err_INV("divri",y);
     757    24227538 :   if (!signe(x)) return real_0_bit(expo(x) - expi(y));
     758    24136299 :   if (!is_bigint(y)) {
     759    20325450 :     GEN z = divru(x, y[2]);
     760    20325450 :     if (s < 0) togglesign(z);
     761    20325450 :     return z;
     762             :   }
     763     3810849 :   lx = lg(x); z = cgetr(lx); av = avma;
     764     3810849 :   affrr(divrr(x, itor(y, lx+1)), z);
     765     3810849 :   set_avma(av); return z;
     766             : }
     767             : 
     768             : /* Integer division x / y: such that sign(r) = sign(x)
     769             :  *   if z = ONLY_REM return remainder, otherwise return quotient
     770             :  *   if z != NULL set *z to remainder
     771             :  *   *z is the last object on stack (and thus can be disposed of with cgiv
     772             :  *   instead of gerepile)
     773             :  * If *z is zero, we put gen_0 here and no copy.
     774             :  * space needed: lx + ly */
     775             : GEN
     776   556615065 : dvmdii(GEN x, GEN y, GEN *z)
     777             : {
     778   556615065 :   long sx = signe(x), sy = signe(y);
     779   556615065 :   long lx, ly = lgefint(y), lz, i, j, sh, lq, lr;
     780             :   pari_sp av;
     781             :   ulong y0,y0i,y1, *xd,*rd,*qd;
     782             :   GEN q, r, r1;
     783             : 
     784   556615065 :   if (!sx)
     785             :   {
     786    17473869 :     if (ly < 3) pari_err_INV("dvmdii",gen_0);
     787    17473866 :     if (!z || z == ONLY_REM) return gen_0;
     788     6961635 :     *z=gen_0; return gen_0;
     789             :   }
     790   539141196 :   if (ly <= 3)
     791             :   {
     792             :     ulong rem;
     793   303812592 :     if (ly < 3) pari_err_INV("dvmdii",gen_0);
     794   303812592 :     if (z == ONLY_REM)
     795             :     {
     796   260137149 :       rem = umodiu(x,uel(y,2));
     797   260137149 :       if (!rem) return gen_0;
     798   236190562 :       return (sx < 0)? utoineg(uel(y,2) - rem): utoipos(rem);
     799             :     }
     800    43675443 :     q = absdiviu_rem(x, uel(y,2), &rem);
     801    43675443 :     if (sx != sy) togglesign(q);
     802    43675443 :     if (!z) return q;
     803    40799199 :     if (!rem) *z = gen_0;
     804    19323873 :     else *z = sx < 0? utoineg(rem): utoipos(rem);
     805    40799199 :     return q;
     806             :   }
     807   235328604 :   lx=lgefint(x);
     808   235328604 :   lz=lx-ly;
     809   235328604 :   if (lz <= 0)
     810             :   {
     811   102527118 :     if (lz == 0)
     812             :     {
     813    93575061 :       for (i=2; i<lx; i++)
     814    93273774 :         if (x[i] != y[i])
     815             :         {
     816    89132496 :           if (uel(x,i) > uel(y,i)) goto DIVIDE;
     817    32520981 :           goto TRIVIAL;
     818             :         }
     819      301287 :       if (z == ONLY_REM) return gen_0;
     820       13545 :       if (z) *z = gen_0;
     821       13545 :       if (sx < 0) sy = -sy;
     822       13545 :       return stoi(sy);
     823             :     }
     824             : TRIVIAL:
     825    45614316 :     if (z == ONLY_REM) return icopy(x);
     826      668025 :     if (z) *z = icopy(x);
     827      668025 :     return gen_0;
     828             :   }
     829             : DIVIDE: /* quotient is non-zero */
     830   189413001 :   av=avma; if (sx<0) sy = -sy;
     831   189413001 :   r1 = new_chunk(lx); sh = bfffo(y[2]);
     832   189413001 :   if (sh)
     833             :   { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
     834   181109874 :     register const ulong m = BITS_IN_LONG - sh;
     835   181109874 :     r = new_chunk(ly);
     836   181109874 :     shift_left(r, y,2,ly-1, 0,sh); y = r;
     837   181109874 :     shift_left(r1,x,2,lx-1, 0,sh);
     838   181109874 :     r1[1] = uel(x,2) >> m;
     839             :   }
     840             :   else
     841             :   {
     842     8303127 :     r1[1] = 0; for (j=2; j<lx; j++) r1[j] = x[j];
     843             :   }
     844   189413001 :   x = r1;
     845   189413001 :   y0 = y[2]; y0i = get_Fl_red(y0);
     846   189413001 :   y1 = y[3];
     847  1065608115 :   for (i=0; i<=lz; i++)
     848             :   { /* r1 = x + i */
     849             :     ulong k, qp;
     850             :     LOCAL_HIREMAINDER;
     851             :     LOCAL_OVERFLOW;
     852             : 
     853   876195114 :     if (uel(r1,1) == y0)
     854             :     {
     855       82607 :       qp = ULONG_MAX; k = addll(y0,r1[2]);
     856             :     }
     857             :     else
     858             :     {
     859   876112507 :       hiremainder = r1[1]; overflow = 0;
     860   876112507 :       qp = divll_pre(r1[2],y0,y0i); k = hiremainder;
     861             :     }
     862   876195114 :     if (!overflow)
     863             :     {
     864   876122763 :       long k3 = subll(mulll(qp,y1), r1[3]);
     865   876122763 :       long k4 = subllx(hiremainder,k);
     866   876122763 :       while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
     867             :     }
     868   876195114 :     hiremainder = 0; j = ly;
     869 22334694825 :     for (j--; j>1; j--)
     870             :     {
     871 21458499711 :       r1[j] = subll(r1[j], addmul(qp,y[j]));
     872 21458499711 :       hiremainder += overflow;
     873             :     }
     874   876195114 :     if (uel(r1,1) < hiremainder)
     875             :     {
     876      517822 :       qp--;
     877      517822 :       j = ly-1; r1[j] = addll(r1[j],y[j]);
     878      517822 :       for (j--; j>1; j--) r1[j] = addllx(r1[j],y[j]);
     879             :     }
     880   876195114 :     *++r1 = qp;
     881             :   }
     882             : 
     883   189413001 :   lq = lz+2;
     884   189413001 :   if (!z)
     885             :   {
     886      475401 :     qd = (ulong*)av;
     887      475401 :     xd = (ulong*)(x + lq);
     888      475401 :     if (x[1]) { lz++; lq++; }
     889      475401 :     while (lz--) *--qd = *--xd;
     890      475401 :     *--qd = evalsigne(sy) | evallgefint(lq);
     891      475401 :     *--qd = evaltyp(t_INT) | evallg(lq);
     892      475401 :     set_avma((pari_sp)qd); return (GEN)qd;
     893             :   }
     894             : 
     895   188937600 :   j=lq; while (j<lx && !x[j]) j++;
     896   188937600 :   lz = lx-j;
     897   188937600 :   if (z == ONLY_REM)
     898             :   {
     899   141421842 :     if (lz==0) { set_avma(av); return gen_0; }
     900   138597801 :     rd = (ulong*)av; lr = lz+2;
     901   138597801 :     xd = (ulong*)(x + lx);
     902   138597801 :     if (!sh) while (lz--) *--rd = *--xd;
     903             :     else
     904             :     { /* shift remainder right by sh bits */
     905   130356423 :       const ulong shl = BITS_IN_LONG - sh;
     906             :       ulong l;
     907   130356423 :       xd--;
     908   755553426 :       while (--lz) /* fill r[3..] */
     909             :       {
     910   494840580 :         l = *xd >> sh;
     911   494840580 :         *--rd = l | (*--xd << shl);
     912             :       }
     913   130356423 :       l = *xd >> sh;
     914   130356423 :       if (l) *--rd = l; else lr--;
     915             :     }
     916   138597801 :     *--rd = evalsigne(sx) | evallgefint(lr);
     917   138597801 :     *--rd = evaltyp(t_INT) | evallg(lr);
     918   138597801 :     set_avma((pari_sp)rd); return (GEN)rd;
     919             :   }
     920             : 
     921    47515758 :   lr = lz+2;
     922    47515758 :   rd = NULL; /* gcc -Wall */
     923    47515758 :   if (lz)
     924             :   { /* non zero remainder: initialize rd */
     925    46119843 :     xd = (ulong*)(x + lx);
     926    46119843 :     if (!sh)
     927             :     {
     928       26133 :       rd = (ulong*)avma; (void)new_chunk(lr);
     929       26133 :       while (lz--) *--rd = *--xd;
     930             :     }
     931             :     else
     932             :     { /* shift remainder right by sh bits */
     933    46093710 :       const ulong shl = BITS_IN_LONG - sh;
     934             :       ulong l;
     935    46093710 :       rd = (ulong*)x; /* overwrite shifted y */
     936    46093710 :       xd--;
     937   226618374 :       while (--lz)
     938             :       {
     939   134430954 :         l = *xd >> sh;
     940   134430954 :         *--rd = l | (*--xd << shl);
     941             :       }
     942    46093710 :       l = *xd >> sh;
     943    46093710 :       if (l) *--rd = l; else lr--;
     944             :     }
     945    46119843 :     *--rd = evalsigne(sx) | evallgefint(lr);
     946    46119843 :     *--rd = evaltyp(t_INT) | evallg(lr);
     947    46119843 :     rd += lr;
     948             :   }
     949    47515758 :   qd = (ulong*)av;
     950    47515758 :   xd = (ulong*)(x + lq);
     951    47515758 :   if (x[1]) lq++;
     952    47515758 :   j = lq-2; while (j--) *--qd = *--xd;
     953    47515758 :   *--qd = evalsigne(sy) | evallgefint(lq);
     954    47515758 :   *--qd = evaltyp(t_INT) | evallg(lq);
     955    47515758 :   q = (GEN)qd;
     956    47515758 :   if (lr==2) *z = gen_0;
     957             :   else
     958             :   { /* rd has been properly initialized: we had lz > 0 */
     959    46119843 :     while (lr--) *--qd = *--rd;
     960    46119843 :     *z = (GEN)qd;
     961             :   }
     962    47515758 :   set_avma((pari_sp)qd); return q;
     963             : }
     964             : 
     965             : /* Montgomery reduction.
     966             :  * N has k words, assume T >= 0 has less than 2k.
     967             :  * Return res := T / B^k mod N, where B = 2^BIL
     968             :  * such that 0 <= res < T/B^k + N  and  res has less than k words */
     969             : GEN
     970    21574278 : red_montgomery(GEN T, GEN N, ulong inv)
     971             : {
     972             :   pari_sp av;
     973             :   GEN Te, Td, Ne, Nd, scratch;
     974    21574278 :   ulong i, j, m, t, d, k = NLIMBS(N);
     975             :   int carry;
     976             :   LOCAL_HIREMAINDER;
     977             :   LOCAL_OVERFLOW;
     978             : 
     979    21574278 :   if (k == 0) return gen_0;
     980    21574278 :   d = NLIMBS(T); /* <= 2*k */
     981    21574278 :   if (d == 0) return gen_0;
     982             : #ifdef DEBUG
     983             :   if (d > 2*k) pari_err_BUG("red_montgomery");
     984             : #endif
     985    21574269 :   if (k == 1)
     986             :   { /* as below, special cased for efficiency */
     987         492 :     ulong n = uel(N,2);
     988         492 :     if (d == 1) {
     989         492 :       hiremainder = uel(T,2);
     990         492 :       m = hiremainder * inv;
     991         492 :       (void)addmul(m, n); /* t + m*n = 0 */
     992         492 :       return utoi(hiremainder);
     993             :     } else { /* d = 2 */
     994           0 :       hiremainder = uel(T,3);
     995           0 :       m = hiremainder * inv;
     996           0 :       (void)addmul(m, n); /* t + m*n = 0 */
     997           0 :       t = addll(hiremainder, uel(T,2));
     998           0 :       if (overflow) t -= n; /* t > n doesn't fit in 1 word */
     999           0 :       return utoi(t);
    1000             :     }
    1001             :   }
    1002             :   /* assume k >= 2 */
    1003    21573777 :   av = avma; scratch = new_chunk(k<<1); /* >= k + 2: result fits */
    1004             : 
    1005             :   /* copy T to scratch space (pad with zeroes to 2k words) */
    1006    21573777 :   Td = (GEN)av;
    1007    21573777 :   Te = T + (d+2);
    1008    21573777 :   for (i=0; i < d     ; i++) *--Td = *--Te;
    1009    21573777 :   for (   ; i < (k<<1); i++) *--Td = 0;
    1010             : 
    1011    21573777 :   Te = (GEN)av; /* 1 beyond end of current T mantissa (in scratch) */
    1012    21573777 :   Ne = N + k+2; /* 1 beyond end of N mantissa */
    1013             : 
    1014    21573777 :   carry = 0;
    1015   323577450 :   for (i=0; i<k; i++) /* set T := T/B nod N, k times */
    1016             :   {
    1017   302003673 :     Td = Te; /* one beyond end of (new) T mantissa */
    1018   302003673 :     Nd = Ne;
    1019   302003673 :     hiremainder = *--Td;
    1020   302003673 :     m = hiremainder * inv; /* solve T + m N = O(B) */
    1021             : 
    1022             :     /* set T := (T + mN) / B */
    1023   302003673 :     Te = Td;
    1024   302003673 :     (void)addmul(m, *--Nd); /* = 0 */
    1025  5622632307 :     for (j=1; j<k; j++)
    1026             :     {
    1027  5320628634 :       t = addll(addmul(m, *--Nd), *--Td);
    1028  5320628634 :       *Td = t;
    1029  5320628634 :       hiremainder += overflow;
    1030             :     }
    1031   302003673 :     t = addll(hiremainder, *--Td); *Td = t + carry;
    1032   302003673 :     carry = (overflow || (carry && *Td == 0));
    1033             :   }
    1034    21573777 :   if (carry)
    1035             :   { /* Td > N overflows (k+1 words), set Td := Td - N */
    1036      348417 :     Td = Te;
    1037      348417 :     Nd = Ne;
    1038      348417 :     t = subll(*--Td, *--Nd); *Td = t;
    1039      348417 :     while (Td > scratch) { t = subllx(*--Td, *--Nd); *Td = t; }
    1040             :   }
    1041             : 
    1042             :   /* copy result */
    1043    21573777 :   Td = (GEN)av;
    1044    21573777 :   while (*scratch == 0 && Te > scratch) scratch++; /* strip leading 0s */
    1045    21573777 :   while (Te > scratch) *--Td = *--Te;
    1046    21573777 :   k = (GEN)av - Td; if (!k) { set_avma(av); return gen_0; }
    1047    21573777 :   k += 2;
    1048    21573777 :   *--Td = evalsigne(1) | evallgefint(k);
    1049    21573777 :   *--Td = evaltyp(t_INT) | evallg(k);
    1050             : #ifdef DEBUG
    1051             : {
    1052             :   long l = NLIMBS(N), s = BITS_IN_LONG*l;
    1053             :   GEN R = int2n(s);
    1054             :   GEN res = remii(mulii(T, Fp_inv(R, N)), N);
    1055             :   if (k > lgefint(N)
    1056             :     || !equalii(remii(Td,N),res)
    1057             :     || cmpii(Td, addii(shifti(T, -s), N)) >= 0) pari_err_BUG("red_montgomery");
    1058             : }
    1059             : #endif
    1060    21573777 :   set_avma((pari_sp)Td); return Td;
    1061             : }
    1062             : 
    1063             : /* EXACT INTEGER DIVISION */
    1064             : 
    1065             : /* assume xy>0, the division is exact and y is odd. Destroy x */
    1066             : static GEN
    1067    17184390 : diviuexact_i(GEN x, ulong y)
    1068             : {
    1069             :   long i, lz, lx;
    1070             :   ulong q, yinv;
    1071             :   GEN z, z0, x0, x0min;
    1072             : 
    1073    17184390 :   if (y == 1) return icopy(x);
    1074    13632915 :   lx = lgefint(x);
    1075    13632915 :   if (lx == 3)
    1076             :   {
    1077      556620 :     q = uel(x,2) / y;
    1078      556620 :     if (!q) pari_err_OP("exact division", x, utoi(y));
    1079      556620 :     return utoipos(q);
    1080             :   }
    1081    13076295 :   yinv = invmod2BIL(y);
    1082    13076295 :   lz = (y <= uel(x,2)) ? lx : lx-1;
    1083    13076295 :   z = new_chunk(lz);
    1084    13076295 :   z0 = z + lz;
    1085    13076295 :   x0 = x + lx; x0min = x + lx-lz+2;
    1086             : 
    1087    54939153 :   while (x0 > x0min)
    1088             :   {
    1089    28786563 :     *--z0 = q = yinv*uel(--x0,0); /* i-th quotient */
    1090    28786563 :     if (!q) continue;
    1091             :     /* x := x - q * y */
    1092             :     { /* update neither lowest word (could set it to 0) nor highest ones */
    1093    27747351 :       register GEN x1 = x0 - 1;
    1094             :       LOCAL_HIREMAINDER;
    1095    27747351 :       (void)mulll(q,y);
    1096    27747351 :       if (hiremainder)
    1097             :       {
    1098    23692290 :         if (uel(x1,0) < hiremainder)
    1099             :         {
    1100      881724 :           uel(x1,0) -= hiremainder;
    1101      883551 :           do uel(--x1,0)--; while (uel(x1,0) == ULONG_MAX);
    1102             :         }
    1103             :         else
    1104    22810566 :           uel(x1,0) -= hiremainder;
    1105             :       }
    1106             :     }
    1107             :   }
    1108    13076295 :   i=2; while(!z[i]) i++;
    1109    13076295 :   z += i-2; lz -= i-2;
    1110    13076295 :   z[0] = evaltyp(t_INT)|evallg(lz);
    1111    13076295 :   z[1] = evalsigne(1)|evallg(lz);
    1112    13076295 :   if (lz == 2) pari_err_OP("exact division", x, utoi(y));
    1113    13076295 :   set_avma((pari_sp)z); return z;
    1114             : }
    1115             : 
    1116             : /* assume y != 0 and the division is exact */
    1117             : GEN
    1118    25278387 : diviuexact(GEN x, ulong y)
    1119             : {
    1120             :   pari_sp av;
    1121    25278387 :   long lx, vy, s = signe(x);
    1122             :   GEN z;
    1123             : 
    1124    25278387 :   if (!s) return gen_0;
    1125    25074699 :   if (y == 1) return icopy(x);
    1126    25059711 :   lx = lgefint(x);
    1127    25059711 :   if (lx == 3) {
    1128    23796921 :     ulong q = uel(x,2) / y;
    1129    23796921 :     if (!q) pari_err_OP("exact division", x, utoi(y));
    1130    23796921 :     return (s > 0)? utoipos(q): utoineg(q);
    1131             :   }
    1132     1262790 :   av = avma; (void)new_chunk(lx); vy = vals(y);
    1133     1262790 :   if (vy) {
    1134      662916 :     y >>= vy;
    1135      662916 :     if (y == 1) { set_avma(av); return shifti(x, -vy); }
    1136      602340 :     x = shifti(x, -vy);
    1137      602340 :     if (lx == 3) {
    1138           0 :       ulong q = uel(x,2) / y;
    1139           0 :       set_avma(av);
    1140           0 :       if (!q) pari_err_OP("exact division", x, utoi(y));
    1141           0 :       return (s > 0)? utoipos(q): utoineg(q);
    1142             :     }
    1143      599874 :   } else x = icopy(x);
    1144     1202214 :   set_avma(av);
    1145     1202214 :   z = diviuexact_i(x, y);
    1146     1202214 :   setsigne(z, s); return z;
    1147             : }
    1148             : 
    1149             : /* Find z such that x=y*z, knowing that y | x (unchecked)
    1150             :  * Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = 1/y0 mod B.
    1151             :  *    Set x := (x - z0 y) / B, updating only relevant words, and repeat */
    1152             : GEN
    1153   231209306 : diviiexact(GEN x, GEN y)
    1154             : {
    1155   231209306 :   long lx, ly, lz, vy, i, ii, sx = signe(x), sy = signe(y);
    1156             :   pari_sp av;
    1157             :   ulong y0inv,q;
    1158             :   GEN z;
    1159             : 
    1160   231209306 :   if (!sy) pari_err_INV("diviiexact",gen_0);
    1161   231209306 :   if (!sx) return gen_0;
    1162   190716989 :   lx = lgefint(x);
    1163   190716989 :   if (lx == 3) {
    1164   164985305 :     q = uel(x,2) / uel(y,2);
    1165   164985305 :     if (!q) pari_err_OP("exact division", x, y);
    1166   164985296 :     return (sx+sy) ? utoipos(q): utoineg(q);
    1167             :   }
    1168    25731684 :   vy = vali(y); av = avma;
    1169    25731684 :   (void)new_chunk(lx); /* enough room for z */
    1170    25731684 :   if (vy)
    1171             :   { /* make y odd */
    1172    11851701 :     y = shifti(y,-vy);
    1173    11851701 :     x = shifti(x,-vy); lx = lgefint(x);
    1174             :   }
    1175    13879983 :   else x = icopy(x); /* necessary because we destroy x */
    1176    25731684 :   set_avma(av); /* will erase our x,y when exiting */
    1177             :   /* now y is odd */
    1178    25731684 :   ly = lgefint(y);
    1179    25731684 :   if (ly == 3)
    1180             :   {
    1181    15982176 :     z = diviuexact_i(x,uel(y,2)); /* x != 0 */
    1182    15982176 :     setsigne(z, (sx+sy)? 1: -1); return z;
    1183             :   }
    1184     9749508 :   y0inv = invmod2BIL(y[ly-1]);
    1185     9749508 :   i=2; while (i<ly && y[i]==x[i]) i++;
    1186     9749508 :   lz = (i==ly || uel(y,i) < uel(x,i)) ? lx-ly+3 : lx-ly+2;
    1187     9749508 :   z = new_chunk(lz);
    1188             : 
    1189     9749508 :   y += ly - 1; /* now y[-i] = i-th word of y */
    1190    45632892 :   for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
    1191             :   {
    1192             :     long limj;
    1193             :     LOCAL_HIREMAINDER;
    1194             :     LOCAL_OVERFLOW;
    1195             : 
    1196    35883384 :     z[i] = q = y0inv*uel(x,ii); /* i-th quotient */
    1197    35883384 :     if (!q) continue;
    1198             : 
    1199             :     /* x := x - q * y */
    1200    35815383 :     (void)mulll(q,y[0]); limj = maxss(lx - lz, ii+3-ly);
    1201             :     { /* update neither lowest word (could set it to 0) nor highest ones */
    1202    35815383 :       register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;
    1203   194402658 :       for (; x0 >= xlim; x0--, y0--)
    1204             :       {
    1205   158587275 :         *x0 = subll(*x0, addmul(q,*y0));
    1206   158587275 :         hiremainder += overflow;
    1207             :       }
    1208    35815383 :       if (hiremainder && limj != lx - lz)
    1209             :       {
    1210    18251850 :         if ((ulong)*x0 < hiremainder)
    1211             :         {
    1212      214992 :           *x0 -= hiremainder;
    1213      214992 :           do (*--x0)--; while ((ulong)*x0 == ULONG_MAX);
    1214             :         }
    1215             :         else
    1216    18036858 :           *x0 -= hiremainder;
    1217             :       }
    1218             :     }
    1219             :   }
    1220     9749508 :   i=2; while(!z[i]) i++;
    1221     9749508 :   z += i-2; lz -= (i-2);
    1222     9749508 :   z[0] = evaltyp(t_INT)|evallg(lz);
    1223     9749508 :   z[1] = evalsigne((sx+sy)? 1: -1) | evallg(lz);
    1224     9749508 :   if (lz == 2) pari_err_OP("exact division", x, y);
    1225     9749508 :   set_avma((pari_sp)z); return z;
    1226             : }
    1227             : 
    1228             : /* assume yz != and yz | x */
    1229             : GEN
    1230      475569 : diviuuexact(GEN x, ulong y, ulong z)
    1231             : {
    1232             :   long tmp[4];
    1233             :   ulong t;
    1234             :   LOCAL_HIREMAINDER;
    1235      475569 :   t = mulll(y, z);
    1236      475569 :   if (!hiremainder) return diviuexact(x, t);
    1237           0 :   tmp[0] = evaltyp(t_INT)|_evallg(4);
    1238           0 :   tmp[1] = evalsigne(1)|evallgefint(4);
    1239           0 :   tmp[2] = hiremainder;
    1240           0 :   tmp[3] = t;
    1241           0 :   return diviiexact(x, tmp);
    1242             : }
    1243             : 
    1244             : /********************************************************************/
    1245             : /**                                                                **/
    1246             : /**               INTEGER MULTIPLICATION (BASECASE)                **/
    1247             : /**                                                                **/
    1248             : /********************************************************************/
    1249             : /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
    1250             : INLINE GEN
    1251  1892867702 : muliispec_basecase(GEN x, GEN y, long nx, long ny)
    1252             : {
    1253             :   GEN z2e,z2d,yd,xd,ye,zd;
    1254             :   long p1,lz;
    1255             :   LOCAL_HIREMAINDER;
    1256             : 
    1257  1892867702 :   if (ny == 1) return muluispec((ulong)*y, x, nx);
    1258   561400752 :   if (ny == 0) return gen_0;
    1259   560783727 :   zd = (GEN)avma; lz = nx+ny+2;
    1260   560783727 :   (void)new_chunk(lz);
    1261   560783727 :   xd = x + nx;
    1262   560783727 :   yd = y + ny;
    1263   560783727 :   ye = yd; p1 = *--xd;
    1264             : 
    1265   560783727 :   *--zd = mulll(p1, *--yd); z2e = zd;
    1266   560783727 :   while (yd > y) *--zd = addmul(p1, *--yd);
    1267   560783727 :   *--zd = hiremainder;
    1268             : 
    1269  4383153162 :   while (xd > x)
    1270             :   {
    1271             :     LOCAL_OVERFLOW;
    1272  3261585708 :     yd = ye; p1 = *--xd;
    1273             : 
    1274  3261585708 :     z2d = --z2e;
    1275  3261585708 :     *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
    1276 40407023448 :     while (yd > y)
    1277             :     {
    1278 33883852032 :       hiremainder += overflow;
    1279 33883852032 :       *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
    1280             :     }
    1281  3261585708 :     *--zd = hiremainder + overflow;
    1282             :   }
    1283   560783727 :   if (*zd == 0) { zd++; lz--; } /* normalize */
    1284   560783727 :   *--zd = evalsigne(1) | evallgefint(lz);
    1285   560783727 :   *--zd = evaltyp(t_INT) | evallg(lz);
    1286   560783727 :   set_avma((pari_sp)zd); return zd;
    1287             : }
    1288             : 
    1289             : INLINE GEN
    1290   610064763 : sqrispec_basecase(GEN x, long nx)
    1291             : {
    1292             :   GEN z2e,z2d,yd,xd,zd,x0,z0;
    1293             :   long p1,lz;
    1294             :   LOCAL_HIREMAINDER;
    1295             :   LOCAL_OVERFLOW;
    1296             : 
    1297   610064763 :   if (nx == 1) return sqru((ulong)*x);
    1298   520397325 :   if (nx == 0) return gen_0;
    1299   114882630 :   zd = (GEN)avma; lz = (nx+1) << 1;
    1300   114882630 :   z0 = new_chunk(lz);
    1301   114882630 :   if (nx == 1)
    1302             :   {
    1303           0 :     *--zd = mulll(*x, *x);
    1304           0 :     *--zd = hiremainder; goto END;
    1305             :   }
    1306   114882630 :   xd = x + nx;
    1307             : 
    1308             :   /* compute double products --> zd */
    1309   114882630 :   p1 = *--xd; yd = xd; --zd;
    1310   114882630 :   *--zd = mulll(p1, *--yd); z2e = zd;
    1311   114882630 :   while (yd > x) *--zd = addmul(p1, *--yd);
    1312   114882630 :   *--zd = hiremainder;
    1313             : 
    1314   114882630 :   x0 = x+1;
    1315  1159882287 :   while (xd > x0)
    1316             :   {
    1317             :     LOCAL_OVERFLOW;
    1318   930117027 :     p1 = *--xd; yd = xd;
    1319             : 
    1320   930117027 :     z2e -= 2; z2d = z2e;
    1321   930117027 :     *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
    1322  9119269239 :     while (yd > x)
    1323             :     {
    1324  7259035185 :       hiremainder += overflow;
    1325  7259035185 :       *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
    1326             :     }
    1327   930117027 :     *--zd = hiremainder + overflow;
    1328             :   }
    1329             :   /* multiply zd by 2 (put result in zd - 1) */
    1330   114882630 :   zd[-1] = ((*zd & HIGHBIT) != 0);
    1331   114882630 :   shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);
    1332             : 
    1333             :   /* add the squares */
    1334   114882630 :   xd = x + nx; zd = z0 + lz;
    1335   114882630 :   p1 = *--xd;
    1336   114882630 :   zd--; *zd = mulll(p1,p1);
    1337   114882630 :   zd--; *zd = addll(hiremainder, *zd);
    1338  1274764917 :   while (xd > x)
    1339             :   {
    1340  1044999657 :     p1 = *--xd;
    1341  1044999657 :     zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);
    1342  1044999657 :     zd--; *zd = addll(hiremainder + overflow, *zd);
    1343             :   }
    1344             : 
    1345             : END:
    1346   114882630 :   if (*zd == 0) { zd++; lz--; } /* normalize */
    1347   114882630 :   *--zd = evalsigne(1) | evallgefint(lz);
    1348   114882630 :   *--zd = evaltyp(t_INT) | evallg(lz);
    1349   114882630 :   set_avma((pari_sp)zd); return zd;
    1350             : }
    1351             : 
    1352             : /********************************************************************/
    1353             : /**                                                                **/
    1354             : /**               INTEGER MULTIPLICATION (FFT)                     **/
    1355             : /**                                                                **/
    1356             : /********************************************************************/
    1357             : 
    1358             : /*
    1359             :  Compute parameters for FFT:
    1360             :    len: result length
    1361             :    k: FFT depth.
    1362             :    n: number of blocks (2^k)
    1363             :    bs: block size
    1364             :    mod: Modulus is M=2^(BIL*mod)+1
    1365             :    ord: order of 2 in Z/MZ.
    1366             :  We must have:
    1367             :    bs*n >= l
    1368             :    2^(BIL*mod) > nb*2^(2*BIL*bs)
    1369             :    2^k | 2*BIL*mod
    1370             : */
    1371             : static void
    1372       11988 : mulliifft_params(long len, long *k, long *mod, long *bs, long *n, ulong *ord)
    1373             : {
    1374             :   long r;
    1375       11988 :   *k = expu((3*len)>>2)-3;
    1376             :   do {
    1377       11988 :     (*k)--;
    1378       11988 :     r  = *k-(TWOPOTBITS_IN_LONG+2);
    1379       11988 :     *n = 1L<<*k;
    1380       11988 :     *bs = (len+*n-1)>>*k;
    1381       11988 :     *mod= 2**bs+1;
    1382       11988 :     if (r>0)
    1383         606 :       *mod=((*mod+(1L<<r)-1)>>r)<<r;
    1384       11988 :   } while(*mod>=3**bs);
    1385       11988 :   *ord= 4**mod*BITS_IN_LONG;
    1386       11988 : }
    1387             : 
    1388             : /* Zf_: arithmetic in ring Z/MZ where M= 2^(BITS_IN_LONG*mod)+1
    1389             :  * for some mod.
    1390             :  * Do not garbage collect.
    1391             :  */
    1392             : 
    1393             : static GEN
    1394    24642624 : Zf_add(GEN a, GEN b, GEN M)
    1395             : {
    1396    24642624 :   GEN y, z = addii(a,b);
    1397    24642624 :   long mod = lgefint(M)-3;
    1398    24642624 :   long l = NLIMBS(z);
    1399    24642624 :   if (l<=mod) return z;
    1400     9162894 :   y = subiu(z, 1);
    1401     9162894 :   if (NLIMBS(y)<=mod) return z;
    1402     9162894 :   return int_normalize(y,1);
    1403             : }
    1404             : 
    1405             : static GEN
    1406    25001739 : Zf_sub(GEN a, GEN b, GEN M)
    1407             : {
    1408    25001739 :   GEN z = subii(a,b);
    1409    25001739 :   return signe(z)>=0? z: addii(M,z);
    1410             : }
    1411             : 
    1412             : /* destroy z */
    1413             : static GEN
    1414    51164184 : Zf_red_destroy(GEN z, GEN M)
    1415             : {
    1416    51164184 :   long mod = lgefint(M)-3;
    1417    51164184 :   long l = NLIMBS(z);
    1418             :   GEN y;
    1419    51164184 :   if (l<=mod) return z;
    1420    22562757 :   y = shifti(z, -mod*BITS_IN_LONG);
    1421    22562757 :   z = int_normalize(z, NLIMBS(y));
    1422    22562757 :   y = Zf_red_destroy(y, M);
    1423    22562757 :   z = subii(z, y);
    1424    22562757 :   if (signe(z)<0) z = addii(z, M);
    1425    22562757 :   return z;
    1426             : }
    1427             : 
    1428             : INLINE GEN
    1429    26435667 : Zf_shift(GEN a, ulong s, GEN M) { return Zf_red_destroy(shifti(a, s), M); }
    1430             : 
    1431             : /*
    1432             :  Multiply by sqrt(2)^s
    1433             :  We use the formula sqrt(2)=z_8*(1-z_4)) && z_8=2^(ord/16) [2^(ord/4)+1]
    1434             : */
    1435             : 
    1436             : static GEN
    1437    24642624 : Zf_mulsqrt2(GEN a, ulong s, ulong ord, GEN M)
    1438             : {
    1439    24642624 :   ulong hord = ord>>1;
    1440    24642624 :   if (!signe(a)) return gen_0;
    1441    23551677 :   if (odd(s)) /* Multiply by 2^(s/2) */
    1442             :   {
    1443      359115 :     GEN az8  = Zf_shift(a,   ord>>4, M);
    1444      359115 :     GEN az83 = Zf_shift(az8, ord>>3, M);
    1445      359115 :     a = Zf_sub(az8, az83, M);
    1446      359115 :     s--;
    1447             :   }
    1448    23551677 :   if (s < hord)
    1449    17368836 :     return Zf_shift(a, s>>1, M);
    1450             :   else
    1451     6182841 :     return subii(M,Zf_shift(a, (s-hord)>>1, M));
    1452             : }
    1453             : 
    1454             : INLINE GEN
    1455      269184 : Zf_sqr(GEN a, GEN M) { return Zf_red_destroy(sqri(a), M); }
    1456             : 
    1457             : INLINE GEN
    1458     1896576 : Zf_mul(GEN a, GEN b, GEN M) { return Zf_red_destroy(mulii(a,b), M); }
    1459             : 
    1460             : /* In place, bit reversing FFT */
    1461             : static void
    1462     4039542 : muliifft_dit(ulong o, ulong ord, GEN M, GEN FFT, long d, long step)
    1463             : {
    1464     4039542 :   pari_sp av = avma;
    1465             :   long i;
    1466     4039542 :   ulong j, no = (o<<1)%ord;
    1467     4039542 :   long hstep=step>>1;
    1468    20090550 :   for (i = d+1, j = 0; i <= d+hstep; ++i, j =(j+o)%ord)
    1469             :   {
    1470    16051008 :     GEN a = Zf_add(gel(FFT,i), gel(FFT,i+hstep), M);
    1471    16051008 :     GEN b = Zf_mulsqrt2(Zf_sub(gel(FFT,i), gel(FFT,i+hstep), M), j, ord, M);
    1472    16051008 :     affii(a,gel(FFT,i));
    1473    16051008 :     affii(b,gel(FFT,i+hstep));
    1474    16051008 :     set_avma(av);
    1475             :   }
    1476     4039542 :   if (hstep>1)
    1477             :   {
    1478     2008374 :     muliifft_dit(no, ord, M, FFT, d, hstep);
    1479     2008374 :     muliifft_dit(no, ord, M, FFT, d+hstep, hstep);
    1480             :   }
    1481     4039542 : }
    1482             : 
    1483             : /* In place, bit reversed FFT, inverse of muliifft_dit */
    1484             : static void
    1485     2153772 : muliifft_dis(ulong o, ulong ord, GEN M, GEN FFT, long d, long step)
    1486             : {
    1487     2153772 :   pari_sp av = avma;
    1488             :   long i;
    1489     2153772 :   ulong j, no = (o<<1)%ord;
    1490     2153772 :   long hstep=step>>1;
    1491     2153772 :   if (hstep>1)
    1492             :   {
    1493     1070892 :     muliifft_dis(no, ord, M, FFT, d, hstep);
    1494     1070892 :     muliifft_dis(no, ord, M, FFT, d+hstep, hstep);
    1495             :   }
    1496    10745388 :   for (i = d+1, j = 0; i <= d+hstep; ++i, j =(j+o)%ord)
    1497             :   {
    1498     8591616 :     GEN z = Zf_mulsqrt2(gel(FFT,i+hstep), j, ord, M);
    1499     8591616 :     GEN a = Zf_add(gel(FFT,i), z, M);
    1500     8591616 :     GEN b = Zf_sub(gel(FFT,i), z, M);
    1501     8591616 :     affii(a,gel(FFT,i));
    1502     8591616 :     affii(b,gel(FFT,i+hstep));
    1503     8591616 :     set_avma(av);
    1504             :   }
    1505     2153772 : }
    1506             : 
    1507             : static GEN
    1508       22794 : muliifft_spliti(GEN a, long na, long bs, long n, long mod)
    1509             : {
    1510       22794 :   GEN ap = a+na-1;
    1511       22794 :   GEN c  = cgetg(n+1, t_VEC);
    1512             :   long i,j;
    1513     4085130 :   for(i=1;i<=n;i++)
    1514             :   {
    1515     4062336 :     GEN z = cgeti(mod+3);
    1516     4062336 :     if (na)
    1517             :     {
    1518     2006733 :       long m = minss(bs, na), v=0;
    1519     2006733 :       GEN zp, aa=ap-m+1;
    1520     2006733 :       while (!*aa && v<m) {aa++; v++;}
    1521     2006733 :       zp = z+m-v+1;
    1522    45028554 :       for (j=v; j < m; j++)
    1523    43021821 :         *zp-- = *ap--;
    1524     2006733 :       ap -= v; na -= m;
    1525     2006733 :       z[1] = evalsigne(m!=v) | evallgefint(m-v+2);
    1526             :     } else
    1527     2055603 :       z[1] = evalsigne(0) | evallgefint(2);
    1528     4062336 :     gel(c, i) = z;
    1529             :   }
    1530       22794 :   return c;
    1531             : }
    1532             : 
    1533             : static GEN
    1534       11988 : muliifft_unspliti(GEN V, long bs, long len)
    1535             : {
    1536       11988 :   long s, i, j, l = lg(V);
    1537       11988 :   GEN a = cgeti(len);
    1538       11988 :   a[1] = evalsigne(1)|evallgefint(len);
    1539    59641284 :   for(i=2;i<len;i++)
    1540    59629296 :     a[i] = 0;
    1541     2177748 :   for(i=1, s=0; i<l; i++, s+=bs)
    1542             :   {
    1543     2165760 :     GEN  u = gel(V,i);
    1544     2165760 :     if (signe(u))
    1545             :     {
    1546     1999422 :       GEN ap = int_W(a,s);
    1547     1999422 :       GEN up = int_LSW(u);
    1548     1999422 :       long lu = NLIMBS(u);
    1549             :       LOCAL_OVERFLOW;
    1550     1999422 :       *ap = addll(*ap, *up--); ap--;
    1551   108249042 :       for (j=1; j<lu; j++)
    1552   106249620 :        { *ap = addllx(*ap, *up--); ap--; }
    1553     3998970 :       while (overflow)
    1554         126 :        { *ap = addll(*ap, 1); ap--; }
    1555             :     }
    1556             :   }
    1557       11988 :   return int_normalize(a,0);
    1558             : }
    1559             : 
    1560             : static GEN
    1561        1182 : sqrispec_fft(GEN a, long na)
    1562             : {
    1563        1182 :   pari_sp av, ltop = avma;
    1564        1182 :   long len = 2*na;
    1565             :   long k, mod, bs, n;
    1566             :   GEN  FFT, M;
    1567             :   long i;
    1568             :   ulong o, ord;
    1569             : 
    1570        1182 :   mulliifft_params(len,&k,&mod,&bs,&n,&ord);
    1571        1182 :   o = ord>>k;
    1572        1182 :   M = int2n(mod*BITS_IN_LONG);
    1573        1182 :   M[2+mod] = 1;
    1574        1182 :   FFT = muliifft_spliti(a, na, bs, n, mod);
    1575        1182 :   muliifft_dit(o, ord, M, FFT, 0, n);
    1576        1182 :   av = avma;
    1577      270366 :   for(i=1; i<=n; i++)
    1578             :   {
    1579      269184 :     affii(Zf_sqr(gel(FFT,i), M), gel(FFT,i));
    1580      269184 :     set_avma(av);
    1581             :   }
    1582        1182 :   muliifft_dis(ord-o, ord, M, FFT, 0, n);
    1583      270366 :   for(i=1; i<=n; i++)
    1584             :   {
    1585      269184 :     affii(Zf_shift(gel(FFT,i), (ord>>1)-k, M), gel(FFT,i));
    1586      269184 :     set_avma(av);
    1587             :   }
    1588        1182 :   return gerepileuptoint(ltop, muliifft_unspliti(FFT,bs,2+len));
    1589             : }
    1590             : 
    1591             : static GEN
    1592       10806 : muliispec_fft(GEN a, GEN b, long na, long nb)
    1593             : {
    1594       10806 :   pari_sp av, av2, ltop = avma;
    1595       10806 :   long len = na+nb;
    1596             :   long k, mod, bs, n;
    1597             :   GEN FFT, FFTb, M;
    1598             :   long i;
    1599             :   ulong o, ord;
    1600             : 
    1601       10806 :   mulliifft_params(len,&k,&mod,&bs,&n,&ord);
    1602       10806 :   o = ord>>k;
    1603       10806 :   M = int2n(mod*BITS_IN_LONG);
    1604       10806 :   M[2+mod] = 1;
    1605       10806 :   FFT = muliifft_spliti(a, na, bs, n, mod);
    1606       10806 :   av=avma;
    1607       10806 :   muliifft_dit(o, ord, M, FFT, 0, n);
    1608       10806 :   FFTb = muliifft_spliti(b, nb, bs, n, mod);
    1609       10806 :   av2 = avma;
    1610       10806 :   muliifft_dit(o, ord, M, FFTb, 0, n);
    1611     1907382 :   for(i=1; i<=n; i++)
    1612             :   {
    1613     1896576 :     affii(Zf_mul(gel(FFT,i), gel(FFTb,i), M), gel(FFT,i));
    1614     1896576 :     set_avma(av2);
    1615             :   }
    1616       10806 :   set_avma(av);
    1617       10806 :   muliifft_dis(ord-o, ord, M, FFT, 0, n);
    1618     1907382 :   for(i=1; i<=n; i++)
    1619             :   {
    1620     1896576 :     affii(Zf_shift(gel(FFT,i),(ord>>1)-k,M), gel(FFT,i));
    1621     1896576 :     set_avma(av);
    1622             :   }
    1623       10806 :   return gerepileuptoint(ltop, muliifft_unspliti(FFT,bs,2+len));
    1624             : }
    1625             : 
    1626             : /********************************************************************/
    1627             : /**                                                                **/
    1628             : /**               INTEGER MULTIPLICATION (KARATSUBA)               **/
    1629             : /**                                                                **/
    1630             : /********************************************************************/
    1631             : 
    1632             : /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
    1633             : static GEN
    1634   140497461 : addshiftw(GEN x, GEN y, long d)
    1635             : {
    1636   140497461 :   GEN z,z0,y0,yd, zd = (GEN)avma;
    1637   140497461 :   long a,lz,ly = lgefint(y);
    1638             : 
    1639   140497461 :   z0 = new_chunk(d);
    1640   140497461 :   a = ly-2; yd = y+ly;
    1641   140497461 :   if (a >= d)
    1642             :   {
    1643   138210216 :     y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
    1644   138210216 :     a -= d;
    1645   138210216 :     if (a)
    1646    94216833 :       z = addiispec(LIMBS(x), LIMBS(y), NLIMBS(x), a);
    1647             :     else
    1648    43993383 :       z = icopy(x);
    1649             :   }
    1650             :   else
    1651             :   {
    1652     2287245 :     y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
    1653     2287245 :     while (zd > z0) *--zd = 0;    /* complete with 0s */
    1654     2287245 :     z = icopy(x);
    1655             :   }
    1656   140497461 :   lz = lgefint(z)+d;
    1657   140497461 :   z[1] = evalsigne(1) | evallgefint(lz);
    1658   140497461 :   z[0] = evaltyp(t_INT) | evallg(lz); return z;
    1659             : }
    1660             : 
    1661             : /* Fast product (Karatsuba) of integers. a and b are "special" GENs
    1662             :  * c,c0,c1,c2 are genuine GENs.
    1663             :  */
    1664             : GEN
    1665  1936788887 : muliispec(GEN a, GEN b, long na, long nb)
    1666             : {
    1667             :   GEN a0,c,c0;
    1668             :   long n0, n0a, i;
    1669             :   pari_sp av;
    1670             : 
    1671  1936788887 :   if (na < nb) swapspec(a,b, na,nb);
    1672  1936788887 :   if (nb < MULII_KARATSUBA_LIMIT) return muliispec_basecase(a,b,na,nb);
    1673    43921185 :   if (nb >= MULII_FFT_LIMIT)      return muliispec_fft(a,b,na,nb);
    1674    43910379 :   i=(na>>1); n0=na-i; na=i;
    1675    43910379 :   av=avma; a0=a+na; n0a=n0;
    1676    43910379 :   while (n0a && !*a0) { a0++; n0a--; }
    1677             : 
    1678    43910379 :   if (n0a && nb > n0)
    1679    42342960 :   { /* nb <= na <= n0 */
    1680             :     GEN b0,c1,c2;
    1681             :     long n0b;
    1682             : 
    1683    42342960 :     nb -= n0;
    1684    42342960 :     c = muliispec(a,b,na,nb);
    1685    42342960 :     b0 = b+nb; n0b = n0;
    1686    42342960 :     while (n0b && !*b0) { b0++; n0b--; }
    1687    42342960 :     if (n0b)
    1688             :     {
    1689    41670366 :       c0 = muliispec(a0,b0, n0a,n0b);
    1690             : 
    1691    41670366 :       c2 = addiispec(a0,a, n0a,na);
    1692    41670366 :       c1 = addiispec(b0,b, n0b,nb);
    1693    41670366 :       c1 = muliispec(LIMBS(c1),LIMBS(c2), NLIMBS(c1),NLIMBS(c2));
    1694    41670366 :       c2 = addiispec(LIMBS(c0),LIMBS(c),  NLIMBS(c0),NLIMBS(c));
    1695             : 
    1696    41670366 :       c1 = subiispec(LIMBS(c1),LIMBS(c2), NLIMBS(c1),NLIMBS(c2));
    1697             :     }
    1698             :     else
    1699             :     {
    1700      672594 :       c0 = gen_0;
    1701      672594 :       c1 = muliispec(a0,b, n0a,nb);
    1702             :     }
    1703    42342960 :     c = addshiftw(c,c1, n0);
    1704             :   }
    1705             :   else
    1706             :   {
    1707     1567419 :     c = muliispec(a,b,na,nb);
    1708     1567419 :     c0 = muliispec(a0,b,n0a,nb);
    1709             :   }
    1710    43910379 :   return gerepileuptoint(av, addshiftw(c,c0, n0));
    1711             : }
    1712             : GEN
    1713      492237 : muluui(ulong x, ulong y, GEN z)
    1714             : {
    1715      492237 :   long t, s = signe(z);
    1716             :   GEN r;
    1717             :   LOCAL_HIREMAINDER;
    1718             : 
    1719      492237 :   if (!x || !y || !signe(z)) return gen_0;
    1720      491940 :   t = mulll(x,y);
    1721      491940 :   if (!hiremainder)
    1722      491940 :     r = muluispec(t, z+2, lgefint(z)-2);
    1723             :   else
    1724             :   {
    1725             :     long tmp[2];
    1726           0 :     tmp[0] = hiremainder;
    1727           0 :     tmp[1] = t;
    1728           0 :     r = muliispec(z+2,tmp,lgefint(z)-2,2);
    1729             :   }
    1730      491940 :   setsigne(r,s); return r;
    1731             : }
    1732             : 
    1733             : #define sqrispec_mirror sqrispec
    1734             : #define muliispec_mirror muliispec
    1735             : 
    1736             : /* x % (2^n), assuming n >= 0 */
    1737             : GEN
    1738     8435715 : remi2n(GEN x, long n)
    1739             : {
    1740     8435715 :   long hi,l,k,lx,ly, sx = signe(x);
    1741             :   GEN z, xd, zd;
    1742             : 
    1743     8435715 :   if (!sx || !n) return gen_0;
    1744             : 
    1745     8417565 :   k = dvmdsBIL(n, &l);
    1746     8417565 :   lx = lgefint(x);
    1747     8417565 :   if (lx < k+3) return icopy(x);
    1748             : 
    1749     8378187 :   xd = x + (lx-k-1);
    1750             :   /* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words
    1751             :    *            ^--- initial xd  */
    1752     8378187 :   hi = ((ulong)*xd) & ((1UL<<l)-1); /* last l bits of # = top bits of result */
    1753     8378187 :   if (!hi)
    1754             :   { /* strip leading zeroes from result */
    1755      279261 :     xd++; while (k && !*xd) { k--; xd++; }
    1756      279261 :     if (!k) return gen_0;
    1757       72117 :     ly = k+2; xd--;
    1758             :   }
    1759             :   else
    1760     8098926 :     ly = k+3;
    1761             : 
    1762     8171043 :   zd = z = cgeti(ly);
    1763     8171043 :   *++zd = evalsigne(sx) | evallgefint(ly);
    1764     8171043 :   if (hi) *++zd = hi;
    1765     8171043 :   for ( ;k; k--) *++zd = *++xd;
    1766     8171043 :   return z;
    1767             : }
    1768             : 
    1769             : GEN
    1770   614933172 : sqrispec(GEN a, long na)
    1771             : {
    1772             :   GEN a0,c;
    1773             :   long n0, n0a, i;
    1774             :   pari_sp av;
    1775             : 
    1776   614933172 :   if (na < SQRI_KARATSUBA_LIMIT) return sqrispec_basecase(a,na);
    1777     4868409 :   if (na >= SQRI_FFT_LIMIT) return sqrispec_fft(a,na);
    1778     4867227 :   i=(na>>1); n0=na-i; na=i;
    1779     4867227 :   av=avma; a0=a+na; n0a=n0;
    1780     4867227 :   while (n0a && !*a0) { a0++; n0a--; }
    1781     4867227 :   c = sqrispec(a,na);
    1782     4867227 :   if (n0a)
    1783             :   {
    1784     4855449 :     GEN t, c1, c0 = sqrispec(a0,n0a);
    1785             : #if 0
    1786             :     c1 = shifti(muliispec(a0,a, n0a,na),1);
    1787             : #else /* faster */
    1788     4855449 :     t = addiispec(a0,a,n0a,na);
    1789     4855449 :     t = sqrispec(LIMBS(t),NLIMBS(t));
    1790     4855449 :     c1= addiispec(LIMBS(c0),LIMBS(c), NLIMBS(c0), NLIMBS(c));
    1791     4855449 :     c1= subiispec(LIMBS(t),LIMBS(c1), NLIMBS(t), NLIMBS(c1));
    1792             : #endif
    1793     4855449 :     c = addshiftw(c,c1, n0);
    1794     4855449 :     c = addshiftw(c,c0, n0);
    1795             :   }
    1796             :   else
    1797       11778 :     c = addshiftw(c,gen_0,n0<<1);
    1798     4867227 :   return gerepileuptoint(av, c);
    1799             : }
    1800             : 
    1801             : /********************************************************************/
    1802             : /**                                                                **/
    1803             : /**                    KARATSUBA SQUARE ROOT                       **/
    1804             : /**      adapted from Paul Zimmermann's implementation of          **/
    1805             : /**      his algorithm in GMP (mpn_sqrtrem)                        **/
    1806             : /**                                                                **/
    1807             : /********************************************************************/
    1808             : 
    1809             : /* Square roots table */
    1810             : static const unsigned char approx_tab[192] = {
    1811             :   128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
    1812             :   143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
    1813             :   156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
    1814             :   169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
    1815             :   181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
    1816             :   192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
    1817             :   202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
    1818             :   212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
    1819             :   221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
    1820             :   230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
    1821             :   239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
    1822             :   247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
    1823             : };
    1824             : 
    1825             : /* N[0], assume N[0] >= 2^(BIL-2).
    1826             :  * Return r,s such that s^2 + r = N, 0 <= r <= 2s */
    1827             : static void
    1828    22699946 : p_sqrtu1(ulong *N, ulong *ps, ulong *pr)
    1829             : {
    1830    22699946 :   ulong prec, r, s, q, u, n0 = N[0];
    1831             : 
    1832    22699946 :   q = n0 >> (BITS_IN_LONG - 8);
    1833             :   /* 2^6 = 64 <= q < 256 = 2^8 */
    1834    22699946 :   s = approx_tab[q - 64];                                /* 128 <= s < 255 */
    1835    22699946 :   r = (n0 >> (BITS_IN_LONG - 16)) - s * s;                /* r <= 2*s */
    1836    22699946 :   if (r > (s << 1)) { r -= (s << 1) | 1; s++; }
    1837             : 
    1838             :   /* 8-bit approximation from the high 8-bits of N[0] */
    1839    22699946 :   prec = 8;
    1840    22699946 :   n0 <<= 2 * prec;
    1841    90799784 :   while (2 * prec < BITS_IN_LONG)
    1842             :   { /* invariant: s has prec bits, and r <= 2*s */
    1843    45399892 :     r = (r << prec) + (n0 >> (BITS_IN_LONG - prec));
    1844    45399892 :     n0 <<= prec;
    1845    45399892 :     u = 2 * s;
    1846    45399892 :     q = r / u; u = r - q * u;
    1847    45399892 :     s = (s << prec) + q;
    1848    45399892 :     u = (u << prec) + (n0 >> (BITS_IN_LONG - prec));
    1849    45399892 :     q = q * q;
    1850    45399892 :     r = u - q;
    1851    45399892 :     if (u < q) { s--; r += (s << 1) | 1; }
    1852    45399892 :     n0 <<= prec;
    1853    45399892 :     prec = 2 * prec;
    1854             :   }
    1855    22699946 :   *ps = s;
    1856    22699946 :   *pr = r;
    1857    22699946 : }
    1858             : 
    1859             : /* N[0..1], assume N[0] >= 2^(BIL-2).
    1860             :  * Return 1 if remainder overflows, 0 otherwise */
    1861             : static int
    1862    20344074 : p_sqrtu2(ulong *N, ulong *ps, ulong *pr)
    1863             : {
    1864    20344074 :   ulong cc, qhl, r, s, q, u, n1 = N[1];
    1865             :   LOCAL_OVERFLOW;
    1866             : 
    1867    20344074 :   p_sqrtu1(N, &s, &r); /* r <= 2s */
    1868    20344074 :   qhl = 0; while (r >= s) { qhl++; r -= s; }
    1869             :   /* now r < s < 2^(BIL/2) */
    1870    20344074 :   r = (r << BITS_IN_HALFULONG) | (n1 >> BITS_IN_HALFULONG);
    1871    20344074 :   u = s << 1;
    1872    20344074 :   q = r / u; u = r - q * u;
    1873    20344074 :   q += (qhl & 1) << (BITS_IN_HALFULONG - 1);
    1874    20344074 :   qhl >>= 1;
    1875             :   /* (initial r)<<(BIL/2) + n1>>(BIL/2) = (qhl<<(BIL/2) + q) * 2s + u */
    1876    20344074 :   s = ((s + qhl) << BITS_IN_HALFULONG) + q;
    1877    20344074 :   cc = u >> BITS_IN_HALFULONG;
    1878    20344074 :   r = (u << BITS_IN_HALFULONG) | (n1 & LOWMASK);
    1879    20344074 :   r = subll(r, q * q);
    1880    20344074 :   cc -= overflow + qhl;
    1881             :   /* now subtract 2*q*2^(BIL/2) + 2^BIL if qhl is set */
    1882    20344074 :   if ((long)cc < 0)
    1883             :   {
    1884     5380248 :     if (s) {
    1885     5354091 :       r = addll(r, s);
    1886     5354091 :       cc += overflow;
    1887     5354091 :       s--;
    1888             :     } else {
    1889       26157 :       cc++;
    1890       26157 :       s = ~0UL;
    1891             :     }
    1892     5380248 :     r = addll(r, s);
    1893     5380248 :     cc += overflow;
    1894             :   }
    1895    20344074 :   *ps = s;
    1896    20344074 :   *pr = r; return cc;
    1897             : }
    1898             : 
    1899             : static void
    1900    19935591 : xmpn_zero(GEN x, long n)
    1901             : {
    1902    19935591 :   while (--n >= 0) x[n]=0;
    1903    19935591 : }
    1904             : static void
    1905   207259170 : xmpn_copy(GEN z, GEN x, long n)
    1906             : {
    1907   207259170 :   long k = n;
    1908   207259170 :   while (--k >= 0) z[k] = x[k];
    1909   207259170 : }
    1910             : /* a[0..la-1] * 2^(lb BIL) | b[0..lb-1] */
    1911             : static GEN
    1912    89042892 : catii(GEN a, long la, GEN b, long lb)
    1913             : {
    1914    89042892 :   long l = la + lb + 2;
    1915    89042892 :   GEN z = cgetipos(l);
    1916    89042892 :   xmpn_copy(LIMBS(z), a, la);
    1917    89042892 :   xmpn_copy(LIMBS(z) + la, b, lb);
    1918    89042892 :   return int_normalize(z, 0);
    1919             : }
    1920             : 
    1921             : /* sqrt n[0..1], assume n normalized */
    1922             : static GEN
    1923    20019354 : sqrtispec2(GEN n, GEN *pr)
    1924             : {
    1925             :   ulong s, r;
    1926    20019354 :   int hi = p_sqrtu2((ulong*)n, &s, &r);
    1927    20019354 :   GEN S = utoi(s);
    1928    20019354 :   *pr = hi? uutoi(1,r): utoi(r);
    1929    20019354 :   return S;
    1930             : }
    1931             : 
    1932             : /* sqrt n[0], _dont_ assume n normalized */
    1933             : static GEN
    1934     2355872 : sqrtispec1_sh(GEN n, GEN *pr)
    1935             : {
    1936             :   GEN S;
    1937     2355872 :   ulong r, s, u0 = uel(n,0);
    1938     2355872 :   int sh = bfffo(u0) & ~1UL;
    1939     2355872 :   if (sh) u0 <<= sh;
    1940     2355872 :   p_sqrtu1(&u0, &s, &r);
    1941             :   /* s^2 + r = u0, s < 2^(BIL/2). Rescale back:
    1942             :    * 2^(2k) n = S^2 + R
    1943             :    * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
    1944     2355872 :   if (sh) {
    1945     2102798 :     int k = sh >> 1;
    1946     2102798 :     ulong s0 = s & ((1L<<k) - 1);
    1947     2102798 :     r += s * (s0<<1);
    1948     2102798 :     s >>= k;
    1949     2102798 :     r >>= sh;
    1950             :   }
    1951     2355872 :   S = utoi(s);
    1952     2355872 :   if (pr) *pr = utoi(r);
    1953     2355872 :   return S;
    1954             : }
    1955             : 
    1956             : /* sqrt n[0..1], _dont_ assume n normalized */
    1957             : static GEN
    1958      324720 : sqrtispec2_sh(GEN n, GEN *pr)
    1959             : {
    1960             :   GEN S;
    1961      324720 :   ulong U[2], r, s, u0 = uel(n,0), u1 = uel(n,1);
    1962      324720 :   int hi, sh = bfffo(u0) & ~1UL;
    1963      324720 :   if (sh) {
    1964      324612 :     u0 = (u0 << sh) | (u1 >> (BITS_IN_LONG-sh));
    1965      324612 :     u1 <<= sh;
    1966             :   }
    1967      324720 :   U[0] = u0;
    1968      324720 :   U[1] = u1; hi = p_sqrtu2(U, &s, &r);
    1969             :   /* s^2 + R = u0|u1. Rescale back:
    1970             :    * 2^(2k) n = S^2 + R
    1971             :    * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
    1972      324720 :   if (sh) {
    1973      324612 :     int k = sh >> 1;
    1974      324612 :     ulong s0 = s & ((1L<<k) - 1);
    1975             :     LOCAL_HIREMAINDER;
    1976             :     LOCAL_OVERFLOW;
    1977      324612 :     r = addll(r, mulll(s, (s0<<1)));
    1978      324612 :     if (overflow) hiremainder++;
    1979      324612 :     hiremainder += hi; /* + 0 or 1 */
    1980      324612 :     s >>= k;
    1981      324612 :     r = (r>>sh) | (hiremainder << (BITS_IN_LONG-sh));
    1982      324612 :     hi = (hiremainder & (1L<<sh));
    1983             :   }
    1984      324720 :   S = utoi(s);
    1985      324720 :   if (pr) *pr = hi? uutoi(1,r): utoi(r);
    1986      324720 :   return S;
    1987             : }
    1988             : 
    1989             : /* Let N = N[0..2n-1]. Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S
    1990             :  * Assume N normalized */
    1991             : static GEN
    1992    64540800 : sqrtispec(GEN N, long n, GEN *r)
    1993             : {
    1994             :   GEN S, R, q, z, u;
    1995             :   long l, h;
    1996             : 
    1997    64540800 :   if (n == 1) return sqrtispec2(N, r);
    1998    44521446 :   l = n >> 1;
    1999    44521446 :   h = n - l; /* N = a3(h) | a2(h) | a1(l) | a0(l words) */
    2000    44521446 :   S = sqrtispec(N, h, &R); /* S^2 + R = a3|a2 */
    2001             : 
    2002    44521446 :   z = catii(LIMBS(R), NLIMBS(R), N + 2*h, l); /* = R | a1(l) */
    2003    44521446 :   q = dvmdii(z, shifti(S,1), &u);
    2004    44521446 :   z = catii(LIMBS(u), NLIMBS(u), N + n + h, l); /* = u | a0(l) */
    2005             : 
    2006    44521446 :   S = addshiftw(S, q, l);
    2007    44521446 :   R = subii(z, sqri(q));
    2008    44521446 :   if (signe(R) < 0)
    2009             :   {
    2010     7091619 :     GEN S2 = shifti(S,1);
    2011     7091619 :     R = addis(subiispec(LIMBS(S2),LIMBS(R), NLIMBS(S2),NLIMBS(R)), -1);
    2012     7091619 :     S = addis(S, -1);
    2013             :   }
    2014    44521446 :   *r = R; return S;
    2015             : }
    2016             : 
    2017             : /* Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S.
    2018             :  * As for dvmdii, R is last on stack and guaranteed to be gen_0 in case the
    2019             :  * remainder is 0. R = NULL is allowed. */
    2020             : GEN
    2021     2764361 : sqrtremi(GEN N, GEN *r)
    2022             : {
    2023             :   pari_sp av;
    2024     2764361 :   GEN S, R, n = N+2;
    2025     2764361 :   long k, l2, ln = NLIMBS(N);
    2026             :   int sh;
    2027             : 
    2028     2764361 :   if (ln <= 2)
    2029             :   {
    2030     2680598 :     if (ln == 2) return sqrtispec2_sh(n, r);
    2031     2355878 :     if (ln == 1) return sqrtispec1_sh(n, r);
    2032           6 :     if (r) *r = gen_0;
    2033           6 :     return gen_0;
    2034             :   }
    2035       83763 :   av = avma;
    2036       83763 :   sh = bfffo(n[0]) >> 1;
    2037       83763 :   l2 = (ln + 1) >> 1;
    2038      167331 :   if (sh || (ln & 1)) { /* normalize n, so that n[0] >= 2^BIL / 4 */
    2039       83568 :     GEN s0, t = new_chunk(ln + 1);
    2040       83568 :     t[ln] = 0;
    2041       83568 :     if (sh)
    2042       83166 :       shift_left(t, n, 0,ln-1, 0, sh << 1);
    2043             :     else
    2044         402 :       xmpn_copy(t, n, ln);
    2045       83568 :     S = sqrtispec(t, l2, &R); /* t normalized, 2 * l2 words */
    2046             :     /* Rescale back:
    2047             :      * 2^(2k) n = S^2 + R, k = sh + (ln & 1)*BIL/2
    2048             :      * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
    2049       83568 :     k = sh + (ln & 1) * (BITS_IN_LONG/2);
    2050       83568 :     s0 = remi2n(S, k);
    2051       83568 :     R = addii(shifti(R,-1), mulii(s0, S));
    2052       83568 :     R = shifti(R, 1 - (k<<1));
    2053       83568 :     S = shifti(S, -k);
    2054             :   }
    2055             :   else
    2056         195 :     S = sqrtispec(n, l2, &R);
    2057             : 
    2058       83763 :   if (!r) { set_avma((pari_sp)S); return gerepileuptoint(av, S); }
    2059        6708 :   gerepileall(av, 2, &S, &R); *r = R; return S;
    2060             : }
    2061             : 
    2062             : /* compute sqrt(|a|), assuming a != 0 */
    2063             : 
    2064             : #if 1
    2065             : GEN
    2066    19935591 : sqrtr_abs(GEN x)
    2067             : {
    2068    19935591 :   long l = realprec(x) - 2, e = expo(x), er = e>>1;
    2069    19935591 :   GEN b, c, res = cgetr(2 + l);
    2070    19935591 :   res[1] = evalsigne(1) | evalexpo(er);
    2071    19935591 :   if (e&1) {
    2072     9237393 :     b = new_chunk(l << 1);
    2073     9237393 :     xmpn_copy(b, x+2, l);
    2074     9237393 :     xmpn_zero(b + l,l);
    2075     9237393 :     b = sqrtispec(b, l, &c);
    2076     9237393 :     xmpn_copy(res+2, b+2, l);
    2077     9237393 :     if (cmpii(c, b) > 0) roundr_up_ip(res, l+2);
    2078             :   } else {
    2079             :     ulong u;
    2080    10698198 :     b = new_chunk(2 + (l << 1));
    2081    10698198 :     shift_left(b+1, x+2, 0,l-1, 0, BITS_IN_LONG-1);
    2082    10698198 :     b[0] = uel(x,2)>>1;
    2083    10698198 :     xmpn_zero(b + l+1,l+1);
    2084    10698198 :     b = sqrtispec(b, l+1, &c);
    2085    10698198 :     xmpn_copy(res+2, b+2, l);
    2086    10698198 :     u = uel(b,l+2);
    2087    10698198 :     if ( u&HIGHBIT || (u == ~HIGHBIT && cmpii(c,b) > 0))
    2088     5262533 :       roundr_up_ip(res, l+2);
    2089             :   }
    2090    19935591 :   set_avma((pari_sp)res); return res;
    2091             : }
    2092             : 
    2093             : #else /* use t_REAL: currently much slower (quadratic division) */
    2094             : 
    2095             : #ifdef LONG_IS_64BIT
    2096             : /* 64 bits of b = sqrt(a[0] * 2^64 + a[1])  [ up to 1ulp ] */
    2097             : static ulong
    2098             : sqrtu2(ulong *a)
    2099             : {
    2100             :   ulong c, b = dblmantissa( sqrt((double)a[0]) );
    2101             :   LOCAL_HIREMAINDER;
    2102             :   LOCAL_OVERFLOW;
    2103             : 
    2104             :   /* > 32 correct bits, 1 Newton iteration to reach 64 */
    2105             :   if (b <= a[0]) return HIGHBIT | (a[0] >> 1);
    2106             :   hiremainder = a[0]; c = divll(a[1], b);
    2107             :   return (addll(c, b) >> 1) | HIGHBIT;
    2108             : }
    2109             : /* 64 bits of sqrt(a[0] * 2^63) */
    2110             : static ulong
    2111             : sqrtu2_1(ulong *a)
    2112             : {
    2113             :   ulong t[2];
    2114             :   t[0] = (a[0] >> 1);
    2115             :   t[1] = (a[0] << (BITS_IN_LONG-1)) | (a[1] >> 1);
    2116             :   return sqrtu2(t);
    2117             : }
    2118             : #else
    2119             : /* 32 bits of sqrt(a[0] * 2^32) */
    2120             : static ulong
    2121             : sqrtu2(ulong *a)   { return dblmantissa( sqrt((double)a[0]) ); }
    2122             : /* 32 bits of sqrt(a[0] * 2^31) */
    2123             : static ulong
    2124             : sqrtu2_1(ulong *a) { return dblmantissa( sqrt(2. * a[0]) ); }
    2125             : #endif
    2126             : 
    2127             : GEN
    2128             : sqrtr_abs(GEN x)
    2129             : {
    2130             :   long l1, i, l = lg(x), ex = expo(x);
    2131             :   GEN a, t, y = cgetr(l);
    2132             :   pari_sp av, av0 = avma;
    2133             : 
    2134             :   a = rtor(x, l+1);
    2135             :   t = cgetr(l+1);
    2136             :   if (ex & 1) { /* odd exponent */
    2137             :     a[1] = evalsigne(1) | _evalexpo(1);
    2138             :     t[2] = (long)sqrtu2((ulong*)a + 2);
    2139             :   } else { /* even exponent */
    2140             :     a[1] = evalsigne(1) | _evalexpo(0);
    2141             :     t[2] = (long)sqrtu2_1((ulong*)a + 2);
    2142             :   }
    2143             :   t[1] = evalsigne(1) | _evalexpo(0);
    2144             :   for (i = 3; i <= l; i++) t[i] = 0;
    2145             : 
    2146             :   /* |x| = 2^(ex/2) a, t ~ sqrt(a) */
    2147             :   l--; l1 = 1; av = avma;
    2148             :   while (l1 < l) { /* let t := (t + a/t)/2 */
    2149             :     l1 <<= 1; if (l1 > l) l1 = l;
    2150             :     setlg(a, l1 + 2);
    2151             :     setlg(t, l1 + 2);
    2152             :     affrr(addrr(t, divrr(a,t)), t); shiftr_inplace(t, -1);
    2153             :     set_avma(av);
    2154             :   }
    2155             :   affrr(t,y); shiftr_inplace(y, (ex>>1));
    2156             :   set_avma(av0); return y;
    2157             : }
    2158             : 
    2159             : #endif
    2160             : 
    2161             : /*******************************************************************
    2162             :  *                                                                 *
    2163             :  *                           Base Conversion                       *
    2164             :  *                                                                 *
    2165             :  *******************************************************************/
    2166             : 
    2167             : static void
    2168      937299 : convi_dac(GEN x, ulong l, ulong *res)
    2169             : {
    2170      937299 :   pari_sp ltop=avma;
    2171             :   ulong m;
    2172             :   GEN x1,x2;
    2173      937299 :   if (l==1) { *res=itou(x); return; }
    2174      430728 :   m=l>>1;
    2175      430728 :   x1=dvmdii(x,powuu(1000000000UL,m),&x2);
    2176      430728 :   convi_dac(x1,l-m,res+m);
    2177      430728 :   convi_dac(x2,m,res);
    2178      430728 :   set_avma(ltop);
    2179             : }
    2180             : 
    2181             : /* convert integer --> base 10^9 [not memory clean] */
    2182             : ulong *
    2183      270469 : convi(GEN x, long *l)
    2184             : {
    2185      270469 :   long lz, lx = lgefint(x);
    2186             :   ulong *z;
    2187      270469 :   if (lx == 3 && uel(x,2) < 1000000000UL) {
    2188      194626 :     z = (ulong*)new_chunk(1);
    2189      194626 :     *z = x[2];
    2190      194626 :     *l = 1; return z+1;
    2191             :   }
    2192       75843 :   lz = 1 + (long)bit_accuracy_mul(lx, LOG10_2/9);
    2193       75843 :   z = (ulong*)new_chunk(lz);
    2194       75843 :   convi_dac(x,(ulong)lz,z);
    2195       75843 :   while (z[lz-1]==0) lz--;
    2196       75843 :   *l=lz; return z+lz;
    2197             : }
    2198             : 

Generated by: LCOV version 1.13