Bill Allombert on Sun, 11 Jan 2004 19:22:15 +0100


[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

Re: Flx_sqr/sqri comparaison


On Sun, Jan 11, 2004 at 01:14:34PM +0100, Bill Allombert wrote:
> Hello PARI-dev,
> I have a plan to reuse GMP fast multiplication in Flx_sqr.

Well, here a prototype patch that implement that. Dues to the
bench above it is also worthwhile to do it with the PARI kernel.
This patch deserve some tuning.

Here the results :
i     : old   : PARI  : GMP   
1     : 120   : 140   : 170   
2     : 80    : 70    : 80    
3     : 80    : 50    : 50    
4     : 90    : 60    : 70    
5     : 130   : 70    : 100   
6     : 190   : 110   : 120   
7     : 300   : 150   : 160   
8     : 460   : 230   : 280   
9     : 700   : 340   : 370   
10    : 1060  : 490   : 560   
11    : 1620  : 750   : 780   
12    : 2450  : 1120  : 960   
13    : 3690  : 1690  : 1000  
14    : 5600  : 2560  : 1140  
15    : 8420  : 3830  : 1220  
16    : 12670 : 5780  : 1540  

I am still puzzled by straight Flx_sqr being so slow.

Cheers,
Bill
Index: src/basemath/Flx.c
===================================================================
RCS file: /home/cvs/pari/src/basemath/Flx.c,v
retrieving revision 1.10
diff -u -r1.10 Flx.c
--- src/basemath/Flx.c	11 Jan 2004 00:21:04 -0000	1.10
+++ src/basemath/Flx.c	11 Jan 2004 17:35:14 -0000
@@ -14,6 +14,8 @@
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
 
 #include "pari.h"
+GEN muliispec(GEN x, GEN y, long nx, long ny);
+GEN sqrispec(GEN x, long nx);
 
 /* Not so fast arithmetic with polynomials with small coefficients. */
 
@@ -210,15 +212,25 @@
   return x;
 }
 
+/*Do not renormalize. Must not use z[0],z[1]*/
+static GEN
+Flx_red_lg_i(GEN z, long l, ulong p)
+{
+  long i;
+  ulong *y=(ulong *)z;
+  GEN x = cgetg(l, t_VECSMALL);
+  for (i=2; i<l; i++)
+    x[i] = (long) y[i]%p;
+  return x; 
+}
+
 GEN
 Flx_red(GEN z, ulong p)
 {
-  long i,l;
-  GEN x;
-  l = lg(z); x = cgetg(l, t_VECSMALL);
-  for (i=2; i<l; i++)
-    x[i] = z[i]%p;
-  x[1] = z[1]; return Flx_renormalize(x,l);
+  long l = lg(z);
+  GEN x = Flx_red_lg_i(z,l,p); 
+  x[1] = z[1]; 
+  return Flx_renormalize(x,l);
 }
 
 GEN
@@ -356,6 +368,18 @@
   return y;
 }
 
+static long
+maxlenghtcoeffpol (ulong p, long n)
+{
+  pari_sp ltop=avma;
+  long l;
+  GEN z=muluu(p-1,p-1);
+  z=mulis(z,n);
+  l=lgefint(z)-2;
+  avma=ltop;
+  return l;
+}
+
 INLINE ulong
 Flx_mullimb_ok(GEN x, GEN y, ulong p, long a, long b)
 { /* Assume OK_ULONG*/
@@ -367,7 +391,8 @@
       p1 += y[i] * x[-i];
       if (p1 & HIGHBIT) p1 %= p;
     }
-  return p1 % p;
+  if (p1>=p) p1 %= p;
+  return p1;
 }
 
 INLINE ulong
@@ -382,7 +407,7 @@
 }
 
 /* assume nx >= ny > 0 */
-GEN
+static GEN
 Flx_mulspec_basecase(GEN x, GEN y, ulong p, long nx, long ny)
 {
   long i,lz,nz;
@@ -405,6 +430,13 @@
   z -= 2; return Flx_renormalize(z, lz);
 }
 
+static GEN
+Flx_mulspec_mulii(GEN a, GEN b, ulong p, long na, long nb)
+{
+  GEN z=muliispec(a,b,na,nb);
+  return Flx_red_lg_i(z,lgefint(z),p);
+}
+
 /* fast product (Karatsuba) of polynomials a,b. These are not real GENs, a+2,
  * b+2 were sent instead. na, nb = number of terms of a, b.
  * Only c, c0, c1, c2 are genuine GEN.
@@ -422,6 +454,8 @@
   if (!nb) return Flx_zero(0);
 
   av = avma;
+  if (maxlenghtcoeffpol(p,nb)==1)
+    return Flx_shiftip(av,Flx_mulspec_mulii(a,b,p,na,nb), v);
   if (nb < Flx_MUL_LIMIT)
     return Flx_shiftip(av,Flx_mulspec_basecase(a,b,p,na,nb), v);
   i=(na>>1); n0=na-i; na=i;
@@ -464,7 +498,7 @@
  z[1] = x[1]; return z;
 }
 
-GEN
+static GEN
 Flx_sqrspec_basecase(GEN x, ulong p, long nx)
 {
   long i, lz, nz;
@@ -520,6 +554,13 @@
   z[1] = x[1]; return z;
 }
 
+static GEN
+Flx_sqrspec_sqri(GEN a, ulong p, long na)
+{
+  GEN z=sqrispec(a,na);
+  return Flx_red_lg_i(z,lgefint(z),p);
+}
+
 GEN
 Flx_sqrspec(GEN a, ulong p, long na)
 {
@@ -529,6 +570,8 @@
 
   while (na && !a[0]) { a++; na--; v += 2; }
   av = avma;
+  if (maxlenghtcoeffpol(p,na)==1)
+    return Flx_shiftip(av, Flx_sqrspec_sqri(a,p,na), v);
   if (na < Flx_SQR_LIMIT) 
     return Flx_shiftip(av, Flx_sqrspec_basecase(a,p,na), v);
   i=(na>>1); n0=na-i; na=i;
Index: src/kernel/gmp/mp.c
===================================================================
RCS file: /home/cvs/pari/src/kernel/gmp/mp.c,v
retrieving revision 1.31
diff -u -r1.31 mp.c
--- src/kernel/gmp/mp.c	14 Oct 2003 08:57:35 -0000	1.31
+++ src/kernel/gmp/mp.c	11 Jan 2004 17:35:16 -0000
@@ -791,8 +791,7 @@
   z[1] = evalsigne(s) | evalexpo(m+e); return z;
 }
 
-INLINE GEN
-muliispec(GEN x, GEN y, long nx, long ny);
+GEN muliispec(GEN x, GEN y, long nx, long ny);
 
 /* We must have nx>=ny. This lets garbage on the stack.
    This handle squares correctly (mpn_mul is optimized
@@ -800,7 +799,7 @@
 */
 
 INLINE GEN
-quickmulii(GEN x, GEN y, long nx, long ny)
+muliispec_mirror(GEN x, GEN y, long nx, long ny)
 {
   GEN cx=new_chunk(nx),cy;
   GEN z;
@@ -826,11 +825,11 @@
   long i, l, lz2 = (lz+2)>>1, lz3 = lz-lz2;
   GEN lo1, lo2, hi;
 
-  hi = quickmulii(x,y, lz2,lz2);
+  hi = muliispec_mirror(x,y, lz2,lz2);
   i = lz2; while (i<lz && !x[i]) i++;
-  lo1 = quickmulii(y,x+i, lz2,lz-i);
+  lo1 = muliispec_mirror(y,x+i, lz2,lz-i);
   i = lz2; while (i<ly && !y[i]) i++;
-  lo2 = quickmulii(x,y+i, lz2,ly-i);
+  lo2 = muliispec_mirror(x,y+i, lz2,ly-i);
   if (signe(lo1))
   {
     if (ly!=lz) { lo2 = addshiftw(lo1,lo2,1); lz3++; }
@@ -861,7 +860,7 @@
 #ifdef KARAMULR_VARIANT
     GEN hi = karamulrr1(y+2, x+2, lz+flag-2, lz-2); 
 #else
-    GEN hi = quickmulii(y+2, x+2, lz+flag-2, lz-2);
+    GEN hi = muliispec_mirror(y+2, x+2, lz+flag-2, lz-2);
 #endif
     long i, garde = hi[lz];
     if (hi[2] < 0)
@@ -1753,7 +1752,7 @@
 /********************************************************************/
 
 /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
-INLINE GEN
+GEN
 muliispec(GEN x, GEN y, long nx, long ny)
 {
   GEN zd;
@@ -1775,7 +1774,7 @@
   return zd;
 }
 
-INLINE GEN
+GEN
 sqrispec(GEN x, long nx)
 {
   GEN zd;
Index: src/kernel/none/mp.c
===================================================================
RCS file: /home/cvs/pari/src/kernel/none/mp.c,v
retrieving revision 1.121
diff -u -r1.121 mp.c
--- src/kernel/none/mp.c	10 Jan 2004 16:33:02 -0000	1.121
+++ src/kernel/none/mp.c	11 Jan 2004 17:35:17 -0000
@@ -720,7 +720,7 @@
   z[1] = evalsigne(s) | evalexpo(m+e); return z;
 }
 
-static GEN muliispec(GEN a, GEN b, long na, long nb);
+GEN muliispec(GEN a, GEN b, long na, long nb);
 /*#define KARAMULR_VARIANT*/
 
 #ifdef KARAMULR_VARIANT
@@ -1974,7 +1974,7 @@
 /* Fast product (Karatsuba) of integers. a and b are "special" GENs
  * c,c0,c1,c2 are genuine GENs.
  */
-static GEN
+GEN
 muliispec(GEN a, GEN b, long na, long nb)
 {
   GEN a0,c,c0;
@@ -2100,7 +2100,7 @@
   return z;
 }
 
-static GEN
+GEN
 sqrispec(GEN a, long na)
 {
   GEN a0,c;