| Bill Allombert on Tue, 20 Jan 2004 00:55:33 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
| patch for Montgomery-style reduction in Flxq_pow |
Hello Pari-dev,
Here a new patch that implement Montgomery-style reduction
for Flxq_pow.
This patch ned more tuning, and the inversion function would deserve
to not be quadratic.
Here the benchmark used:
---
install(Flx_invmontgomery,GL);
install(Flxq_pow,GGGL);
install(FpXQ_pow,GGGG);
install(Flx_redmontgomery,GGGL);
install(Flx_rem,GGL);
install(ZX_Flx,GL);
montgo1(P,p)=Flx_ZX(Flx_invmontgomery(ZX_Flx(P,p),p))
montgo2(P,p)=variable(P)/polrecip(P*Mod(1,p))+O(x^poldegree(P))
montgo(P,p)=montgo1(P,p)==montgo2(P,p)
{
print("i\t:\ttmg\t:\tmon\t:\trem");
for(k=1,15,e=2^k;
P=Pol(vector(e,i,random(17)));
Pv=ZX_Flx(P,17);
gettime();
mg=Flx_invmontgomery(Pv,17);
tmg=gettime();
Xv=ZX_Flx(x^(2*e-2),17);
gettime();
for(i=1,2^(16-k),Flx_rem(Xv,Pv,17));
trem=gettime();
for(i=1,2^(16-k),Flx_redmontgomery(Xv,mg,Pv,17));
tmon=gettime();
print(k,"\t:\t",tmg,"\t:\t",tmon,"\t:\t",trem);
)
}
-----
i : tmg : mon : rem
1 : 0 : 50 : 40
2 : 0 : 30 : 30
3 : 0 : 20 : 20
4 : 0 : 20 : 10
5 : 0 : 30 : 20
6 : 0 : 30 : 30
7 : 0 : 50 : 50
8 : 0 : 70 : 90
9 : 0 : 100 : 170
10 : 20 : 150 : 340
11 : 80 : 220 : 670
12 : 310 : 330 : 1320
13 : 1220 : 510 : 2650
14 : 4920 : 760 : 5520
15 : 19750 : 1170 : 11430
(this is with the PARI kernel, GMP would be faster)
Cheers,
Bill.
Index: src/basemath/Flx.c
===================================================================
RCS file: /home/cvs/pari/src/basemath/Flx.c,v
retrieving revision 1.12
diff -u -r1.12 Flx.c
--- src/basemath/Flx.c 17 Jan 2004 18:47:50 -0000 1.12
+++ src/basemath/Flx.c 19 Jan 2004 23:12:08 -0000
@@ -233,6 +233,18 @@
return Flx_renormalize(x,l);
}
+/*UNCLEAN modify y[2] (FpX_Fp_add do the same)*/
+GEN
+Flx_Fl_add(GEN y,ulong x,ulong p)
+{
+ long v=varn(y);
+ if (lg(y)==2)
+ return Fl_Flx(x,v);
+ y[2] = adduumod((ulong)y[2],x,p);
+ if (y[2]==0 && degpol(y) == 0) return Flx_zero(v);
+ return y;
+}
+
GEN
Flx_addspec(GEN x, GEN y, ulong p, long lx, long ly)
{
@@ -261,6 +273,27 @@
}
GEN
+Flx_subspec(GEN x, GEN y, ulong p, long lx, long ly)
+{
+ long i,lz;
+ GEN z;
+
+ if (ly <= lx)
+ {
+ lz = lx+2; z = cgetg(lz, t_VECSMALL)+2;
+ for (i=0; i<ly; i++) z[i] = (long)subuumod((ulong)x[i],(ulong)y[i],p);
+ for ( ; i<lx; i++) z[i] = x[i];
+ }
+ else
+ {
+ lz = ly+2; z = cgetg(lz, t_VECSMALL)+2;
+ for (i=0; i<lx; i++) z[i] = (long)subuumod((ulong)x[i],(ulong)y[i],p);
+ for ( ; i<ly; i++) z[i] = y[i]? (long)(p - y[i]): y[i];
+ }
+ return Flx_renormalize(z-2, lz);
+}
+
+GEN
Flx_sub(GEN x, GEN y, ulong p)
{
long i,lz,lx = lg(x), ly = lg(y);
@@ -282,12 +315,21 @@
}
GEN
+Flx_negspec(GEN x, ulong p, long l)
+{
+ long i;
+ GEN z = cgetg(l+2, t_VECSMALL) + 2;
+ for (i=0; i<l; i++)
+ z[i] = x[i]? (long)p - x[i]: 0;
+ return z-2;
+}
+
+
+GEN
Flx_neg(GEN x, ulong p)
{
- long i, l = lg(x);
- GEN z = cgetg(l, t_VECSMALL);
+ GEN z = Flx_negspec(x+2, p, lgpol(x));
z[1] = x[1];
- for (i=2; i<l; i++) z[i] = x[i]? (long)p - x[i]: 0;
return z;
}
@@ -426,7 +468,7 @@
for ( ; i<nx; i++) z[i] = (long)Flx_mullimb(x+i,y,p,0,ny);
for ( ; i<nz; i++) z[i] = (long)Flx_mullimb(x+i,y,p,i-nx+1,ny);
}
- z -= 2; return Flx_renormalize(z, lz);
+ z -= 2; return z;
}
INLINE GEN
@@ -541,7 +583,7 @@
z[i] = (long)p1;
}
}
- z -= 2; return Flx_renormalize(z,lz);
+ z -= 2; return z;
}
GEN
@@ -780,6 +822,80 @@
}
GEN
+Flx_recipspec(GEN x, long l, long n)
+{
+ long i;
+ GEN z=cgetg(n+2,t_VECSMALL)+2;
+ for(i=0; i<l; i++)
+ z[n-i-1] = x[i];
+ for( ; i<n; i++)
+ z[n-i-1] = 0;
+ return Flx_renormalize(z-2,n+2);
+}
+
+GEN
+Flx_recip(GEN x)
+{
+ GEN z=Flx_recipspec(x+2,lgpol(x),lgpol(x));
+ z[1]=x[1];
+ return z;
+}
+
+/*
+ * x/polrecip(P)+O(x^n)
+ */
+GEN Flx_invmontgomery(GEN T, ulong p)
+{
+ pari_sp ltop=avma;
+ long i, l=lg(T), k;
+ long vt=T[1];
+ GEN r;
+ ulong c=T[l-1], ci=1;
+ if (l<5) return Flx_zero(vt);
+ if (c!=1)
+ {
+ ci=invumod(c,p);
+ T=Flx_Fl_mul(T, ci, p);
+ }
+ r=cgetg(l-1,t_VECSMALL); r[1]=vt;
+ r[2]=0; r[3]=1;
+ for (i=4;i<l-1;i++)
+ {
+ r[i] = 0;
+ for (k=3;k<i;k++)
+ r[i]= (long) subuumod((ulong)r[i],muluumod((ulong)T[l-i+k-1],(ulong)r[k],p),p);
+ }
+ r = Flx_renormalize(r,l-1);
+ if (c!=1) r=Flx_Fl_mul(r,ci,p);
+ return gerepileupto(ltop, r);
+}
+
+/* Compute x mod T where lg(x)<=2*lg(T)-2
+ * and mg is the Montgomery inverse of T.
+ */
+GEN Flx_redmontgomery(GEN x, GEN mg, GEN T, ulong p)
+{
+ pari_sp ltop=avma;
+ GEN z;
+ long l=lgpol(x);
+ long lt=degpol(T); /*We discard the leading term*/
+ long lead=lt-1;
+ long ld=l-lt+1;
+ long lm=min(ld,lgpol(mg));
+ if (l<=lt)
+ return Flx_copy(x);
+ new_chunk(lt);
+ z=Flx_recipspec(x+2+lead,ld,ld); /* z = rec(x)*/
+ z=Flx_mulspec(z+2,mg+2,p,min(ld,lgpol(z)),lm);/* z = rec(x) * mg */
+ z=Flx_recipspec(z+2,lgpol(z),ld); /* z = rec (rec(x) * mg) */
+ z=Flx_mulspec(z+2,T+2,p,min(ld,lgpol(z)),lt); /* z *= pol */
+ avma=ltop;
+ z=Flx_subspec(x+2,z+2,p,lt,min(lt,lgpol(z))); /* z = x - z */
+ z[1]=T[1];
+ return z;
+}
+
+GEN
Flx_deriv(GEN z, ulong p)
{
long i,l = lg(z)-1;
@@ -1098,6 +1214,7 @@
typedef struct {
GEN pol;
+ GEN mg;
ulong p;
} Flxq_muldata;
@@ -1106,13 +1223,13 @@
_u_sqr(void *data, GEN x)
{
Flxq_muldata *D = (Flxq_muldata*)data;
- return Flxq_sqr(x, D->pol, D->p);
+ return Flx_redmontgomery(Flx_sqr(x,D->p),D->mg, D->pol, D->p);
}
static GEN
_u_mul(void *data, GEN x, GEN y)
{
Flxq_muldata *D = (Flxq_muldata*)data;
- return Flxq_mul(x,y, D->pol, D->p);
+ return Flx_redmontgomery(Flx_mul(x,y,D->p),D->mg, D->pol, D->p);
}
/* n-Power of x in Z/pZ[X]/(pol), as t_VECSMALL. */
@@ -1125,6 +1242,8 @@
GEN y;
D.pol = pol;
D.p = p;
+ D.mg = Flx_invmontgomery(pol,p);
+ x=Flx_rem(x, pol, p);
y = leftright_pow(x, n, (void*)&D, &_u_sqr, &_u_mul);
return gerepileupto(av, y);
}