Bill Allombert on Sun, 11 Jan 2004 19:22:15 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
Re: Flx_sqr/sqri comparaison |
On Sun, Jan 11, 2004 at 01:14:34PM +0100, Bill Allombert wrote: > Hello PARI-dev, > I have a plan to reuse GMP fast multiplication in Flx_sqr. Well, here a prototype patch that implement that. Dues to the bench above it is also worthwhile to do it with the PARI kernel. This patch deserve some tuning. Here the results : i : old : PARI : GMP 1 : 120 : 140 : 170 2 : 80 : 70 : 80 3 : 80 : 50 : 50 4 : 90 : 60 : 70 5 : 130 : 70 : 100 6 : 190 : 110 : 120 7 : 300 : 150 : 160 8 : 460 : 230 : 280 9 : 700 : 340 : 370 10 : 1060 : 490 : 560 11 : 1620 : 750 : 780 12 : 2450 : 1120 : 960 13 : 3690 : 1690 : 1000 14 : 5600 : 2560 : 1140 15 : 8420 : 3830 : 1220 16 : 12670 : 5780 : 1540 I am still puzzled by straight Flx_sqr being so slow. Cheers, Bill
Index: src/basemath/Flx.c =================================================================== RCS file: /home/cvs/pari/src/basemath/Flx.c,v retrieving revision 1.10 diff -u -r1.10 Flx.c --- src/basemath/Flx.c 11 Jan 2004 00:21:04 -0000 1.10 +++ src/basemath/Flx.c 11 Jan 2004 17:35:14 -0000 @@ -14,6 +14,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "pari.h" +GEN muliispec(GEN x, GEN y, long nx, long ny); +GEN sqrispec(GEN x, long nx); /* Not so fast arithmetic with polynomials with small coefficients. */ @@ -210,15 +212,25 @@ return x; } +/*Do not renormalize. Must not use z[0],z[1]*/ +static GEN +Flx_red_lg_i(GEN z, long l, ulong p) +{ + long i; + ulong *y=(ulong *)z; + GEN x = cgetg(l, t_VECSMALL); + for (i=2; i<l; i++) + x[i] = (long) y[i]%p; + return x; +} + GEN Flx_red(GEN z, ulong p) { - long i,l; - GEN x; - l = lg(z); x = cgetg(l, t_VECSMALL); - for (i=2; i<l; i++) - x[i] = z[i]%p; - x[1] = z[1]; return Flx_renormalize(x,l); + long l = lg(z); + GEN x = Flx_red_lg_i(z,l,p); + x[1] = z[1]; + return Flx_renormalize(x,l); } GEN @@ -356,6 +368,18 @@ return y; } +static long +maxlenghtcoeffpol (ulong p, long n) +{ + pari_sp ltop=avma; + long l; + GEN z=muluu(p-1,p-1); + z=mulis(z,n); + l=lgefint(z)-2; + avma=ltop; + return l; +} + INLINE ulong Flx_mullimb_ok(GEN x, GEN y, ulong p, long a, long b) { /* Assume OK_ULONG*/ @@ -367,7 +391,8 @@ p1 += y[i] * x[-i]; if (p1 & HIGHBIT) p1 %= p; } - return p1 % p; + if (p1>=p) p1 %= p; + return p1; } INLINE ulong @@ -382,7 +407,7 @@ } /* assume nx >= ny > 0 */ -GEN +static GEN Flx_mulspec_basecase(GEN x, GEN y, ulong p, long nx, long ny) { long i,lz,nz; @@ -405,6 +430,13 @@ z -= 2; return Flx_renormalize(z, lz); } +static GEN +Flx_mulspec_mulii(GEN a, GEN b, ulong p, long na, long nb) +{ + GEN z=muliispec(a,b,na,nb); + return Flx_red_lg_i(z,lgefint(z),p); +} + /* fast product (Karatsuba) of polynomials a,b. These are not real GENs, a+2, * b+2 were sent instead. na, nb = number of terms of a, b. * Only c, c0, c1, c2 are genuine GEN. @@ -422,6 +454,8 @@ if (!nb) return Flx_zero(0); av = avma; + if (maxlenghtcoeffpol(p,nb)==1) + return Flx_shiftip(av,Flx_mulspec_mulii(a,b,p,na,nb), v); if (nb < Flx_MUL_LIMIT) return Flx_shiftip(av,Flx_mulspec_basecase(a,b,p,na,nb), v); i=(na>>1); n0=na-i; na=i; @@ -464,7 +498,7 @@ z[1] = x[1]; return z; } -GEN +static GEN Flx_sqrspec_basecase(GEN x, ulong p, long nx) { long i, lz, nz; @@ -520,6 +554,13 @@ z[1] = x[1]; return z; } +static GEN +Flx_sqrspec_sqri(GEN a, ulong p, long na) +{ + GEN z=sqrispec(a,na); + return Flx_red_lg_i(z,lgefint(z),p); +} + GEN Flx_sqrspec(GEN a, ulong p, long na) { @@ -529,6 +570,8 @@ while (na && !a[0]) { a++; na--; v += 2; } av = avma; + if (maxlenghtcoeffpol(p,na)==1) + return Flx_shiftip(av, Flx_sqrspec_sqri(a,p,na), v); if (na < Flx_SQR_LIMIT) return Flx_shiftip(av, Flx_sqrspec_basecase(a,p,na), v); i=(na>>1); n0=na-i; na=i; Index: src/kernel/gmp/mp.c =================================================================== RCS file: /home/cvs/pari/src/kernel/gmp/mp.c,v retrieving revision 1.31 diff -u -r1.31 mp.c --- src/kernel/gmp/mp.c 14 Oct 2003 08:57:35 -0000 1.31 +++ src/kernel/gmp/mp.c 11 Jan 2004 17:35:16 -0000 @@ -791,8 +791,7 @@ z[1] = evalsigne(s) | evalexpo(m+e); return z; } -INLINE GEN -muliispec(GEN x, GEN y, long nx, long ny); +GEN muliispec(GEN x, GEN y, long nx, long ny); /* We must have nx>=ny. This lets garbage on the stack. This handle squares correctly (mpn_mul is optimized @@ -800,7 +799,7 @@ */ INLINE GEN -quickmulii(GEN x, GEN y, long nx, long ny) +muliispec_mirror(GEN x, GEN y, long nx, long ny) { GEN cx=new_chunk(nx),cy; GEN z; @@ -826,11 +825,11 @@ long i, l, lz2 = (lz+2)>>1, lz3 = lz-lz2; GEN lo1, lo2, hi; - hi = quickmulii(x,y, lz2,lz2); + hi = muliispec_mirror(x,y, lz2,lz2); i = lz2; while (i<lz && !x[i]) i++; - lo1 = quickmulii(y,x+i, lz2,lz-i); + lo1 = muliispec_mirror(y,x+i, lz2,lz-i); i = lz2; while (i<ly && !y[i]) i++; - lo2 = quickmulii(x,y+i, lz2,ly-i); + lo2 = muliispec_mirror(x,y+i, lz2,ly-i); if (signe(lo1)) { if (ly!=lz) { lo2 = addshiftw(lo1,lo2,1); lz3++; } @@ -861,7 +860,7 @@ #ifdef KARAMULR_VARIANT GEN hi = karamulrr1(y+2, x+2, lz+flag-2, lz-2); #else - GEN hi = quickmulii(y+2, x+2, lz+flag-2, lz-2); + GEN hi = muliispec_mirror(y+2, x+2, lz+flag-2, lz-2); #endif long i, garde = hi[lz]; if (hi[2] < 0) @@ -1753,7 +1752,7 @@ /********************************************************************/ /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */ -INLINE GEN +GEN muliispec(GEN x, GEN y, long nx, long ny) { GEN zd; @@ -1775,7 +1774,7 @@ return zd; } -INLINE GEN +GEN sqrispec(GEN x, long nx) { GEN zd; Index: src/kernel/none/mp.c =================================================================== RCS file: /home/cvs/pari/src/kernel/none/mp.c,v retrieving revision 1.121 diff -u -r1.121 mp.c --- src/kernel/none/mp.c 10 Jan 2004 16:33:02 -0000 1.121 +++ src/kernel/none/mp.c 11 Jan 2004 17:35:17 -0000 @@ -720,7 +720,7 @@ z[1] = evalsigne(s) | evalexpo(m+e); return z; } -static GEN muliispec(GEN a, GEN b, long na, long nb); +GEN muliispec(GEN a, GEN b, long na, long nb); /*#define KARAMULR_VARIANT*/ #ifdef KARAMULR_VARIANT @@ -1974,7 +1974,7 @@ /* Fast product (Karatsuba) of integers. a and b are "special" GENs * c,c0,c1,c2 are genuine GENs. */ -static GEN +GEN muliispec(GEN a, GEN b, long na, long nb) { GEN a0,c,c0; @@ -2100,7 +2100,7 @@ return z; } -static GEN +GEN sqrispec(GEN a, long na) { GEN a0,c;