Speed up RgX_mul, RgX_sqr, RgX_divrem over Z, Fp, FpXQ

Peter Bruin on Wed, 07 Jun 2017 19:43:54 +0200

[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

Speed up RgX_mul, RgX_sqr, RgX_divrem over Z, Fp, FpXQ

To: pari-dev@pari.math.u-bordeaux1.fr
Subject: Speed up RgX_mul, RgX_sqr, RgX_divrem over Z, Fp, FpXQ
From: Peter Bruin <P.J.Bruin@math.leidenuniv.nl>
Date: Wed, 07 Jun 2017 19:43:32 +0200
Cancel-lock: sha1:jOrn2vdk1HN2LzURR+p/EZzn69o=
Delivery-date: Wed, 07 Jun 2017 19:43:54 +0200
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.5 (gnu/linux)

Hello,

The following resultant computation (with output of degree 9480)
currently takes about 10 minutes:

g = x^11+t^13+3*x^7+(2*(t+1)^8+2*t)^7*x^6+(2*t^8+t^6+2*t)^45*x^3+(t+1)^5*t^4*(2*x+x)+t^12+2;
f = Mod(g^2+x^4*(t+2)^4-t^12, 5);
r = polresultant(f, f', 'x);

Claus Fieker pointed out that this only takes about 0.3 seconds in Bill
Hart's implementation of the subresultant algorithm in Julia.

It turns out that the PARI implementation can also be significantly sped
up by dispatching to more specialised functions in RgX_mul, RgX_sqr and
RgX_divrem.  After applying the two attached patches, the time for the
above computation drops to about 0.7 seconds.

A possible further speed-up over small prime finite fields would be to
implement a function Flx_to_mod_inplace that combines the calls to
Flx_to_ZX_inplace and FpX_to_mod.

Thanks,

Peter

>From c83deaa76e23b9951745f17766538b695c4366aa Mon Sep 17 00:00:00 2001
From: Peter Bruin <P.J.Bruin@math.leidenuniv.nl>
Date: Wed, 7 Jun 2017 02:27:22 +0200
Subject: [PATCH 1/2] check for ZX, FpX, FpXQ in RgX_mul and RgX_sqr

---
 src/basemath/RgX.c  | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/basemath/gen1.c | 17 +----------------
 src/test/32/ell     |  4 ++--
 3 files changed, 56 insertions(+), 20 deletions(-)

diff --git a/src/basemath/RgX.c b/src/basemath/RgX.c
index 3c02342..4c16aee 100644
--- a/src/basemath/RgX.c
+++ b/src/basemath/RgX.c
@@ -1514,14 +1514,65 @@ RgX_mul_normalized(GEN A, long a, GEN B, long b)
 GEN
 RgX_mul(GEN x, GEN y)
 {
-  GEN z = RgX_mulspec(y+2, x+2, lgpol(y), lgpol(x));
+  GEN p, pol, xx, z;
+  pari_sp av;
+  if (RgX_is_ZX(x) && RgX_is_ZX(y)) return ZX_mul(x, y);
+  av = avma;
+  p = NULL;
+  if (RgX_is_FpX(x, &p) && RgX_is_FpX(y, &p))
+  {
+    if (lgefint(p) == 3)
+    {
+      ulong pp = uel(p, 2);
+      z = Flx_to_ZX_inplace(Flx_mul(RgX_to_Flx(x, pp),
+                                    RgX_to_Flx(y, pp), pp));
+    }
+    else
+      z = FpX_mul(RgX_to_FpX(x, p), RgX_to_FpX(y, p), p);
+    return gerepileupto(av, FpX_to_mod(z, p));
+  }
+  p = NULL; pol = NULL; xx = x;
+  if (ff_poltype(&xx, &p, &pol) && ff_poltype(&y, &p, &pol))
+  {
+    z = ZX_mul(xx, y);
+    if (p) z = FpX_to_mod(z, p);
+    if (pol) z = Kronecker_to_mod(z, pol);
+    return gerepileupto(av, z);
+  }
+  avma = av;
+  z = RgX_mulspec(y+2, x+2, lgpol(y), lgpol(x));
   setvarn(z,varn(x)); return z;
 }
 
 GEN
 RgX_sqr(GEN x)
 {
-  GEN z = RgX_sqrspec(x+2, lgpol(x));
+  GEN p, pol, z;
+  pari_sp av;
+  if (RgX_is_ZX(x)) return ZX_sqr(x);
+  av = avma;
+  p = NULL;
+  if (RgX_is_FpX(x, &p))
+  {
+    if (lgefint(p) == 3)
+    {
+      ulong pp = uel(p, 2);
+      z = Flx_to_ZX_inplace(Flx_sqr(RgX_to_Flx(x, pp), pp));
+    }
+    else
+      z = FpX_sqr(RgX_to_FpX(x, p), p);
+    return gerepileupto(av, FpX_to_mod(z, p));
+  }
+  p = NULL; pol = NULL;
+  if (ff_poltype(&x, &p, &pol))
+  {
+    z = ZX_sqr(x);
+    if (p) z = FpX_to_mod(z, p);
+    if (pol) z = Kronecker_to_mod(z, pol);
+    return gerepileupto(av, z);
+  }
+  avma = av;
+  z = RgX_sqrspec(x+2, lgpol(x));
   setvarn(z,varn(x)); return z;
 }
 
diff --git a/src/basemath/gen1.c b/src/basemath/gen1.c
index c2223a9..ff288b0 100644
--- a/src/basemath/gen1.c
+++ b/src/basemath/gen1.c
@@ -1880,7 +1880,6 @@ gmul(GEN x, GEN y)
         if (varncmp(vx, vy) < 0) return RgX_Rg_mul(x, y);
         else                     return RgX_Rg_mul(y, x);
       }
-      if (RgX_is_ZX(x) && RgX_is_ZX(y)) return ZX_mul(x,y);
       return RgX_mul(x, y);
 
     case t_SER: {
@@ -2245,21 +2244,7 @@ gsqr(GEN x)
 
     case t_FFELT: return FF_sqr(x);
 
-    case t_POL:
-    {
-      GEN a = x, p = NULL, pol = NULL;
-      av = avma;
-      if (RgX_is_ZX(x)) return ZX_sqr(x);
-      if (ff_poltype(&x,&p,&pol))
-      {
-        z = ZX_sqr(x);
-        if (p) z = FpX_to_mod(z,p);
-        if (pol) z = Kronecker_to_mod(z,pol);
-        z = gerepileupto(av, z);
-      }
-      else { avma = av; z = RgX_sqr(a); }
-      return z;
-    }
+    case t_POL: return RgX_sqr(x);
 
     case t_SER:
       lx = lg(x);
diff --git a/src/test/32/ell b/src/test/32/ell
index 8954671..26c5492 100644
--- a/src/test/32/ell
+++ b/src/test/32/ell
@@ -140,8 +140,8 @@ x^6 + 884*z^4*x^4 + (50*z^4 + 809*z^3 + 859*z^2 + 150*z + 50)*x^2 + (5*z^4 +
 *z, z^2 + 5)*x^6 + Mod(-150, z^2 + 5)*x^4 + Mod(60*z, z^2 + 5)*x^2 + Mod(25,
  z^2 + 5)]
 (E)->ellxn(E,3):Mod(4, 1009):[Mod(1, 1009)*x^9 + Mod(961, 1009)*x^7 + Mod(48
-0, 1009)*x^5 + Mod(286, 1009)*x^3 + Mod(286, 1009)*x, 9*x^8 + Mod(144, 1009)
-*x^6 + Mod(480, 1009)*x^4 + Mod(241, 1009)*x^2 + Mod(256, 1009)]
+0, 1009)*x^5 + Mod(286, 1009)*x^3 + Mod(286, 1009)*x, Mod(9, 1009)*x^8 + Mod
+(144, 1009)*x^6 + Mod(480, 1009)*x^4 + Mod(241, 1009)*x^2 + Mod(256, 1009)]
 (E)->ellxn(E,3):z:[Mod(1, 1009)*x^9 + 997*z*x^7 + 30*z^2*x^5 + 36*z^3*x^3 + 
 9*z^4*x, 9*x^8 + 36*z*x^6 + 30*z^2*x^4 + 997*z^3*x^2 + z^4]
 (E)->ellmul(E,[0,0],0):1:[0]
-- 
2.10.2

>From c33013f45b88da612d17ffbb56d6a58d41695a07 Mon Sep 17 00:00:00 2001
From: Peter Bruin <P.J.Bruin@math.leidenuniv.nl>
Date: Wed, 7 Jun 2017 02:32:51 +0200
Subject: [PATCH 2/2] check for FpX in RgX_divrem

---
 src/basemath/RgX.c | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/src/basemath/RgX.c b/src/basemath/RgX.c
index 4c16aee..b989fee 100644
--- a/src/basemath/RgX.c
+++ b/src/basemath/RgX.c
@@ -1636,17 +1636,16 @@ RgX_div_by_X_x(GEN a, GEN x, GEN *r)
   return z;
 }
 /* Polynomial division x / y:
- *   if z = ONLY_REM  return remainder, otherwise return quotient
- *   if z != NULL set *z to remainder
- *   *z is the last object on stack (and thus can be disposed of with cgiv
- *   instead of gerepile) */
+ *   if pr = ONLY_REM return remainder, otherwise return quotient
+ *   if pr = ONLY_DIVIDES return quotient if division is exact, else NULL
+ *   if pr != NULL set *pr to remainder, as the last object on stack */
 /* assume, typ(x) = typ(y) = t_POL, same variable */
 GEN
 RgX_divrem(GEN x, GEN y, GEN *pr)
 {
   pari_sp avy, av, av1;
   long dx,dy,dz,i,j,sx,lr;
-  GEN z,p1,p2,rem,y_lead,mod;
+  GEN z,p1,p2,rem,y_lead,mod,p;
   GEN (*f)(GEN,GEN);
 
   if (!signe(y)) pari_err_INV("RgX_divrem",y);
@@ -1682,6 +1681,30 @@ RgX_divrem(GEN x, GEN y, GEN *pr)
 
   /* x,y in R[X], y non constant */
   av = avma;
+  p = NULL;
+  if (RgX_is_FpX(x, &p) && RgX_is_FpX(y, &p) && p)
+  {
+    if (lgefint(p) == 3)
+    {
+      ulong pp = uel(p, 2);
+      z = Flx_divrem(RgX_to_Flx(x, pp), RgX_to_Flx(y, pp), pp, pr);
+      if (!z) { avma = av; return NULL; }
+      z = Flx_to_ZX_inplace(z);
+      if (pr && pr != ONLY_REM && pr != ONLY_DIVIDES)
+	*pr = Flx_to_ZX_inplace(*pr);
+    }
+    else
+    {
+      z = FpX_divrem(RgX_to_FpX(x, p), RgX_to_FpX(y, p), p, pr);
+      if (!z) { avma = av; return NULL; }
+    }
+    z = FpX_to_mod(z, p);
+    if (!pr || pr == ONLY_REM || pr == ONLY_DIVIDES)
+      return gerepileupto(av, z);
+    *pr = FpX_to_mod(*pr, p);
+    gerepileall(av, 2, pr, &z);
+    return z;
+  }
   switch(typ(y_lead))
   {
     case t_REAL:
-- 
2.10.2

Follow-Ups:
- Re: Speed up RgX_mul, RgX_sqr, RgX_divrem over Z, Fp, FpXQ
  - From: Bill Allombert <Bill.Allombert@math.u-bordeaux.fr>

Next by Date: Re: Speed up RgX_mul, RgX_sqr, RgX_divrem over Z, Fp, FpXQ
Next by thread: Re: Speed up RgX_mul, RgX_sqr, RgX_divrem over Z, Fp, FpXQ
Index(es):
- Date
- Thread