Peter Bruin on Mon, 13 Mar 2017 08:24:45 +0100


[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

Re: Linear algebra via CUP decomposition and reduction to matrix multiplication


Hello Bill,

> On Thu, Mar 09, 2017 at 11:02:43AM +0100, Peter Bruin wrote:
>> Hello,
>> 
>> Here is a set of patches that speeds up many linear algebra computations
>> over fields of small characteristic (Flm, FlxqM) by using the CUP matrix
>> decomposition [1].  This decomposes any m × n matrix of rank r over a
>> field as C*U*P with C in column echelon form of size m × r, U upper
>> triangular of size r × n and P a permutation matrix of size n × n.
>> 
>> There is one small user-visible change: the various gauss_pivot and
>> indexrank functions used to return the list of pivot columns in the row
>> echelon form, and for each pivot column the first possible pivot row.
>> The new algorithm instead returns the list of pivot rows in the column
>> echelon from, and for each pivot row some choice of pivot column.  This
>> explains the changed output in the "nf" and "rnfkummer" tests in the
>> last patch.  I checked that the new output is mathematically equivalent
>> to the old output.
>
> Applied, thanks!
>
> I took the opportunuity to add a GP function permsign.

Thanks!  Here are two follow-up patches to simplify a few things:

- The CUP_gauss functions were unnecessarily moving certain matrix
  entries up, thereby destroying known zeroes which then had to be put
  back by other functions; the first patch avoids this unnecessary work.

- The matrix U and the permutation P were always computed, even when
  only the echelon form was needed; the second patch allows U = P = NULL
  in the CUP_gauss functions.  Also, this patch removes a useless
  gerepile in Flm_CUP_gauss and moves the gerepile in FlxqM_CUP_gauss
  into the outermost loop (potentially uses less memory with the same
  time complexity).

Peter

>From 89221d47e0365cdaa7587323e7646f4d68b71044 Mon Sep 17 00:00:00 2001
From: Peter Bruin <P.J.Bruin@math.leidenuniv.nl>
Date: Fri, 10 Mar 2017 15:55:38 +0100
Subject: [PATCH 1/2] simplify CUP decomposition: keep known zeroes in
 non-pivot rows

---
 src/basemath/alglin1.c | 72 +++++++-------------------------------------------
 1 file changed, 10 insertions(+), 62 deletions(-)

diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
index 4f80b73..5e00a12 100644
--- a/src/basemath/alglin1.c
+++ b/src/basemath/alglin1.c
@@ -779,23 +779,19 @@ Flm_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
       swap(gel(A, j), gel(A, pc));
       lswap((*P)[j], (*P)[pc]);
     }
-    if (pr != j) {
-      for (k = j; k <= n; k++)
-        ucoeff(A, j, k) = ucoeff(A, pr, k);
-    }
-    u = Fl_inv(ucoeff(A, j, j), p);
+    u = Fl_inv(ucoeff(A, pr, j), p);
     for (i = pr + 1; i <= m; i++) {
       v = Fl_mul(ucoeff(A, i, j), u, p);
       ucoeff(A, i, j) = v;
       for (k = j + 1; k <= n; k++) {
         ucoeff(A, i, k) = Fl_sub(ucoeff(A, i, k),
-                                 Fl_mul(ucoeff(A, j, k), v, p), p);
+                                 Fl_mul(ucoeff(A, pr, k), v, p), p);
       }
     }
   }
   setlg(*R, j);
   *C = vecslice(A, 1, j - 1);
-  *U = rowslice(A, 1, j - 1);
+  *U = rowpermute(A, *R);
   if (gc_needed(av, 1))
     gerepileall(av, 2, C, U);
   return j - 1;
@@ -866,7 +862,7 @@ Flm_echelon_gauss(GEN A, GEN *R, GEN *C, ulong p) {
 /* column echelon form */
 static ulong
 Flm_echelon(GEN A, GEN *R, GEN *C, ulong p) {
-  long i, j, j1, j2, m = nbrows(A), n = lg(A) - 1, n1, r, r1, r2;
+  long j, j1, j2, m = nbrows(A), n = lg(A) - 1, n1, r, r1, r2;
   GEN A1, A2, R1, R1c, C1, R2, C2;
   GEN A12, A22, B2, C11, C21, M12;
   pari_sp av = avma;
@@ -888,11 +884,6 @@ Flm_echelon(GEN A, GEN *R, GEN *C, ulong p) {
   R1c = indexcompl(R1, m);
   C11 = rowpermute(C1, R1);
   C21 = rowpermute(C1, R1c);
-  if (R1[r1] != r1) {
-    for (j = 1; j <= r1; j++)
-      for (i = 1; i <= R1[j] - j; i++)
-        ucoeff(C21, i, j) = 0;
-  }
   A12 = rowpermute(A2, R1);
   A22 = rowpermute(A2, R1c);
   M12 = Flm_rsolve_lower_unit(C11, A12, p);
@@ -1144,23 +1135,19 @@ FlxqM_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
       swap(gel(A, j), gel(A, pc));
       lswap((*P)[j], (*P)[pc]);
     }
-    if (pr != j) {
-      for (k = j; k <= n; k++)
-        gcoeff(A, j, k) = gcoeff(A, pr, k);
-    }
-    u = Flxq_inv(gcoeff(A, j, j), T, p);
+    u = Flxq_inv(gcoeff(A, pr, j), T, p);
     for (i = pr + 1; i <= m; i++) {
       v = Flxq_mul(gcoeff(A, i, j), u, T, p);
       gcoeff(A, i, j) = v;
       for (k = j + 1; k <= n; k++) {
         gcoeff(A, i, k) = Flx_sub(gcoeff(A, i, k),
-                                  Flx_mul(gcoeff(A, j, k), v, p), p);
+                                  Flx_mul(gcoeff(A, pr, k), v, p), p);
       }
     }
   }
   setlg(*R, j);
   *C = vecslice(A, 1, j - 1);
-  *U = rowslice(A, 1, j - 1);
+  *U = rowpermute(A, *R);
   if (gc_needed(av, 1))
     gerepileall(av, 2, C, U);
   return j - 1;
@@ -1232,7 +1219,7 @@ FlxqM_echelon_gauss(GEN A, GEN *R, GEN *C, GEN T, ulong p) {
 /* column echelon form */
 static ulong
 FlxqM_echelon(GEN A, GEN *R, GEN *C, GEN T, ulong p) {
-  long i, j, j1, j2, m = nbrows(A), n = lg(A) - 1, n1, r, r1, r2;
+  long j, j1, j2, m = nbrows(A), n = lg(A) - 1, n1, r, r1, r2;
   GEN A1, A2, R1, R1c, C1, R2, C2;
   GEN A12, A22, B2, C11, C21, M12;
   pari_sp av = avma;
@@ -1254,12 +1241,6 @@ FlxqM_echelon(GEN A, GEN *R, GEN *C, GEN T, ulong p) {
   R1c = indexcompl(R1, m);
   C11 = rowpermute(C1, R1);
   C21 = rowpermute(C1, R1c);
-  if (R1[r1] != r1) {
-    GEN zero = zero_Flx(get_Flx_var(T));
-    for (j = 1; j <= r1; j++)
-      for (i = 1; i <= R1[j] - j; i++)
-        gcoeff(C21, i, j) = zero;
-  }
   A12 = rowpermute(A2, R1);
   A22 = rowpermute(A2, R1c);
   M12 = FlxqM_rsolve_lower_unit(C11, A12, T, p);
@@ -1584,12 +1565,6 @@ Flm_ker_echelon(GEN x, ulong p, long pivots) {
   Rc = indexcompl(R, n);
   C1 = rowpermute(C, R);
   C2 = rowpermute(C, Rc);
-  if (R[r] != r) {
-    long i, j;
-    for (j = 1; j <= r; j++)
-      for (i = 1; i <= R[j] - j; i++)
-        ucoeff(C2, i, j) = 0;
-  }
   S = Flm_lsolve_lower_unit(C1, C2, p);
   K = vecpermute(shallowconcat(Flm_neg(S, p), matid_Flm(n - r)),
                  perm_inv(vecsmall_concat(R, Rc)));
@@ -1603,15 +1578,13 @@ static GEN
 Flm_deplin_echelon(GEN x, ulong p) {
   pari_sp av = avma;
   GEN R, Rc, C, C1, C2, s, v;
-  long i, j, n = lg(x) - 1, r;
+  long i, n = lg(x) - 1, r;
   r = Flm_echelon(Flm_transpose(x), &R, &C, p);
   if (r == n) { avma = av; return NULL; }
   Rc = indexcompl(R, n);
   i = Rc[1];
   C1 = rowpermute(C, R);
   C2 = rowslice(C, i, i);
-  for (j = i; j <= r; j++)
-    ucoeff(C2, 1, j) = 0UL;
   s = Flm_row(Flm_lsolve_lower_unit(C1, C2, p), 1);
   v = vecpermute(vecsmall_concat(Flv_neg(s, p), vecsmall_ei(n - r, 1)),
                  perm_inv(vecsmall_concat(R, Rc)));
@@ -2121,13 +2094,6 @@ FlxqM_invimage_CUP(GEN A, GEN B, GEN T, ulong p) {
   Rc = indexcompl(R, nbrows(B));
   C1 = rowpermute(C, R);
   C2 = rowpermute(C, Rc);
-  if (R[r] != r) {
-    long i, j;
-    GEN zero = zero_Flx(sv);
-    for (j = 1; j <= r; j++)
-      for (i = 1; i <= R[j] - j; i++)
-        gcoeff(C2, i, j) = zero;
-  }
   B1 = rowpermute(B, R);
   B2 = rowpermute(B, Rc);
   Z = FlxqM_rsolve_lower_unit(C1, B1, T, p);
@@ -2392,13 +2358,6 @@ FlxqM_ker_echelon(GEN x, GEN T, ulong p) {
   Rc = indexcompl(R, n);
   C1 = rowpermute(C, R);
   C2 = rowpermute(C, Rc);
-  if (R[r] != r) {
-    long i, j;
-    GEN zero = zero_Flx(get_Flx_var(T));
-    for (j = 1; j <= r; j++)
-      for (i = 1; i <= R[j] - j; i++)
-        gcoeff(C2, i, j) = zero;
-  }
   S = FlxqM_lsolve_lower_unit(C1, C2, T, p);
   K = vecpermute(shallowconcat(FlxM_neg(S, p), matid_FlxqM(n - r, T, p)),
                  perm_inv(vecsmall_concat(R, Rc)));
@@ -2417,18 +2376,13 @@ static GEN
 FlxqM_deplin_echelon(GEN x, GEN T, ulong p) {
   pari_sp av = avma;
   GEN R, Rc, C, C1, C2, s, v;
-  long i, j, n = lg(x) - 1, r, sv = get_Flx_var(T);
+  long i, n = lg(x) - 1, r, sv = get_Flx_var(T);
   r = FlxqM_echelon(shallowtrans(x), &R, &C, T, p);
   if (r == n) { avma = av; return NULL; }
   Rc = indexcompl(R, n);
   i = Rc[1];
   C1 = rowpermute(C, R);
   C2 = rowslice(C, i, i);
-  if (i <= r) {
-    GEN zero = zero_Flx(sv);
-    for (j = i; j <= r; j++)
-      gcoeff(C2, 1, j) = zero;
-  }
   s = row(FlxqM_lsolve_lower_unit(C1, C2, T, p), 1);
   settyp(s, t_COL);
   v = vecpermute(shallowconcat(FlxC_neg(s, p), col_ei_FlxC(n - r, 1, sv)),
@@ -4403,12 +4357,6 @@ Flm_invimage_CUP(GEN A, GEN B, ulong p) {
   Rc = indexcompl(R, nbrows(B));
   C1 = rowpermute(C, R);
   C2 = rowpermute(C, Rc);
-  if (R[r] != r) {
-    long i, j;
-    for (j = 1; j <= r; j++)
-      for (i = 1; i <= R[j] - j; i++)
-        ucoeff(C2, i, j) = 0UL;
-  }
   B1 = rowpermute(B, R);
   B2 = rowpermute(B, Rc);
   Z = Flm_rsolve_lower_unit(C1, B1, p);
-- 
2.7.3

>From 5ad7e2199d70b4b66b5650b26421884868ccea81 Mon Sep 17 00:00:00 2001
From: Peter Bruin <P.J.Bruin@math.leidenuniv.nl>
Date: Sat, 11 Mar 2017 12:13:52 +0100
Subject: [PATCH 2/2] allow U = P = NULL in CUP decomposition, and improve
 garbage collection

---
 src/basemath/alglin1.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
index 5e00a12..8d747b8 100644
--- a/src/basemath/alglin1.c
+++ b/src/basemath/alglin1.c
@@ -757,11 +757,9 @@ Flm_lsolve_lower_unit(GEN L, GEN A, ulong p) {
 static long
 Flm_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
   long i, j, k, m = nbrows(A), n = lg(A) - 1, pr, pc, u, v;
-  pari_sp av;
 
-  *P = identity_perm(n);
+  if (P) *P = identity_perm(n);
   *R = cgetg(m + 1, t_VECSMALL);
-  av = avma;
   for (j = 1, pr = 0; j <= n; j++) {
     for (pr++, pc = 0; pr <= m; pr++) {
       for (k = j; k <= n; k++) {
@@ -777,7 +775,7 @@ Flm_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
     (*R)[j] = pr;
     if (pc != j) {
       swap(gel(A, j), gel(A, pc));
-      lswap((*P)[j], (*P)[pc]);
+      if (P) lswap((*P)[j], (*P)[pc]);
     }
     u = Fl_inv(ucoeff(A, pr, j), p);
     for (i = pr + 1; i <= m; i++) {
@@ -791,9 +789,7 @@ Flm_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
   }
   setlg(*R, j);
   *C = vecslice(A, 1, j - 1);
-  *U = rowpermute(A, *R);
-  if (gc_needed(av, 1))
-    gerepileall(av, 2, C, U);
+  if (U) *U = rowpermute(A, *R);
   return j - 1;
 }
 
@@ -855,8 +851,7 @@ Flm_CUP(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
 
 static ulong
 Flm_echelon_gauss(GEN A, GEN *R, GEN *C, ulong p) {
-  GEN U, P;
-  return Flm_CUP_gauss(A, R, C, &U, &P, p);
+  return Flm_CUP_gauss(A, R, C, NULL, NULL, p);
 }
 
 /* column echelon form */
@@ -1114,7 +1109,7 @@ FlxqM_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
   pari_sp av;
   GEN u, v;
 
-  *P = identity_perm(n);
+  if (P) *P = identity_perm(n);
   *R = cgetg(m + 1, t_VECSMALL);
   av = avma;
   for (j = 1, pr = 0; j <= n; j++) {
@@ -1133,7 +1128,7 @@ FlxqM_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
     (*R)[j] = pr;
     if (pc != j) {
       swap(gel(A, j), gel(A, pc));
-      lswap((*P)[j], (*P)[pc]);
+      if (P) lswap((*P)[j], (*P)[pc]);
     }
     u = Flxq_inv(gcoeff(A, pr, j), T, p);
     for (i = pr + 1; i <= m; i++) {
@@ -1144,12 +1139,12 @@ FlxqM_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
                                   Flx_mul(gcoeff(A, pr, k), v, p), p);
       }
     }
+    if (gc_needed(av, 2))
+      A = gerepilecopy(av, A);
   }
   setlg(*R, j);
   *C = vecslice(A, 1, j - 1);
-  *U = rowpermute(A, *R);
-  if (gc_needed(av, 1))
-    gerepileall(av, 2, C, U);
+  if (U) *U = rowpermute(A, *R);
   return j - 1;
 }
 
@@ -1212,8 +1207,7 @@ FlxqM_CUP(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
 
 static ulong
 FlxqM_echelon_gauss(GEN A, GEN *R, GEN *C, GEN T, ulong p) {
-  GEN U, P;
-  return FlxqM_CUP_gauss(A, R, C, &U, &P, T, p);
+  return FlxqM_CUP_gauss(A, R, C, NULL, NULL, T, p);
 }
 
 /* column echelon form */
-- 
2.7.3