| Peter Bruin on Mon, 13 Mar 2017 08:24:45 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
| Re: Linear algebra via CUP decomposition and reduction to matrix multiplication |
Hello Bill, > On Thu, Mar 09, 2017 at 11:02:43AM +0100, Peter Bruin wrote: >> Hello, >> >> Here is a set of patches that speeds up many linear algebra computations >> over fields of small characteristic (Flm, FlxqM) by using the CUP matrix >> decomposition [1]. This decomposes any m × n matrix of rank r over a >> field as C*U*P with C in column echelon form of size m × r, U upper >> triangular of size r × n and P a permutation matrix of size n × n. >> >> There is one small user-visible change: the various gauss_pivot and >> indexrank functions used to return the list of pivot columns in the row >> echelon form, and for each pivot column the first possible pivot row. >> The new algorithm instead returns the list of pivot rows in the column >> echelon from, and for each pivot row some choice of pivot column. This >> explains the changed output in the "nf" and "rnfkummer" tests in the >> last patch. I checked that the new output is mathematically equivalent >> to the old output. > > Applied, thanks! > > I took the opportunuity to add a GP function permsign. Thanks! Here are two follow-up patches to simplify a few things: - The CUP_gauss functions were unnecessarily moving certain matrix entries up, thereby destroying known zeroes which then had to be put back by other functions; the first patch avoids this unnecessary work. - The matrix U and the permutation P were always computed, even when only the echelon form was needed; the second patch allows U = P = NULL in the CUP_gauss functions. Also, this patch removes a useless gerepile in Flm_CUP_gauss and moves the gerepile in FlxqM_CUP_gauss into the outermost loop (potentially uses less memory with the same time complexity). Peter
>From 89221d47e0365cdaa7587323e7646f4d68b71044 Mon Sep 17 00:00:00 2001
From: Peter Bruin <P.J.Bruin@math.leidenuniv.nl>
Date: Fri, 10 Mar 2017 15:55:38 +0100
Subject: [PATCH 1/2] simplify CUP decomposition: keep known zeroes in
non-pivot rows
---
src/basemath/alglin1.c | 72 +++++++-------------------------------------------
1 file changed, 10 insertions(+), 62 deletions(-)
diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
index 4f80b73..5e00a12 100644
--- a/src/basemath/alglin1.c
+++ b/src/basemath/alglin1.c
@@ -779,23 +779,19 @@ Flm_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
swap(gel(A, j), gel(A, pc));
lswap((*P)[j], (*P)[pc]);
}
- if (pr != j) {
- for (k = j; k <= n; k++)
- ucoeff(A, j, k) = ucoeff(A, pr, k);
- }
- u = Fl_inv(ucoeff(A, j, j), p);
+ u = Fl_inv(ucoeff(A, pr, j), p);
for (i = pr + 1; i <= m; i++) {
v = Fl_mul(ucoeff(A, i, j), u, p);
ucoeff(A, i, j) = v;
for (k = j + 1; k <= n; k++) {
ucoeff(A, i, k) = Fl_sub(ucoeff(A, i, k),
- Fl_mul(ucoeff(A, j, k), v, p), p);
+ Fl_mul(ucoeff(A, pr, k), v, p), p);
}
}
}
setlg(*R, j);
*C = vecslice(A, 1, j - 1);
- *U = rowslice(A, 1, j - 1);
+ *U = rowpermute(A, *R);
if (gc_needed(av, 1))
gerepileall(av, 2, C, U);
return j - 1;
@@ -866,7 +862,7 @@ Flm_echelon_gauss(GEN A, GEN *R, GEN *C, ulong p) {
/* column echelon form */
static ulong
Flm_echelon(GEN A, GEN *R, GEN *C, ulong p) {
- long i, j, j1, j2, m = nbrows(A), n = lg(A) - 1, n1, r, r1, r2;
+ long j, j1, j2, m = nbrows(A), n = lg(A) - 1, n1, r, r1, r2;
GEN A1, A2, R1, R1c, C1, R2, C2;
GEN A12, A22, B2, C11, C21, M12;
pari_sp av = avma;
@@ -888,11 +884,6 @@ Flm_echelon(GEN A, GEN *R, GEN *C, ulong p) {
R1c = indexcompl(R1, m);
C11 = rowpermute(C1, R1);
C21 = rowpermute(C1, R1c);
- if (R1[r1] != r1) {
- for (j = 1; j <= r1; j++)
- for (i = 1; i <= R1[j] - j; i++)
- ucoeff(C21, i, j) = 0;
- }
A12 = rowpermute(A2, R1);
A22 = rowpermute(A2, R1c);
M12 = Flm_rsolve_lower_unit(C11, A12, p);
@@ -1144,23 +1135,19 @@ FlxqM_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
swap(gel(A, j), gel(A, pc));
lswap((*P)[j], (*P)[pc]);
}
- if (pr != j) {
- for (k = j; k <= n; k++)
- gcoeff(A, j, k) = gcoeff(A, pr, k);
- }
- u = Flxq_inv(gcoeff(A, j, j), T, p);
+ u = Flxq_inv(gcoeff(A, pr, j), T, p);
for (i = pr + 1; i <= m; i++) {
v = Flxq_mul(gcoeff(A, i, j), u, T, p);
gcoeff(A, i, j) = v;
for (k = j + 1; k <= n; k++) {
gcoeff(A, i, k) = Flx_sub(gcoeff(A, i, k),
- Flx_mul(gcoeff(A, j, k), v, p), p);
+ Flx_mul(gcoeff(A, pr, k), v, p), p);
}
}
}
setlg(*R, j);
*C = vecslice(A, 1, j - 1);
- *U = rowslice(A, 1, j - 1);
+ *U = rowpermute(A, *R);
if (gc_needed(av, 1))
gerepileall(av, 2, C, U);
return j - 1;
@@ -1232,7 +1219,7 @@ FlxqM_echelon_gauss(GEN A, GEN *R, GEN *C, GEN T, ulong p) {
/* column echelon form */
static ulong
FlxqM_echelon(GEN A, GEN *R, GEN *C, GEN T, ulong p) {
- long i, j, j1, j2, m = nbrows(A), n = lg(A) - 1, n1, r, r1, r2;
+ long j, j1, j2, m = nbrows(A), n = lg(A) - 1, n1, r, r1, r2;
GEN A1, A2, R1, R1c, C1, R2, C2;
GEN A12, A22, B2, C11, C21, M12;
pari_sp av = avma;
@@ -1254,12 +1241,6 @@ FlxqM_echelon(GEN A, GEN *R, GEN *C, GEN T, ulong p) {
R1c = indexcompl(R1, m);
C11 = rowpermute(C1, R1);
C21 = rowpermute(C1, R1c);
- if (R1[r1] != r1) {
- GEN zero = zero_Flx(get_Flx_var(T));
- for (j = 1; j <= r1; j++)
- for (i = 1; i <= R1[j] - j; i++)
- gcoeff(C21, i, j) = zero;
- }
A12 = rowpermute(A2, R1);
A22 = rowpermute(A2, R1c);
M12 = FlxqM_rsolve_lower_unit(C11, A12, T, p);
@@ -1584,12 +1565,6 @@ Flm_ker_echelon(GEN x, ulong p, long pivots) {
Rc = indexcompl(R, n);
C1 = rowpermute(C, R);
C2 = rowpermute(C, Rc);
- if (R[r] != r) {
- long i, j;
- for (j = 1; j <= r; j++)
- for (i = 1; i <= R[j] - j; i++)
- ucoeff(C2, i, j) = 0;
- }
S = Flm_lsolve_lower_unit(C1, C2, p);
K = vecpermute(shallowconcat(Flm_neg(S, p), matid_Flm(n - r)),
perm_inv(vecsmall_concat(R, Rc)));
@@ -1603,15 +1578,13 @@ static GEN
Flm_deplin_echelon(GEN x, ulong p) {
pari_sp av = avma;
GEN R, Rc, C, C1, C2, s, v;
- long i, j, n = lg(x) - 1, r;
+ long i, n = lg(x) - 1, r;
r = Flm_echelon(Flm_transpose(x), &R, &C, p);
if (r == n) { avma = av; return NULL; }
Rc = indexcompl(R, n);
i = Rc[1];
C1 = rowpermute(C, R);
C2 = rowslice(C, i, i);
- for (j = i; j <= r; j++)
- ucoeff(C2, 1, j) = 0UL;
s = Flm_row(Flm_lsolve_lower_unit(C1, C2, p), 1);
v = vecpermute(vecsmall_concat(Flv_neg(s, p), vecsmall_ei(n - r, 1)),
perm_inv(vecsmall_concat(R, Rc)));
@@ -2121,13 +2094,6 @@ FlxqM_invimage_CUP(GEN A, GEN B, GEN T, ulong p) {
Rc = indexcompl(R, nbrows(B));
C1 = rowpermute(C, R);
C2 = rowpermute(C, Rc);
- if (R[r] != r) {
- long i, j;
- GEN zero = zero_Flx(sv);
- for (j = 1; j <= r; j++)
- for (i = 1; i <= R[j] - j; i++)
- gcoeff(C2, i, j) = zero;
- }
B1 = rowpermute(B, R);
B2 = rowpermute(B, Rc);
Z = FlxqM_rsolve_lower_unit(C1, B1, T, p);
@@ -2392,13 +2358,6 @@ FlxqM_ker_echelon(GEN x, GEN T, ulong p) {
Rc = indexcompl(R, n);
C1 = rowpermute(C, R);
C2 = rowpermute(C, Rc);
- if (R[r] != r) {
- long i, j;
- GEN zero = zero_Flx(get_Flx_var(T));
- for (j = 1; j <= r; j++)
- for (i = 1; i <= R[j] - j; i++)
- gcoeff(C2, i, j) = zero;
- }
S = FlxqM_lsolve_lower_unit(C1, C2, T, p);
K = vecpermute(shallowconcat(FlxM_neg(S, p), matid_FlxqM(n - r, T, p)),
perm_inv(vecsmall_concat(R, Rc)));
@@ -2417,18 +2376,13 @@ static GEN
FlxqM_deplin_echelon(GEN x, GEN T, ulong p) {
pari_sp av = avma;
GEN R, Rc, C, C1, C2, s, v;
- long i, j, n = lg(x) - 1, r, sv = get_Flx_var(T);
+ long i, n = lg(x) - 1, r, sv = get_Flx_var(T);
r = FlxqM_echelon(shallowtrans(x), &R, &C, T, p);
if (r == n) { avma = av; return NULL; }
Rc = indexcompl(R, n);
i = Rc[1];
C1 = rowpermute(C, R);
C2 = rowslice(C, i, i);
- if (i <= r) {
- GEN zero = zero_Flx(sv);
- for (j = i; j <= r; j++)
- gcoeff(C2, 1, j) = zero;
- }
s = row(FlxqM_lsolve_lower_unit(C1, C2, T, p), 1);
settyp(s, t_COL);
v = vecpermute(shallowconcat(FlxC_neg(s, p), col_ei_FlxC(n - r, 1, sv)),
@@ -4403,12 +4357,6 @@ Flm_invimage_CUP(GEN A, GEN B, ulong p) {
Rc = indexcompl(R, nbrows(B));
C1 = rowpermute(C, R);
C2 = rowpermute(C, Rc);
- if (R[r] != r) {
- long i, j;
- for (j = 1; j <= r; j++)
- for (i = 1; i <= R[j] - j; i++)
- ucoeff(C2, i, j) = 0UL;
- }
B1 = rowpermute(B, R);
B2 = rowpermute(B, Rc);
Z = Flm_rsolve_lower_unit(C1, B1, p);
--
2.7.3
>From 5ad7e2199d70b4b66b5650b26421884868ccea81 Mon Sep 17 00:00:00 2001
From: Peter Bruin <P.J.Bruin@math.leidenuniv.nl>
Date: Sat, 11 Mar 2017 12:13:52 +0100
Subject: [PATCH 2/2] allow U = P = NULL in CUP decomposition, and improve
garbage collection
---
src/basemath/alglin1.c | 26 ++++++++++----------------
1 file changed, 10 insertions(+), 16 deletions(-)
diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
index 5e00a12..8d747b8 100644
--- a/src/basemath/alglin1.c
+++ b/src/basemath/alglin1.c
@@ -757,11 +757,9 @@ Flm_lsolve_lower_unit(GEN L, GEN A, ulong p) {
static long
Flm_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
long i, j, k, m = nbrows(A), n = lg(A) - 1, pr, pc, u, v;
- pari_sp av;
- *P = identity_perm(n);
+ if (P) *P = identity_perm(n);
*R = cgetg(m + 1, t_VECSMALL);
- av = avma;
for (j = 1, pr = 0; j <= n; j++) {
for (pr++, pc = 0; pr <= m; pr++) {
for (k = j; k <= n; k++) {
@@ -777,7 +775,7 @@ Flm_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
(*R)[j] = pr;
if (pc != j) {
swap(gel(A, j), gel(A, pc));
- lswap((*P)[j], (*P)[pc]);
+ if (P) lswap((*P)[j], (*P)[pc]);
}
u = Fl_inv(ucoeff(A, pr, j), p);
for (i = pr + 1; i <= m; i++) {
@@ -791,9 +789,7 @@ Flm_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
}
setlg(*R, j);
*C = vecslice(A, 1, j - 1);
- *U = rowpermute(A, *R);
- if (gc_needed(av, 1))
- gerepileall(av, 2, C, U);
+ if (U) *U = rowpermute(A, *R);
return j - 1;
}
@@ -855,8 +851,7 @@ Flm_CUP(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, ulong p) {
static ulong
Flm_echelon_gauss(GEN A, GEN *R, GEN *C, ulong p) {
- GEN U, P;
- return Flm_CUP_gauss(A, R, C, &U, &P, p);
+ return Flm_CUP_gauss(A, R, C, NULL, NULL, p);
}
/* column echelon form */
@@ -1114,7 +1109,7 @@ FlxqM_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
pari_sp av;
GEN u, v;
- *P = identity_perm(n);
+ if (P) *P = identity_perm(n);
*R = cgetg(m + 1, t_VECSMALL);
av = avma;
for (j = 1, pr = 0; j <= n; j++) {
@@ -1133,7 +1128,7 @@ FlxqM_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
(*R)[j] = pr;
if (pc != j) {
swap(gel(A, j), gel(A, pc));
- lswap((*P)[j], (*P)[pc]);
+ if (P) lswap((*P)[j], (*P)[pc]);
}
u = Flxq_inv(gcoeff(A, pr, j), T, p);
for (i = pr + 1; i <= m; i++) {
@@ -1144,12 +1139,12 @@ FlxqM_CUP_gauss(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
Flx_mul(gcoeff(A, pr, k), v, p), p);
}
}
+ if (gc_needed(av, 2))
+ A = gerepilecopy(av, A);
}
setlg(*R, j);
*C = vecslice(A, 1, j - 1);
- *U = rowpermute(A, *R);
- if (gc_needed(av, 1))
- gerepileall(av, 2, C, U);
+ if (U) *U = rowpermute(A, *R);
return j - 1;
}
@@ -1212,8 +1207,7 @@ FlxqM_CUP(GEN A, GEN *R, GEN *C, GEN *U, GEN *P, GEN T, ulong p) {
static ulong
FlxqM_echelon_gauss(GEN A, GEN *R, GEN *C, GEN T, ulong p) {
- GEN U, P;
- return FlxqM_CUP_gauss(A, R, C, &U, &P, T, p);
+ return FlxqM_CUP_gauss(A, R, C, NULL, NULL, T, p);
}
/* column echelon form */
--
2.7.3