| Peter Bruin on Thu, 29 Oct 2015 13:40:49 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
| Re: Faster digits and fromdigits in base 2^k, and FlxqM_mul_Kronecker |
Hi Bill, > However > make test-digits > fails > and > ? fromdigits([1,0,1],2) > %1 = 7 > is not good. > > the line 166 in fromdigits_2k > if (k == 1) return bits_to_int(x, l); > > does not seem correct. You are right; I added this line at a late stage, and somehow misunderstood bits_to_int. It should be in nv_fromdigits_2k instead of fromdigits_2k. > PS: next time, you might want to look at 'git format-patch' to > generate the patches. > Also, you should try 'make checkspaces'. Thanks for the hints. I replaced the tabs by spaces and made new patches using git format-patch. The difference with respect to the previous ones is just the whitespace fixes and moving the call to bits_to_int. Cheers, Peter
>From 1aff932dddab902558eb1fb1051f3a2c4b8005c4 Mon Sep 17 00:00:00 2001
From: Peter Bruin <P.J.Bruin@math.leidenuniv.nl>
Date: Fri, 11 Sep 2015 12:56:58 +0200
Subject: [PATCH 1/2] improve digits and fromdigits in base 2^k
---
doc/usersch5.tex | 13 ++-
src/basemath/arith2.c | 2 +
src/basemath/bit.c | 223 ++++++++++++++++++++++++++++++++++++++++++------
src/headers/paridecl.h | 2 +
4 files changed, 213 insertions(+), 27 deletions(-)
diff --git a/doc/usersch5.tex b/doc/usersch5.tex
index 78c4497..069a1a4 100644
--- a/doc/usersch5.tex
+++ b/doc/usersch5.tex
@@ -2347,7 +2347,7 @@ shifts the mantissa
$$f, s[m], s[m+1],\ldots s[M]$$
right by $n$ bits.
-\subsec{From \typ{INT} to bits}
+\subsec{From \typ{INT} to bits or digits in base $2^k$ and back}
\fun{GEN}{binary_zv}{GEN x} given a \typ{INT} $x$, return a \typ{VECSMALL} of
bits, from most significant to least significant.
@@ -2368,6 +2368,17 @@ proper \kbd{GEN}), return the integer $\sum_{i = 1}^l x[i] 2^{l-i}$, as a
\fun{ulong}{bits_to_u}{GEN v, long l} same as \tet{bits_to_int}, where
$l < \tet{BITS_IN_LONG}$, so we can return an \kbd{ulong}.
+\fun{GEN}{fromdigits_2k}{GEN x, long k} converse of \tet{binary_2k};
+given a \typ{VEC} $x$ of length $l$ and a positive \kbd{long} $k$,
+where each $x[i]$ is a \typ{INT} with $0\leq x[i] < 2^k$, return the
+integer $\sum_{i = 1}^l x[i] 2^{k(l-i)}$, as a \typ{INT}.
+
+\fun{GEN}{nv_fromdigits_2k}{GEN x, long k} as \tet{fromdigits_2k}, but
+with $x$ being a \typ{VECSMALL} and each $x[i]$ being a \kbd{ulong}
+with $0\leq x[i] < 2^{\min\{k,\tet{BITS_IN_LONG}\}}$. Here $k$ may be
+any positive \kbd{long}, and the $x[i]$ are regarded as $k$-bit
+integers by truncating or extending with zeroes.
+
\subsec{Integer valuation}
For integers $x$ and $p$, such that $x\neq 0$ and $|p| > 1$, we define
$v_p(x)$ to be the largest integer exponent $e$ such that $p^e$ divides $x$.
diff --git a/src/basemath/arith2.c b/src/basemath/arith2.c
index 01726a0..1935132 100644
--- a/src/basemath/arith2.c
+++ b/src/basemath/arith2.c
@@ -1501,6 +1501,8 @@ fromdigits(GEN x, GEN B)
if (typ(x)!=t_VEC || !RgV_is_ZV(x)) pari_err_TYPE("fromdigits",x);
B = check_basis(B);
if (lg(x)==1) { avma = av; return gen_0; }
+ if (Z_ispow2(B))
+ return fromdigits_2k(x, expi(B));
x = vecreverse(x);
return gerepileupto(av, gen_fromdigits(x, B, NULL, &Z_ring));
}
diff --git a/src/basemath/bit.c b/src/basemath/bit.c
index 6e9e253..5c0ff08 100644
--- a/src/basemath/bit.c
+++ b/src/basemath/bit.c
@@ -81,6 +81,117 @@ bits_to_u(GEN v, long l)
return u;
}
+/* set BITS_IN_LONG bits starting at word *w plus *r bits,
+ * clearing subsequent bits in the last word touched */
+INLINE void
+int_set_ulong(ulong a, GEN *w, long *r)
+{
+ if (*r) {
+ **w |= (a << *r);
+ *w = int_nextW(*w);
+ **w = (a >> (BITS_IN_LONG - *r));
+ } else {
+ **w = a;
+ *w = int_nextW(*w);
+ }
+}
+
+/* set k bits starting at word *w plus *r bits,
+ * clearing subsequent bits in the last word touched */
+INLINE void
+int_set_bits(ulong a, long k, GEN *w, long *r)
+{
+ if (*r) {
+ **w |= a << *r;
+ a >>= BITS_IN_LONG - *r;
+ } else {
+ **w = a;
+ a = 0;
+ }
+ *r += k;
+ if (*r >= BITS_IN_LONG) {
+ *w = int_nextW(*w);
+ *r -= BITS_IN_LONG;
+ for (; *r >= BITS_IN_LONG; *r -= BITS_IN_LONG) {
+ **w = a;
+ a = 0;
+ *w = int_nextW(*w);
+ }
+ if (*r)
+ **w = a;
+ }
+}
+
+/* set k bits from z (t_INT) starting at word *w plus *r bits,
+ * clearing subsequent bits in the last word touched */
+INLINE void
+int_set_int(GEN z, long k, GEN *w, long *r)
+{
+ long l = lgefint(z) - 2;
+ GEN y;
+ if (!l) {
+ int_set_bits(0, k, w, r);
+ return;
+ }
+ y = int_LSW(z);
+ for (; l > 1; l--) {
+ int_set_ulong((ulong) *y, w, r);
+ y = int_nextW(y);
+ k -= BITS_IN_LONG;
+ }
+ if (k)
+ int_set_bits((ulong) *y, k, w, r);
+}
+
+GEN
+nv_fromdigits_2k(GEN x, long k)
+{
+ long l = lg(x) - 1, r;
+ GEN w, z;
+ if (k == 1) return bits_to_int(x, l);
+ if (!l) return gen_0;
+ z = cgetipos(nbits2lg(k * l));
+ w = int_LSW(z);
+ r = 0;
+ for (; l; l--)
+ int_set_bits(uel(x, l), k, &w, &r);
+ return int_normalize(z, 0);
+}
+
+GEN
+fromdigits_2k(GEN x, long k)
+{
+ long l, m;
+ GEN w, y, z;
+ for (l = lg(x) - 1; l && !signe(gel(x, 1)); x++, l--);
+ if (!l) return gen_0;
+ m = expi(gel(x, 1)) + 1;
+ z = cgetipos(nbits2lg(k * (l - 1) + m));
+ w = int_LSW(z);
+ if (!(k & (BITS_IN_LONG - 1))) {
+ long i, j, t = k >> TWOPOTBITS_IN_LONG;
+ for (; l; l--) {
+ j = lgefint(gel(x, l)) - 2;
+ y = int_LSW(gel(x, l));
+ for (i = 0; i < j; i++) {
+ *w = *y;
+ y = int_nextW(y);
+ w = int_nextW(w);
+ }
+ for (; i < t; i++) {
+ *w = 0;
+ w = int_nextW(w);
+ }
+ }
+ } else {
+ long r = 0;
+ for (; l > 1; l--)
+ int_set_int(gel(x, l), k, &w, &r);
+ int_set_int(gel(x, 1), m, &w, &r);
+ }
+ return int_normalize(z, 0);
+}
+
GEN
binaire(GEN x)
{
@@ -139,41 +250,101 @@ binaire(GEN x)
return y;
}
+/* extract k bits (as ulong) starting at word *w plus *r bits */
+INLINE ulong
+int_get_bits(long k, GEN *w, long *r)
+{
+ ulong mask = (1UL << k) - 1;
+ ulong a = (((ulong) **w) >> *r) & mask;
+ *r += k;
+ if (*r >= BITS_IN_LONG) {
+ *r -= BITS_IN_LONG;
+ *w = int_nextW(*w);
+ if (*r)
+ a |= (**w << (k - *r)) & mask;
+ }
+ return a;
+}
+
+/* extract BITS_IN_LONG bits starting at word *w plus *r bits */
+INLINE ulong
+int_get_ulong(GEN *w, long *r)
+{
+ ulong a = ((ulong) **w) >> *r;
+ *w = int_nextW(*w);
+ if (*r)
+ a |= (**w << (BITS_IN_LONG - *r));
+ return a;
+}
+
+/* extract k bits (as t_INT) starting at word *w plus *r bits */
+INLINE GEN
+int_get_int(long k, GEN *w, long *r)
+{
+ GEN z = cgetipos(nbits2lg(k));
+ GEN y = int_LSW(z);
+ for (; k >= BITS_IN_LONG; k -= BITS_IN_LONG) {
+ *y = int_get_ulong(w, r);
+ y = int_nextW(y);
+ }
+ if (k)
+ *y = int_get_bits(k, w, r);
+ return int_normalize(z, 0);
+}
+
/* assume k < BITS_IN_LONG */
GEN
binary_2k_zv(GEN x, long k)
{
- long iv, j, n, nmodk, nk;
- GEN v, vk;
+ long l, n, r;
+ GEN v, w;
if (k == 1) return binary_zv(x);
- if (!signe(x)) return cgetg(1,t_VECSMALL);
- v = binary_zv(x);
- n = lg(v)-1;
- nk = n / k; nmodk = n % k;
- if (nmodk) nk++;
- vk = cgetg(nk+1, t_VECSMALL);
- iv = n - k;
- if (!nmodk) nmodk = k;
- for (j = nk; j >= 2; j--,iv-=k) vk[j] = bits_to_u(v+iv, k);
- vk[1] = bits_to_u(v,nmodk);
- return vk;
+ if (!signe(x)) return cgetg(1, t_VECSMALL);
+ n = expi(x) + 1;
+ l = (n + k - 1) / k;
+ v = cgetg(l + 1, t_VECSMALL);
+ w = int_LSW(x);
+ r = 0;
+ for (; l > 1; l--) {
+ uel(v, l) = int_get_bits(k, &w, &r);
+ n -= k;
+ }
+ uel(v, 1) = int_get_bits(n, &w, &r);
+ return v;
}
+
GEN
binary_2k(GEN x, long k)
{
- long iv, j, n, nmodk, nk;
- GEN v, vk;
- if (!signe(x)) return cgetg(1,t_VEC);
- v = binary_zv(x);
- n = lg(v)-1;
- nk = n / k; nmodk = n % k;
- if (nmodk) nk++;
- vk = cgetg(nk+1, t_VEC);
- iv = n - k;
- if (!nmodk) nmodk = k;
- for (j = nk; j >= 2; j--,iv-=k) gel(vk,j) = bits_to_int(v+iv, k);
- gel(vk,1) = bits_to_int(v, nmodk);
- return vk;
+ long l, n;
+ GEN v, w, y, z;
+ if (k == 1) return binaire(x);
+ if (!signe(x)) return cgetg(1, t_VEC);
+ n = expi(x) + 1;
+ l = (n + k - 1) / k;
+ v = cgetg(l + 1, t_VEC);
+ w = int_LSW(x);
+ if (!(k & (BITS_IN_LONG - 1))) {
+ long m, t = k >> TWOPOTBITS_IN_LONG, u = lgefint(x) - 2;
+ for (; l; l--) {
+ m = minss(t, u);
+ z = cgetipos(m + 2);
+ y = int_LSW(z);
+ for (; m; m--) {
+ *y = *w;
+ y = int_nextW(y);
+ w = int_nextW(w);
+ }
+ gel(v, l) = int_normalize(z, 0);
+ u -= t;
+ }
+ } else {
+ long r = 0;
+ for (; l > 1; l--, n -= k)
+ gel(v, l) = int_get_int(k, &w, &r);
+ gel(v, 1) = int_get_int(n, &w, &r);
+ }
+ return v;
}
/* return 1 if bit n of x is set, 0 otherwise */
diff --git a/src/headers/paridecl.h b/src/headers/paridecl.h
index d4a4743..514745b 100644
--- a/src/headers/paridecl.h
+++ b/src/headers/paridecl.h
@@ -2100,6 +2100,8 @@ GEN binary_zv(GEN x);
GEN binary_2k(GEN x, long k);
GEN binary_2k_zv(GEN x, long k);
long bittest(GEN x, long n);
+GEN fromdigits_2k(GEN x, long k);
+GEN nv_fromdigits_2k(GEN x, long k);
GEN gbitand(GEN x, GEN y);
GEN gbitneg(GEN x, long n);
GEN gbitnegimply(GEN x, GEN y);
--
1.7.9.5
>From ed63bd98566d8e9a06d17983191d966f45f5445e Mon Sep 17 00:00:00 2001
From: Peter Bruin <P.J.Bruin@math.leidenuniv.nl>
Date: Sun, 4 Oct 2015 23:19:12 +0200
Subject: [PATCH 2/2] improve FlxqM_mul_Kronecker using nv_fromdigits_2k and
binary_2k_zv
---
src/basemath/Flx.c | 139 +++++++++++++++++++++++++++++++++++++++++-------
src/basemath/alglin1.c | 7 +--
2 files changed, 121 insertions(+), 25 deletions(-)
diff --git a/src/basemath/Flx.c b/src/basemath/Flx.c
index 2fb5d56..51e01ff 100644
--- a/src/basemath/Flx.c
+++ b/src/basemath/Flx.c
@@ -4229,6 +4229,18 @@ kron_pack_Flx_spec_3(GEN x, long l) {
}
static GEN
+kron_pack_Flx_spec_bits(GEN x, long b, long l) {
+ GEN y;
+ long i;
+ if (l == 0)
+ return gen_0;
+ y = cgetg(l + 1, t_VECSMALL);
+ for(i = 1; i <= l; i++)
+ y[i] = x[l - i];
+ return nv_fromdigits_2k(y, b);
+}
+
+static GEN
kron_unpack_Flx(GEN z, ulong p)
{
long i, l = lgefint(z);
@@ -4250,6 +4262,37 @@ kron_unpack_Flx_3(GEN x, ulong p) {
return Z_mod2BIL_Flx_3(x, d, p);
}
+/* assume b < BITS_IN_LONG */
+static GEN
+kron_unpack_Flx_bits_narrow(GEN z, long b, ulong p) {
+ GEN v = binary_2k_zv(z, b), x;
+ long i, l = lg(v) + 1;
+ x = cgetg(l, t_VECSMALL);
+ for (i = 2; i < l; i++)
+ x[i] = v[l - i] % p;
+ return Flx_renormalize(x, l);
+}
+
+static GEN
+kron_unpack_Flx_bits_wide(GEN z, long b, ulong p, ulong pi) {
+ GEN v = binary_2k(z, b), x, y;
+ long i, l = lg(v) + 1, ly;
+ x = cgetg(l, t_VECSMALL);
+ for (i = 2; i < l; i++) {
+ y = gel(v, l - i);
+ ly = lgefint(y);
+ switch (ly) {
+ case 2: x[i] = 0; break;
+ case 3: x[i] = *int_W_lg(y, 0, ly) % p; break;
+ case 4: x[i] = remll_pre(*int_W_lg(y, 1, ly), *int_W_lg(y, 0, ly), p, pi); break;
+ case 5: x[i] = remlll_pre(*int_W_lg(y, 2, ly), *int_W_lg(y, 1, ly),
+ *int_W_lg(y, 0, ly), p, pi); break;
+ default: x[i] = umodiu(gel(v, l - i), p);
+ }
+ }
+ return Flx_renormalize(x, l);
+}
+
static GEN
FlxM_pack_ZM(GEN M, GEN (*pack)(GEN, long)) {
long i, j, l, lc;
@@ -4268,6 +4311,24 @@ FlxM_pack_ZM(GEN M, GEN (*pack)(GEN, long)) {
}
static GEN
+FlxM_pack_ZM_bits(GEN M, long b)
+{
+ long i, j, l, lc;
+ GEN N = cgetg_copy(M, &l), x;
+ if (l == 1)
+ return N;
+ lc = lgcols(M);
+ for (j = 1; j < l; j++) {
+ gel(N, j) = cgetg(lc, t_COL);
+ for (i = 1; i < lc; i++) {
+ x = gcoeff(M, i, j);
+ gcoeff(N, i, j) = kron_pack_Flx_spec_bits(x + 2, b, lgpol(x));
+ }
+ }
+ return N;
+}
+
+static GEN
ZM_unpack_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong))
{
long i, j, l, lc, sv = get_Flx_var(T);
@@ -4286,44 +4347,82 @@ ZM_unpack_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong))
return N;
}
+static GEN
+ZM_unpack_FlxqM_bits(GEN M, long b, GEN T, ulong p)
+{
+ long i, j, l, lc, sv = get_Flx_var(T);
+ GEN N = cgetg_copy(M, &l), x;
+ if (l == 1)
+ return N;
+ lc = lgcols(M);
+ if (b < BITS_IN_LONG) {
+ for (j = 1; j < l; j++) {
+ gel(N, j) = cgetg(lc, t_COL);
+ for (i = 1; i < lc; i++) {
+ x = kron_unpack_Flx_bits_narrow(gcoeff(M, i, j), b, p);
+ x[1] = sv;
+ gcoeff(N, i, j) = Flx_rem(x, T, p);
+ }
+ }
+ } else {
+ ulong pi = get_Fl_red(p);
+ for (j = 1; j < l; j++) {
+ gel(N, j) = cgetg(lc, t_COL);
+ for (i = 1; i < lc; i++) {
+ x = kron_unpack_Flx_bits_wide(gcoeff(M, i, j), b, p, pi);
+ x[1] = sv;
+ gcoeff(N, i, j) = Flx_rem(x, T, p);
+ }
+ }
+ }
+ return N;
+}
+
GEN
FlxqM_mul_Kronecker(GEN A, GEN B, GEN T, ulong p)
{
pari_sp av = avma;
- long l, n = lg(A) - 1;
+ long b, d = degpol(T), n = lg(A) - 1;
GEN C, D, z;
GEN (*pack)(GEN, long), (*unpack)(GEN, ulong);
int is_sqr = A==B;
- /*
- cf. Flx_mulspec(), maxlengthcoeffpol()
- z can be 1, 2, 3 or (theoretically) 4 words long
- */
- z = muliu(muliu(sqru(p - 1), degpol(T)), n);
- l = lgefint(z);
+ z = muliu(muliu(sqru(p - 1), d), n);
+ b = expi(z) + 1;
+ /* only do expensive bit-packing if it saves at least 1 limb */
+ if (b <= BITS_IN_HALFULONG) {
+ if (nbits2lg(d*b) - 2 == (d + 1)/2)
+ b = BITS_IN_HALFULONG;
+ } else {
+ long l = lgefint(z) - 2;
+ if (nbits2lg(d*b) - 2 == d*l)
+ b = l*BITS_IN_LONG;
+ }
avma = av;
- switch (l - 2) {
- case 1:
- if (HIGHWORD(z[2]) == 0) {
- pack = kron_pack_Flx_spec_half;
- unpack = int_to_Flx_half;
- } else {
- pack = kron_pack_Flx_spec;
- unpack = kron_unpack_Flx;
- }
+ switch (b) {
+ case BITS_IN_HALFULONG:
+ pack = kron_pack_Flx_spec_half;
+ unpack = int_to_Flx_half;
break;
- case 2:
+ case BITS_IN_LONG:
+ pack = kron_pack_Flx_spec;
+ unpack = kron_unpack_Flx;
+ break;
+ case 2*BITS_IN_LONG:
pack = kron_pack_Flx_spec_2;
unpack = kron_unpack_Flx_2;
break;
- case 3:
+ case 3*BITS_IN_LONG:
pack = kron_pack_Flx_spec_3;
unpack = kron_unpack_Flx_3;
break;
default:
- /* not implemented, probably won't occur in practice */
- return NULL;
+ A = FlxM_pack_ZM_bits(A, b);
+ B = is_sqr? A: FlxM_pack_ZM_bits(B, b);
+ C = ZM_mul(A, B);
+ D = ZM_unpack_FlxqM_bits(C, b, T, p);
+ return gerepilecopy(av, D);
}
A = FlxM_pack_ZM(A, pack);
B = is_sqr? A: FlxM_pack_ZM(B, pack);
diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
index 383f2eb..e3bab48 100644
--- a/src/basemath/alglin1.c
+++ b/src/basemath/alglin1.c
@@ -981,11 +981,8 @@ FlxqM_mul(GEN A, GEN B, GEN T, ulong p) {
if (n == 0)
return cgetg(1, t_MAT);
- if (n > 1) {
- GEN C = FlxqM_mul_Kronecker(A, B, T, p);
- if (C != NULL)
- return C;
- }
+ if (n > 1)
+ return FlxqM_mul_Kronecker(A, B, T, p);
ff = get_Flxq_field(&E, T, p);
return gen_matmul(A, B, E, ff);
}
--
1.7.9.5