Bill Allombert on Thu, 28 Oct 2004 15:12:46 +0200


[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

asm inline hppa level0 kernel


Hello PARI-dev,

The attached patch replaces the asm hppa kernel with an inline version
that works with gcc.

This is for 32-bit only.

You need to build with --kernel=hppa.

It was only tested with gcc on GNU/Linux.

Please test on other platforms.

I think the code of the different level0 kernels could be cleaned up
a bit: I found

1) obsolete functions
2) no support for LOCAL_HIREMAINDER/LOCAL_OVERFLOW
3) code duplication
4) use of inferior alternatives.

Performance of the hppa kernel (gcc 3.3.4, dual PA8600 at 550MHz,
linux 2.4.26)

ASM inline kernel:

+++ Total bench for gp-sta is 2890
+++ Total bench for gp-dyn is 2976

Portable kernel:

+++ Total bench for gp-sta is 4176
+++ Total bench for gp-dyn is 4264

Cheers,
Bill
? src/kernel/hppa/level0.c
? src/kernel/hppa/level0.h
Index: src/kernel/hppa/MakeLVL0.SH
===================================================================
RCS file: /home/cvs/pari/src/kernel/hppa/MakeLVL0.SH,v
retrieving revision 1.3
diff -u -r1.3 MakeLVL0.SH
--- src/kernel/hppa/MakeLVL0.SH	25 Nov 2003 18:53:36 -0000	1.3
+++ src/kernel/hppa/MakeLVL0.SH	28 Oct 2004 12:07:08 -0000
@@ -1,10 +1,11 @@
 # Level 0 kernel is "asm extern"
 kern=$src/kernel/$kernlvl0
+knone=$src/kernel/none  # supplies divll.h and level0.c used by the rules below
 
 cat >> $file << EOT
-parilvl0.h: $src/kernel/none/asm0.h 
-	cat $src/kernel/none/asm0.h > parilvl0.h
-kernel\$(_O):  $kern/level0.s
-	\$(AS) \$(ASFLAGS) -o kernel\$(_O) $kern/level0.s
+parilvl0.h: $kern/level0.h $knone/divll.h
+	cat $kern/level0.h $knone/divll.h > parilvl0.h
+kernel\$(_O): .headers $kern/level0.h $knone/level0.c
+	\$(CC) -c \$(CFLAGS) \$(CPPFLAGS) -o kernel\$(_O) $knone/level0.c
 
 EOT
--- /dev/null	2004-08-20 19:57:57.000000000 +0000
+++ src/kernel/hppa/level0.h	2004-10-28 10:21:42.000000000 +0000
@@ -0,0 +1,95 @@
+#line 2 "../src/kernel/hppa/level0.h"
+/* $Id: level0.h,v 1.9 2003/03/05 20:17:11 karim Exp $
+
+Copyright (C) 2004  The PARI group.
+
+This file is part of the PARI/GP package.
+
+PARI/GP is free software; you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation. It is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY WHATSOEVER.
+
+Check the License for details. You should have received a copy of it, along
+with the package; see the file 'COPYING'. If not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* This file was made using ideas from Bruno Haible's ix86 asm inline kernel
+ * and code from Nigel Smart's hppa asm kernel.  */
+/* Declare the register variables hiremainder / overflow that the macros below read and write. */
+#define LOCAL_HIREMAINDER  register ulong hiremainder
+#define LOCAL_OVERFLOW     register ulong overflow
+/* addll(a,b): a + b truncated to one word; overflow receives the carry out (0 or 1). */
+#define addll(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+   __asm__ ("add %2,%3,%0\n\taddc %%r0,%%r0,%1" /* addc r0,r0: 0 + 0 + carry */ \
+        : "=r" (__value), "=r" (overflow) \
+        : "r" (__arg1), "r" (__arg2) \
+        : "cc"); \
+  __value; \
+})
+/* addllx(a,b): a + b + (overflow != 0); overflow then receives the carry out (0 or 1). */
+#define addllx(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+   __asm__ ("sub %4,%5,%%r0\n\taddc %2,%3,%0\n\taddc %%r0,%%r0,%1" \
+        : "=r" (__value), "=r" (overflow) \
+        : "r" (__arg1), "r" (__arg2), "r" (overflow), "r" ((ulong) 1)\
+        : "cc"); /* overflow - 1 is discarded into r0: it only primes the carry bit for addc */ \
+  __value; \
+})
+/* subll(a,b): a - b truncated to one word; overflow receives the borrow (1 if a < b, else 0). */
+#define subll(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+   __asm__ ("sub %2,%3,%0\n\taddc %%r0,%%r0,%1\n\tsubi 1,%1,%1" /* borrow = 1 - carry */ \
+        : "=r" (__value), "=r" (overflow) \
+        : "r" (__arg1), "r" (__arg2) /* the template only uses %0-%3 */ \
+        : "cc"); \
+  __value; \
+})
+/* subllx(a,b): a - b - (overflow != 0); overflow then receives the borrow out (0 or 1). */
+#define subllx(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+   __asm__ ("sub %%r0,%4,%%r0\n\tsubb %2,%3,%0\n\taddc %%r0,%%r0,%1\n\tsubi 1,%1,%1" \
+        : "=&r" (__value), "=r" (overflow) \
+        : "r" (__arg1), "r" (__arg2), "r" (overflow)\
+        : "cc"); /* 0 - overflow is discarded into r0: it only primes the borrow for subb */ \
+  __value; \
+})
+/* mulll(a,b): unsigned a*b via xmpyu; yields __vtab.x[1] and stores __vtab.x[0] in hiremainder. */
+#define mulll(a,b) \
+({ ulong __arg1 = (a), __arg2 = (b); \
+   union {double z; ulong x[2];} __vtab; /* views the FP-register result as two words */ \
+   __asm__ ("xmpyu %1,%2,%0" \
+        : "=f" (__vtab.z) \
+        : "f" (__arg1), "f" (__arg2) \
+        : "cc"); \
+   hiremainder=__vtab.x[0]; /* presumably the high word (big-endian layout) -- confirm */ \
+   __vtab.x[1]; \
+})
+/* addmul(a,b): yields x[1] + hiremainder of the product a*b; hiremainder becomes x[0] + carry. */
+#define addmul(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+    union {double z; ulong x[2];} __vtab; /* x[0]/x[1]: presumably high/low word on hppa */ \
+    __asm__ ("xmpyu %1,%2,%0" \
+	: "=f" (__vtab.z) \
+	: "f" (__arg1), "f" (__arg2) \
+	: "cc"); \
+    __asm__ ("add %2,%3,%0\n\taddc %%r0, %4, %1" \
+        : "=&r" (__value), "=r" (hiremainder) /* & : %0 is written before %4 is read */ \
+        : "r" (__vtab.x[1]),"r" (hiremainder), "r" (__vtab.x[0]) \
+        : "cc"); \
+    __value; \
+})
+
+/* From Peter Montgomery: bfffo(x) = number of leading zero bits of x; bfffo(0) = BITS_IN_LONG. */
+/* Only 16+8+4+4 = 32 bits are examined, so x must fit in 32 bits. */
+#define bfffo(x) \
+({int __value; \
+  ulong __arg1=(x); \
+  static const int __bfffo_tabshi[16]={4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0}; /* per-nibble counts */ \
+  __value = BITS_IN_LONG - 4; \
+  if (__arg1 > 0xffffUL) {__value -= 16; __arg1 >>= 16;} \
+  if (__arg1 > 0x00ffUL) {__value -= 8; __arg1 >>= 8;} \
+  if (__arg1 > 0x000fUL) {__value -= 4; __arg1 >>= 4;} \
+  __value + __bfffo_tabshi[__arg1]; \
+})