| Bill Allombert on Thu, 28 Oct 2004 15:12:46 +0200 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
| asm inline hppa level0 kernel |
Hello PARI-dev, The attached patch replaces the asm hppa kernel with an inline version that works with gcc. This is for 32-bit only. You need to build with --kernel=hppa. It was only tested with gcc on GNU/Linux. Please test on other platforms. I think the code of the different level0 kernels could be cleaned up a bit: I found 1) obsolete functions, 2) no support for LOCAL_HIREMAINDER/LOCAL_OVERFLOW, 3) code duplication, 4) use of inferior alternatives. Performance of the hppa kernel (gcc 3.3.4, dual PA8600 at 550MHz, Linux 2.4.26): ASM inline kernel: +++ Total bench for gp-sta is 2890 +++ Total bench for gp-dyn is 2976 Portable kernel: +++ Total bench for gp-sta is 4176 +++ Total bench for gp-dyn is 4264 Cheers, Bill
? src/kernel/hppa/level0.c
? src/kernel/hppa/level0.h
Index: src/kernel/hppa/MakeLVL0.SH
===================================================================
RCS file: /home/cvs/pari/src/kernel/hppa/MakeLVL0.SH,v
retrieving revision 1.3
diff -u -r1.3 MakeLVL0.SH
--- src/kernel/hppa/MakeLVL0.SH 25 Nov 2003 18:53:36 -0000 1.3
+++ src/kernel/hppa/MakeLVL0.SH 28 Oct 2004 12:07:08 -0000
@@ -1,10 +1,11 @@
# Level 0 kernel is "asm extern"
kern=$src/kernel/$kernlvl0
+knone=$src/kernel/none # directory providing the divll.h and level0.c referenced below
cat >> $file << EOT
-parilvl0.h: $src/kernel/none/asm0.h
- cat $src/kernel/none/asm0.h > parilvl0.h
-kernel\$(_O): $kern/level0.s
- \$(AS) \$(ASFLAGS) -o kernel\$(_O) $kern/level0.s
+parilvl0.h: $kern/level0.h
+ cat $kern/level0.h $knone/divll.h > parilvl0.h
+kernel\$(_O): .headers $kern/level0.h
+ \$(CC) -c \$(CFLAGS) \$(CPPFLAGS) -o kernel\$(_O) $knone/level0.c
EOT
--- /dev/null 2004-08-20 19:57:57.000000000 +0000
+++ src/kernel/hppa/level0.h 2004-10-28 10:21:42.000000000 +0000
@@ -0,0 +1,95 @@
+#line 2 "../src/kernel/hppa/level0.h"
+/* $Id: level0.h,v 1.9 2003/03/05 20:17:11 karim Exp $
+
+Copyright (C) 2004 The PARI group.
+
+This file is part of the PARI/GP package.
+
+PARI/GP is free software; you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation. It is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY WHATSOEVER.
+
+Check the License for details. You should have received a copy of it, along
+with the package; see the file 'COPYING'. If not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* This file was made using ideas from Bruno Haible's ix86 inline asm kernel
+ * and code from Nigel Smart's hppa asm kernel. */
+
+#define LOCAL_HIREMAINDER register ulong hiremainder /* declares the local that mulll/addmul write */
+#define LOCAL_OVERFLOW register ulong overflow /* declares the local carry/borrow word of addll, addllx, subll, subllx */
+
+#define addll(a,b) /* yields the one-word sum a+b; the carry out is stored in overflow */ \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+ __asm__ ("add %2,%3,%0\n\taddc %%r0,%%r0,%1" /* addc: %1 = r0 + r0 + carry of the add */ \
+ : "=r" (__value), "=r" (overflow) \
+ : "r" (__arg1), "r" (__arg2) \
+ : "cc"); \
+ __value; \
+})
+
+#define addllx(a,b) /* yields a+b+overflow (carry in); the carry out is written back to overflow */ \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+ __asm__ ("sub %4,%5,%%r0\n\taddc %2,%3,%0\n\taddc %%r0,%%r0,%1" /* sub overflow-1 into r0: done only to reload the carry */ \
+ : "=r" (__value), "=r" (overflow) \
+ : "r" (__arg1), "r" (__arg2), "r" (overflow), "r" ((ulong) 1)\
+ : "cc"); \
+ __value; \
+})
+
+#define subll(a,b) /* yields the one-word difference a-b; the borrow out is stored in overflow */ \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+ __asm__ ("sub %2,%3,%0\n\taddc %%r0,%%r0,%1\n\tsubi 1,%1,%1" /* overflow = 1 - carry, i.e. the borrow */ \
+ : "=r" (__value), "=r" (overflow) \
+ : "r" (__arg1), "r" (__arg2) \
+ : "cc"); \
+ __value; \
+})
+
+#define subllx(a,b) /* yields a-b-overflow (borrow in); the borrow out is written back to overflow */ \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+ __asm__ ("sub %%r0,%4,%%r0\n\tsubb %2,%3,%0\n\taddc %%r0,%%r0,%1\n\tsubi 1,%1,%1" \
+ : "=&r" (__value), "=r" (overflow) /* 0-overflow primes the borrow for subb; addc/subi then set overflow = 1 - carry */ \
+ : "r" (__arg1), "r" (__arg2), "r" (overflow)\
+ : "cc"); \
+ __value; \
+})
+
+#define mulll(a,b) /* yields word x[1] of the product a*b; word x[0] is stored in hiremainder */ \
+({ ulong __arg1 = (a), __arg2 = (b); \
+ union {double z; ulong x[2];} __vtab; /* lets the FP-register result be read back as two ulong words */ \
+ __asm__ ("xmpyu %1,%2,%0" \
+ : "=f" (__vtab.z) \
+ : "f" (__arg1), "f" (__arg2) \
+ : "cc"); \
+ hiremainder=__vtab.x[0]; /* x[0] is the high word: addmul propagates its carry from x[1] into x[0] */ \
+ __vtab.x[1]; \
+})
+
+#define addmul(a,b) /* yields the low word of a*b+hiremainder; hiremainder receives the high word plus carry */ \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+ union {double z; ulong x[2];} __vtab; \
+ __asm__ ("xmpyu %1,%2,%0" \
+ : "=f" (__vtab.z) \
+ : "f" (__arg1), "f" (__arg2) \
+ : "cc"); \
+ __asm__ ("add %2,%3,%0\n\taddc %%r0, %4, %1" \
+ : "=&r" (__value), "=r" (hiremainder) /* earlyclobber: %0 is written by add before addc reads %4 */ \
+ : "r" (__vtab.x[1]),"r" (hiremainder), "r" (__vtab.x[0]) \
+ : "cc"); \
+ __value; \
+})
+
+/* From Peter Montgomery */
+
+#define bfffo(x) /* number of leading zero bits of x for a 32-bit ulong; BITS_IN_LONG when x is 0 */ \
+({int __value; \
+ ulong __arg1=(x); \
+ static const int __bfffo_tabshi[16]={4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0}; /* read-only: leading zeros of a 4-bit value */\
+ __value = BITS_IN_LONG - 4; \
+ if (__arg1 > 0xffffUL) {__value -= 16; __arg1 >>= 16;} \
+ if (__arg1 > 0x00ffUL) {__value -= 8; __arg1 >>= 8;} \
+ if (__arg1 > 0x000fUL) {__value -= 4; __arg1 >>= 4;} \
+ __value + __bfffo_tabshi[__arg1]; /* __arg1 is below 16 here provided x fits in 32 bits */ \
+})