Line data Source code
1 : #line 2 "../src/kernel/none/mulll.h"
2 : /* Copyright (C) 2000 The PARI group.
3 :
4 : This file is part of the PARI/GP package.
5 :
6 : PARI/GP is free software; you can redistribute it and/or modify it under the
7 : terms of the GNU General Public License as published by the Free Software
8 : Foundation; either version 2 of the License, or (at your option) any later
9 : version. It is distributed in the hope that it will be useful, but WITHOUT
10 : ANY WARRANTY WHATSOEVER.
11 :
12 : Check the License for details. You should have received a copy of it, along
13 : with the package; see the file 'COPYING'. If not, write to the Free Software
14 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
15 :
16 : #undef LOCAL_HIREMAINDER
17 : #define LOCAL_HIREMAINDER
18 : extern ulong hiremainder;
19 :
20 : /* Version Peter Montgomery */
21 : /*
22 : * Assume (for presentation) that BITS_IN_LONG = 32.
23 : * Then 0 <= xhi, xlo, yhi, ylo <= 2^16 - 1. Hence
24 : *
25 : * -2^31 + 2^16 <= (xhi-2^15)*(ylo-2^15) + (xlo-2^15)*(yhi-2^15) <= 2^31.
26 : *
27 : * If xhi*ylo + xlo*yhi = 2^32*overflow + xymid, then
28 : *
29 : * -2^32 + 2^16 <= 2^32*overflow + xymid - 2^15*(xhi + ylo + xlo + yhi) <= 0.
30 : *
31 : * 2^16*overflow <= (xhi+xlo+yhi+ylo)/2 - xymid/2^16 <= 2^16*overflow + 2^16-1
32 : *
33 : * This inequality was derived using exact (rational) arithmetic;
34 : * it remains valid when we truncate the two middle terms.
35 : */
36 :
37 : #if !defined(INLINE)
38 : extern long mulll(ulong x, ulong y);
39 : extern long addmul(ulong x, ulong y);
40 : #else
41 :
42 : #if defined(__GNUC__) && !defined(DISABLE_INLINE)
43 : #undef LOCAL_HIREMAINDER
44 : #define LOCAL_HIREMAINDER ulong hiremainder
45 :
46 : #define mulll(x, y) \
47 : __extension__ ({ \
48 : const ulong __x = (x), __y = (y);\
49 : const ulong __xlo = LOWWORD(__x), __xhi = HIGHWORD(__x); \
50 : const ulong __ylo = LOWWORD(__y), __yhi = HIGHWORD(__y); \
51 : ulong __xylo,__xymid,__xyhi,__xymidhi,__xymidlo; \
52 : ulong __xhl,__yhl; \
53 : \
54 : __xylo = __xlo*__ylo; __xyhi = __xhi*__yhi; \
55 : __xhl = __xhi+__xlo; __yhl = __yhi+__ylo; \
56 : __xymid = __xhl*__yhl - (__xyhi+__xylo); \
57 : \
58 : __xymidhi = HIGHWORD(__xymid); \
59 : __xymidlo = __xymid << BITS_IN_HALFULONG; \
60 : \
61 : __xylo += __xymidlo; \
62 : hiremainder = __xyhi + __xymidhi + (__xylo < __xymidlo) \
63 : + ((((__xhl + __yhl) >> 1) - __xymidhi) & HIGHMASK); \
64 : \
65 : __xylo; \
66 : })
67 :
68 : #define addmul(x, y) \
69 : __extension__ ({ \
70 : const ulong __x = (x), __y = (y);\
71 : const ulong __xlo = LOWWORD(__x), __xhi = HIGHWORD(__x); \
72 : const ulong __ylo = LOWWORD(__y), __yhi = HIGHWORD(__y); \
73 : ulong __xylo,__xymid,__xyhi,__xymidhi,__xymidlo; \
74 : ulong __xhl,__yhl; \
75 : \
76 : __xylo = __xlo*__ylo; __xyhi = __xhi*__yhi; \
77 : __xhl = __xhi+__xlo; __yhl = __yhi+__ylo; \
78 : __xymid = __xhl*__yhl - (__xyhi+__xylo); \
79 : \
80 : __xylo += hiremainder; __xyhi += (__xylo < hiremainder); \
81 : \
82 : __xymidhi = HIGHWORD(__xymid); \
83 : __xymidlo = __xymid << BITS_IN_HALFULONG; \
84 : \
85 : __xylo += __xymidlo; \
86 : hiremainder = __xyhi + __xymidhi + (__xylo < __xymidlo) \
87 : + ((((__xhl + __yhl) >> 1) - __xymidhi) & HIGHMASK); \
88 : \
89 : __xylo; \
90 : })
91 :
92 : #else
93 :
94 : INLINE long
95 18165477354 : mulll(ulong x, ulong y)
96 : {
97 18165477354 : const ulong xlo = LOWWORD(x), xhi = HIGHWORD(x);
98 18165477354 : const ulong ylo = LOWWORD(y), yhi = HIGHWORD(y);
99 : ulong xylo,xymid,xyhi,xymidhi,xymidlo;
100 : ulong xhl,yhl;
101 :
102 18165477354 : xylo = xlo*ylo; xyhi = xhi*yhi;
103 18165477354 : xhl = xhi+xlo; yhl = yhi+ylo;
104 18165477354 : xymid = xhl*yhl - (xyhi+xylo);
105 :
106 18165477354 : xymidhi = HIGHWORD(xymid);
107 18165477354 : xymidlo = xymid << BITS_IN_HALFULONG;
108 :
109 18165477354 : xylo += xymidlo;
110 36330954708 : hiremainder = xyhi + xymidhi + (xylo < xymidlo)
111 18165477354 : + ((((xhl + yhl) >> 1) - xymidhi) & HIGHMASK);
112 :
113 18165477354 : return xylo;
114 : }
115 :
116 : INLINE long
117 >22554*10^7 : addmul(ulong x, ulong y)
118 : {
119 >22554*10^7 : const ulong xlo = LOWWORD(x), xhi = HIGHWORD(x);
120 >22554*10^7 : const ulong ylo = LOWWORD(y), yhi = HIGHWORD(y);
121 : ulong xylo,xymid,xyhi,xymidhi,xymidlo;
122 : ulong xhl,yhl;
123 :
124 >22554*10^7 : xylo = xlo*ylo; xyhi = xhi*yhi;
125 >22554*10^7 : xhl = xhi+xlo; yhl = yhi+ylo;
126 >22554*10^7 : xymid = xhl*yhl - (xyhi+xylo);
127 :
128 >22554*10^7 : xylo += hiremainder; xyhi += (xylo < hiremainder);
129 :
130 >22554*10^7 : xymidhi = HIGHWORD(xymid);
131 >22554*10^7 : xymidlo = xymid << BITS_IN_HALFULONG;
132 :
133 >22554*10^7 : xylo += xymidlo;
134 >45109*10^7 : hiremainder = xyhi + xymidhi + (xylo < xymidlo)
135 >22554*10^7 : + ((((xhl + yhl) >> 1) - xymidhi) & HIGHMASK);
136 :
137 >22554*10^7 : return xylo;
138 : }
139 : #endif
140 :
141 : #endif
|