Code coverage tests

This page documents the degree to which the PARI/GP source code is tested by our public test suite, distributed with the source distribution in directory src/test/. This is measured by the gcov utility; we then process gcov output using the lcov frond-end.

We test a few variants depending on Configure flags on the pari.math.u-bordeaux.fr machine (x86_64 architecture), and agregate them in the final report:

The target is 90% coverage for all mathematical modules (given that branches depending on DEBUGLEVEL or DEBUGMEM are not covered). This script is run to produce the results below.

LCOV - code coverage report
Current view: top level - kernel/none - gcdll.c (source / functions) Hit Total Coverage
Test: PARI/GP v2.8.0 lcov report (development 19217-a6dcf64) Lines: 356 360 98.9 %
Date: 2016-07-27 07:10:32 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #line 2 "../src/kernel/none/gcdll.c"
       2             : /* Copyright (C) 2000  The PARI group.
       3             : 
       4             : This file is part of the PARI/GP package.
       5             : 
       6             : PARI/GP is free software; you can redistribute it and/or modify it under the
       7             : terms of the GNU General Public License as published by the Free Software
       8             : Foundation. It is distributed in the hope that it will be useful, but WITHOUT
       9             : ANY WARRANTY WHATSOEVER.
      10             : 
      11             : Check the License for details. You should have received a copy of it, along
      12             : with the package; see the file 'COPYING'. If not, write to the Free Software
      13             : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
      14             : 
      15             : /***********************************************************************/
      16             : /**                                                                   **/
      17             : /**                          GCD                                      **/
      18             : /**                                                                   **/
      19             : /***********************************************************************/
      20             : /* Fast ulong gcd.  Called with y odd; x can be arbitrary (but will most of
      21             :  * the time be smaller than y). */
      22             : 
      23             : /* Gotos are Harmful, and Programming is a Science.  E.W.Dijkstra. */
      24             : ulong
      25   198019118 : gcduodd(ulong x, ulong y)         /* assume y&1==1, y > 1 */
      26             : {
      27   198019118 :   if (!x) return y;
      28             :   /* fix up x */
      29   198019118 :   while (!(x&1)) x>>=1;
      30   198019118 :   if (x==1) return 1;
      31   187840317 :   if (x==y) return y;
      32   186711221 :   else if (x>y) goto xislarger;/* will be rare, given how we'll use this */
      33             :   /* loop invariants: x,y odd and distinct. */
      34             :  yislarger:
      35  2869403758 :   if ((x^y)&2)                 /* ...01, ...11 or vice versa */
      36  1442067183 :     y=(x>>2)+(y>>2)+1;         /* ==(x+y)>>2 except it can't overflow */
      37             :   else                         /* ...01,...01 or ...11,...11 */
      38  1427336575 :     y=(y-x)>>2;                /* now y!=0 in either case */
      39  2869403758 :   while (!(y&1)) y>>=1;        /* kill any windfall-gained powers of 2 */
      40  2869403758 :   if (y==1) return 1;          /* comparand == return value... */
      41  2795013586 :   if (x==y) return y;          /* this and the next is just one comparison */
      42  2769104422 :   else if (x<y) goto yislarger;/* else fall through to xislarger */
      43             : 
      44             :  xislarger:                    /* same as above, seen through a mirror */
      45  2612523786 :   if ((x^y)&2)
      46  1344497912 :     x=(x>>2)+(y>>2)+1;
      47             :   else
      48  1268025874 :     x=(x-y)>>2;                /* x!=0 */
      49  2612523786 :   while (!(x&1)) x>>=1;
      50  2612523786 :   if (x==1) return 1;
      51  2553441713 :   if (x==y) return y;
      52  2526111901 :   else if (x>y) goto xislarger;
      53             : 
      54  1705456280 :   goto yislarger;
      55             : }
      56             : /* Gotos are useful, and Programming is an Art.  D.E.Knuth. */
      57             : /* PS: Of course written with Dijkstra's lessons firmly in mind... --GN */
      58             : 
      59             : /* at least one of a or b is odd, return gcd(a,b) */
      60             : INLINE ulong
      61    69599763 : mygcduodd(ulong a, ulong b)
      62             : {
      63             :   ulong c;
      64    69599763 :   if (b&1)
      65             :   {
      66    44308362 :     if (a==1 || b==1)
      67     6260300 :       c = 1;
      68             :     else
      69    38048062 :      c = gcduodd(a, b);
      70             :   }
      71             :   else
      72             :   {
      73    25291401 :     if (a==1)
      74     9323826 :       c = 1;
      75             :     else
      76    15967575 :       c = gcduodd(b, a);
      77             :   }
      78    69599763 :   return c;
      79             : }
      80             : 
      81             : /* modified right shift binary algorithm with at most one division */
      82             : long
      83     5545277 : cgcd(long a,long b)
      84             : {
      85             :   long v;
      86     5545277 :   a=labs(a); if (!b) return a;
      87     4174434 :   b=labs(b); if (!a) return b;
      88     4136195 :   if (a>b) { a %= b; if (!a) return b; }
      89     1961493 :   else     { b %= a; if (!b) return a; }
      90     1539585 :   v = vals(a|b);
      91     1539585 :   return (long)(mygcduodd((ulong)(a>>v), (ulong)(b>>v)) << v);
      92             : }
      93             : ulong
      94     9135649 : ugcd(ulong a,ulong b)
      95             : {
      96             :   long v;
      97     9135649 :   if (!b) return a;
      98     9133136 :   if (!a) return b;
      99     2807279 :   if (a>b) { a %= b; if (!a) return b; }
     100     1044777 :   else     { b %= a; if (!b) return a; }
     101      563797 :   v = vals(a|b);
     102      563797 :   return mygcduodd(a>>v, b>>v) << v;
     103             : }
     104             : 
     105             : /* For gcdii(): assume a>b>0, return gcd(a,b) as a GEN */
     106             : static GEN
     107   129617756 : igcduu(ulong a, ulong b)
     108             : {
     109             :   long v;
     110   129617756 :   a %= b; if (!a) return utoipos(b);
     111    67496381 :   v = vals(a|b);
     112    67496381 :   return utoipos( mygcduodd(a>>v, b>>v) << v );
     113             : }
     114             : 
     115             : /*Warning: overflows silently if lcm does not fit*/
     116             : long
     117      745333 : clcm(long a,long b)
     118             : {
     119      745333 :   long d = cgcd(a,b);
     120      745333 :   if (!d) return 0;
     121      745333 :   return d == 1? a*b: a*(b/d);
     122             : }
     123             : 
     124             : /********************************************************************/
     125             : /**                                                                **/
     126             : /**               INTEGER EXTENDED GCD  (AND INVMOD)               **/
     127             : /**                                                                **/
     128             : /********************************************************************/
     129             : 
     130             : /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
     131             :  * in the context of trying to improve elliptic curve cryptosystem attacking
     132             :  * algorithms.  2001Jan02 -- added bezout() functionality.
     133             :  *
     134             :  * Two basic ideas - (1) avoid many integer divisions, especially when the
     135             :  * quotient is 1 (which happens more than 40% of the time).  (2) Use Lehmer's
     136             :  * trick as modified by Jebelean of extracting a couple of words' worth of
     137             :  * leading bits from both operands, and compute partial quotients from them
     138             :  * as long as we can be sure of their values.  The Jebelean modifications
     139             :  * consist in reliable inequalities from which we can decide fast whether
     140             :  * to carry on or to return to the outer loop, and in re-shifting after the
     141             :  * first word's worth of bits has been used up.  All of this is described
     142             :  * in R. Lercier's these [pp148-153 & 163f.], except his outer loop isn't
     143             :  * quite right  (the catch-up divisions needed when one partial quotient is
     144             :  * larger than a word are missing).
     145             :  *
     146             :  * The API consists of invmod() and bezout() below;  the single-word routines
     147             :  * xgcduu and xxgcduu may be called directly if desired;  lgcdii() probably
     148             :  * doesn't make much sense out of context.
     149             :  *
     150             :  * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
     151             :  * ptotically about a factor 2.5 .. 3, depending on processor architecture,
     152             :  * than the naive continued-division code.  Unfortunately, thanks to the
     153             :  * unrolled loops and all, the code is a bit lengthy.
     154             :  */
     155             : 
     156             : /*==================================
     157             :  * xgcduu(d,d1,f,v,v1,s)
     158             :  * xxgcduu(d,d1,f,u,u1,v,v1,s)
     159             :  * rgcduu(d,d1,vmax,u,u1,v,v1,s)
     160             :  *==================================*/
     161             : /*
     162             :  * Fast `final' extended gcd algorithm, acting on two ulongs.  Ideally this
     163             :  * should be replaced with assembler versions wherever possible.  The present
     164             :  * code essentially does `subtract, compare, and possibly divide' at each step,
     165             :  * which is reasonable when hardware division (a) exists, (b) is a bit slowish
     166             :  * and (c) does not depend a lot on the operand values (as on i486).  When
     167             :  * wordsize division is in fact an assembler routine based on subtraction,
     168             :  * this strategy may not be the most efficient one.
     169             :  *
     170             :  * xxgcduu() should be called with  d > d1 > 0, returns gcd(d,d1), and assigns
     171             :  * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1]  (i.e.,
     172             :  * the product of all the [0, 1; 1 q_j] where the leftmost factor arises from
     173             :  * the quotient of the first division step),  and the information about the
     174             :  * implied signs to s  (-1 when an odd number of divisions has been done,
     175             :  * 1 otherwise).  xgcduu() is exactly the same except that u,u1 are not com-
     176             :  * puted (and not returned, of course).
     177             :  *
     178             :  * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
     179             :  * (so we can stop the chain division one step early:  as soon as the remainder
     180             :  * equals 1).  Use this when you intend to use only what would be v, or only
     181             :  * what would be u and v, after that final division step, but not u1 and v1.
     182             :  * With the flag in force and thus without that final step, the interesting
     183             :  * quantity/ies will still sit in [u1 and] v1, of course.
     184             :  *
     185             :  * For computing the inverse of a single-word INTMOD known to exist, pass f=1
     186             :  * to xgcduu(), and obtain the result from s and v1.  (The routine does the
     187             :  * right thing when d1==1 already.)  For finishing a multiword modinv known
     188             :  * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix  (with
     189             :  * properly adjusted signs)  onto the values v' and v1' previously obtained
     190             :  * from the multiword division steps.  Actually, just take the scalar product
     191             :  * of [v',v1'] with [u1,-v1], and change the sign if s==-1.  (If the final
     192             :  * step had been carried out, it would be [-u,v], and s would also change.)
     193             :  * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
     194             :  * Finally, f=0 with xxgcduu() is useful for Bezout computations.
     195             :  * [Harrumph.  In the above prescription, the sign turns out to be precisely
     196             :  * wrong.]
     197             :  * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
     198             :  * make a difference when gcd(d,d1)>1.  The speedup is negligible.)
     199             :  *
     200             :  * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
     201             :  * recover the final u,u1 given only v,v1 and s.  However, it probably isn't
     202             :  * worthwhile, as it trades a few multiplications for a division.
     203             :  *
     204             :  * Note that these routines do not know and do not need to know about the
     205             :  * PARI stack.
     206             :  *
     207             :  * Added 2001Jan15:
     208             :  * rgcduu() is a variant of xxgcduu() which does not have f  (the effect is
     209             :  * that of f=0),  but instead has a ulong vmax parameter, for use in rational
     210             :  * reconstruction below.  It returns when v1 exceeds vmax;  v will never
     211             :  * exceed vmax.  (vmax=0 is taken as a synonym of ULONG_MAX i.e. unlimited,
     212             :  * in which case rgcduu behaves exactly like xxgcduu with f=0.)  The return
     213             :  * value of rgcduu() is typically meaningless;  the interesting part is the
     214             :  * matrix.
     215             :  */
     216             : 
     217             : ulong
     218   170091370 : xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
     219             : {
     220             :   ulong xv,xv1, xs, q,res;
     221             :   LOCAL_HIREMAINDER;
     222             : 
     223             :   /* The above blurb contained a lie.  The main loop always stops when d1
     224             :    * has become equal to 1.  If (d1 == 1 && !(f&1)) after the loop, we do
     225             :    * the final `division' of d by 1 `by hand' as it were.
     226             :    *
     227             :    * The loop has already been unrolled once.  Aggressive optimization could
     228             :    * well lead to a totally unrolled assembler version...
     229             :    *
     230             :    * On modern x86 architectures, this loop is a pig anyway.  The division
     231             :    * instruction always puts its result into the same pair of registers, and
     232             :    * we always want to use one of them straight away, so pipeline performance
     233             :    * will suck big time.  An assembler version should probably do a first loop
     234             :    * computing and storing all the quotients -- their number is bounded in
     235             :    * advance -- and then assembling the matrix in a second pass.  On other
     236             :    * architectures where we can cycle through four or so groups of registers
     237             :    * and exploit a fast ALU result-to-operand feedback path, this is much less
     238             :    * of an issue.  (Intel sucks.  See http://www.x86.org/ ...)
     239             :    */
     240   170091370 :   xs = res = 0;
     241   170091370 :   xv = 0UL; xv1 = 1UL;
     242  1140572488 :   while (d1 > 1UL)
     243             :   {
     244   891046914 :     d -= d1;                        /* no need to use subll */
     245   891046914 :     if (d >= d1)
     246             :     {
     247   485480497 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     248   485480497 :       xv += q * xv1;
     249             :     }
     250             :     else
     251   405566417 :       xv += xv1;
     252             :                                 /* possible loop exit */
     253   891046914 :     if (d <= 1UL) { xs=1; break; }
     254             :                                 /* repeat with inverted roles */
     255   800389748 :     d1 -= d;
     256   800389748 :     if (d1 >= d)
     257             :     {
     258   453537005 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     259   453537005 :       xv1 += q * xv;
     260             :     }
     261             :     else
     262   346852743 :       xv1 += xv;
     263             :   } /* while */
     264             : 
     265   170091370 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     266             :   {
     267      743323 :     if (xs && d==1)
     268      298627 :     { xv1 += d1 * xv; xs = 0; res = 1UL; }
     269      444696 :     else if (!xs && d1==1)
     270      282989 :     { xv += d * xv1; xs = 1; res = 1UL; }
     271             :   }
     272             : 
     273   170091370 :   if (xs)
     274             :   {
     275    91137261 :     *s = -1; *v = xv1; *v1 = xv;
     276    91137261 :     return (res ? res : (d==1 ? 1UL : d1));
     277             :   }
     278             :   else
     279             :   {
     280    78954109 :     *s = 1; *v = xv; *v1 = xv1;
     281    78954109 :     return (res ? res : (d1==1 ? 1UL : d));
     282             :   }
     283             : }
     284             : 
     285             : 
     286             : ulong
     287   139381003 : xxgcduu(ulong d, ulong d1, int f,
     288             :         ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     289             : {
     290             :   ulong xu,xu1, xv,xv1, xs, q,res;
     291             :   LOCAL_HIREMAINDER;
     292             : 
     293   139381003 :   xs = res = 0;
     294   139381003 :   xu = xv1 = 1UL;
     295   139381003 :   xu1 = xv = 0UL;
     296   451475527 :   while (d1 > 1UL)
     297             :   {
     298   240679654 :     d -= d1;                        /* no need to use subll */
     299   240679654 :     if (d >= d1)
     300             :     {
     301   139119248 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     302   139119248 :       xv += q * xv1;
     303   139119248 :       xu += q * xu1;
     304             :     }
     305             :     else
     306   101560406 :     { xv += xv1; xu += xu1; }
     307             :                                 /* possible loop exit */
     308   240679654 :     if (d <= 1UL) { xs=1; break; }
     309             :                                 /* repeat with inverted roles */
     310   172713521 :     d1 -= d;
     311   172713521 :     if (d1 >= d)
     312             :     {
     313    95243423 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     314    95243423 :       xv1 += q * xv;
     315    95243423 :       xu1 += q * xu;
     316             :     }
     317             :     else
     318    77470098 :     { xv1 += xv; xu1 += xu; }
     319             :   } /* while */
     320             : 
     321   139381003 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     322             :   {
     323   135176616 :     if (xs && d==1)
     324             :     {
     325    40649618 :       xv1 += d1 * xv;
     326    40649618 :       xu1 += d1 * xu;
     327    40649618 :       xs = 0; res = 1UL;
     328             :     }
     329    94526998 :     else if (!xs && d1==1)
     330             :     {
     331    60300856 :       xv += d * xv1;
     332    60300856 :       xu += d * xu1;
     333    60300856 :       xs = 1; res = 1UL;
     334             :     }
     335             :   }
     336             : 
     337   139381003 :   if (xs)
     338             :   {
     339    87823366 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     340    87823366 :     return (res ? res : (d==1 ? 1UL : d1));
     341             :   }
     342             :   else
     343             :   {
     344    51557637 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     345    51557637 :     return (res ? res : (d1==1 ? 1UL : d));
     346             :   }
     347             : }
     348             : 
     349             : ulong
     350       70060 : rgcduu(ulong d, ulong d1, ulong vmax,
     351             :        ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     352             : {
     353       70060 :   ulong xu,xu1, xv,xv1, xs, q, res=0;
     354       70060 :   int f = 0;
     355             :   LOCAL_HIREMAINDER;
     356             : 
     357       70060 :   if (vmax == 0) vmax = ULONG_MAX;
     358       70060 :   xs = res = 0;
     359       70060 :   xu = xv1 = 1UL;
     360       70060 :   xu1 = xv = 0UL;
     361      267514 :   while (d1 > 1UL)
     362             :   {
     363      192762 :     d -= d1;                        /* no need to use subll */
     364      192762 :     if (d >= d1)
     365             :     {
     366       97219 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     367       97219 :       xv += q * xv1;
     368       97219 :       xu += q * xu1;
     369             :     }
     370             :     else
     371       95543 :     { xv += xv1; xu += xu1; }
     372             :                                 /* possible loop exit */
     373      192762 :     if (xv > vmax) { f=xs=1; break; }
     374      175160 :     if (d <= 1UL) { xs=1; break; }
     375             :                                 /* repeat with inverted roles */
     376      155292 :     d1 -= d;
     377      155292 :     if (d1 >= d)
     378             :     {
     379       96932 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     380       96932 :       xv1 += q * xv;
     381       96932 :       xu1 += q * xu;
     382             :     }
     383             :     else
     384       58360 :     { xv1 += xv; xu1 += xu; }
     385             :                                 /* possible loop exit */
     386      155292 :     if (xv1 > vmax) { f=1; break; }
     387             :   } /* while */
     388             : 
     389       70060 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     390             :   {
     391       24560 :     if (xs && d==1)
     392             :     {
     393       19868 :       xv1 += d1 * xv;
     394       19868 :       xu1 += d1 * xu;
     395       19868 :       xs = 0; res = 1UL;
     396             :     }
     397        4692 :     else if (!xs && d1==1)
     398             :     {
     399        4692 :       xv += d * xv1;
     400        4692 :       xu += d * xu1;
     401        4692 :       xs = 1; res = 1UL;
     402             :     }
     403             :   }
     404             : 
     405       70060 :   if (xs)
     406             :   {
     407       22294 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     408       22294 :     return (res ? res : (d==1 ? 1UL : d1));
     409             :   }
     410             :   else
     411             :   {
     412       47766 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     413       47766 :     return (res ? res : (d1==1 ? 1UL : d));
     414             :   }
     415             : }
     416             : 
     417             : /*==================================
     418             :  * cbezout(a,b,uu,vv)
     419             :  *==================================
     420             :  * Same as bezout() but for C longs.
     421             :  *    Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
     422             :  *    such that g = u*a + v*b.
     423             :  * Special cases:
     424             :  *    a == b == 0 ==> pick u=1, v=0 (and return 1, surprisingly)
     425             :  *    a != 0 == b ==> keep v=0
     426             :  *    a == 0 != b ==> keep u=0
     427             :  *    |a| == |b| != 0 ==> keep u=0, set v=+-1
     428             :  * Assignments through uu,vv happen unconditionally;  non-NULL pointers
     429             :  * _must_ be used.
     430             :  */
     431             : long
     432       18459 : cbezout(long a,long b,long *uu,long *vv)
     433             : {
     434             :   long s,*t;
     435       18459 :   ulong d = labs(a), d1 = labs(b);
     436             :   ulong r,u,u1,v,v1;
     437             : 
     438             : #ifdef DEBUG_CBEZOUT
     439             :   err_printf("> cbezout(%ld,%ld,%p,%p)\n", a, b, (void *)uu, (void *)vv);
     440             : #endif
     441       18459 :   if (!b)
     442             :   {
     443         231 :     *vv=0L;
     444         231 :     if (!a)
     445             :     {
     446           0 :       *uu=1L;
     447             : #ifdef DEBUG_CBEZOUT
     448             :       err_printf("< %ld (%ld, %ld)\n", 1L, *uu, *vv);
     449             : #endif
     450           0 :       return 0L;
     451             :     }
     452         231 :     *uu = a < 0 ? -1L : 1L;
     453             : #ifdef DEBUG_CBEZOUT
     454             :     err_printf("< %ld (%ld, %ld)\n", (long)d, *uu, *vv);
     455             : #endif
     456         231 :     return (long)d;
     457             :   }
     458       18228 :   else if (!a || (d == d1))
     459             :   {
     460         581 :     *uu = 0L; *vv = b < 0 ? -1L : 1L;
     461             : #ifdef DEBUG_CBEZOUT
     462             :     err_printf("< %ld (%ld, %ld)\n", (long)d1, *uu, *vv);
     463             : #endif
     464         581 :     return (long)d1;
     465             :   }
     466       17647 :   else if (d == 1)                /* frequently used by nfinit */
     467             :   {
     468       13020 :     *uu = a; *vv = 0L;
     469             : #ifdef DEBUG_CBEZOUT
     470             :     err_printf("< %ld (%ld, %ld)\n", 1L, *uu, *vv);
     471             : #endif
     472       13020 :     return 1L;
     473             :   }
     474        4627 :   else if (d < d1)
     475             :   {
     476             : /* bug in gcc-2.95.3:
     477             :  * s = a; a = b; b = s; produces wrong result a = b. This is OK:  */
     478        3626 :     { long _x = a; a = b; b = _x; }        /* in order to keep the right signs */
     479        3626 :     r = d; d = d1; d1 = r;
     480        3626 :     t = uu; uu = vv; vv = t;
     481             : #ifdef DEBUG_CBEZOUT
     482             :     err_printf("  swapping\n");
     483             : #endif
     484             :   }
     485             :   /* d > d1 > 0 */
     486        4627 :   r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
     487        4627 :   if (s < 0)
     488             :   {
     489         812 :     *uu = a < 0 ? (long)u : -(long)u;
     490         812 :     *vv = b < 0 ? -(long)v : (long)v;
     491             :   }
     492             :   else
     493             :   {
     494        3815 :     *uu = a < 0 ? -(long)u : (long)u;
     495        3815 :     *vv = b < 0 ? (long)v : -(long)v;
     496             :   }
     497             : #ifdef DEBUG_CBEZOUT
     498             :   err_printf("< %ld (%ld, %ld)\n", (long)r, *uu, *vv);
     499             : #endif
     500        4627 :   return (long)r;
     501             : }
     502             : 
     503             : /*==================================
     504             :  * lgcdii(d,d1,u,u1,v,v1,vmax)
     505             :  *==================================*/
     506             : /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
     507             :  *
     508             :  * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
     509             :  * and a quantity of bits from d1 obtained by a shift of the same displacement,
     510             :  * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
     511             :  * the product of all the [0, 1; 1, q_j] thus obtained, where the leftmost
     512             :  * factor arises from the quotient of the first division step.
     513             :  *
     514             :  * For use in rational reconstruction, input param vmax can be given a
     515             :  * nonzero value.  In this case, we will return early as soon as v1 > vmax
     516             :  * (i.e. it is guaranteed that v <= vmax). --2001Jan15
     517             :  *
     518             :  * MUST be called with  d > d1 > 0, and with  d  occupying more than one
     519             :  * significant word  (if it doesn't, the caller has no business with us;
     520             :  * he/she/it should use xgcduu() instead).  Returns the number of reduction/
     521             :  * swap steps carried out, possibly zero, or under certain conditions minus
     522             :  * that number.  When the return value is nonzero, the caller should use the
     523             :  * returned recurrence matrix to update its own copies of d,d1.  When the
     524             :  * return value is non-positive, and the latest remainder after updating
     525             :  * turns out to be nonzero, the caller should at once attempt a full division,
     526             :  * rather than first trying lgcdii() again -- this typically happens when we
     527             :  * are about to encounter a quotient larger than half a word.  (This is not
     528             :  * detected infallibly -- after a positive return value, it is perfectly
     529             :  * possible that the next stage will end up needing a full division.  After
     530             :  * a negative return value, however, this is certain, and should be acted
     531             :  * upon.)
     532             :  *
     533             :  * (The sign information, for which xgcduu() has its return argument s, is now
     534             :  * implicit in the LSB of our return value, and the caller may take advantage
     535             :  * of the fact that a return value of +-1 implies u==0,u1==v==1  [only v1 pro-
     536             :  * vides interesting information in this case].  One might also use the fact
     537             :  * that if the return value is +-2, then u==1, but this is rather marginal.)
     538             :  *
     539             :  * If it was not possible to determine even the first quotient, either because
     540             :  * we're too close to an integer quotient or because the quotient would be
     541             :  * larger than one word  (if the `leading digit' of d1 after shifting is all
     542             :  * zeros),  we return 0 and do not bother to assign anything to the last four
     543             :  * args.
     544             :  *
     545             :  * The division chain might (sometimes) even run to completion.  It will be
     546             :  * up to the caller to detect this case.
     547             :  *
     548             :  * This routine does _not_ change d or d1;  this will also be up to the caller.
     549             :  *
     550             :  * Note that this routine does not know and does not need to know about the
     551             :  * PARI stack.
     552             :  */
     553             : /*#define DEBUG_LEHMER 1 */
     554             : int
     555     6670174 : lgcdii(ulong* d, ulong* d1,
     556             :        ulong* u, ulong* u1, ulong* v, ulong* v1,
     557             :        ulong vmax)
     558             : {
     559             :   /* Strategy:  (1) Extract/shift most significant bits.  We assume that d
     560             :    * has at least two significant words, but we can cope with a one-word d1.
     561             :    * Let dd,dd1 be the most significant dividend word and matching part of the
     562             :    * divisor.
     563             :    * (2) Check for overflow on the first division.  For our purposes, this
     564             :    * happens when the upper half of dd1 is zero.  (Actually this is detected
     565             :    * during extraction.)
     566             :    * (3) Get a fix on the first quotient.  We compute q = floor(dd/dd1), which
     567             :    * is an upper bound for floor(d/d1), and which gives the true value of the
     568             :    * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
     569             :    * (If it isn't, we give up.  This is annoying because the subsequent full
     570             :    * division will repeat some work already done, but it happens very infre-
     571             :    * quently.  Doing the extra-bit-fetch in this case would be awkward.)
     572             :    * (4) Finish initializations.
     573             :    *
     574             :    * The remainder of the action is comparatively boring... The main loop has
     575             :    * been unrolled once  (so we don't swap things and we can apply Jebelean's
     576             :    * termination conditions which alternatingly take two different forms during
     577             :    * successive iterations).  When we first run out of sufficient bits to form
     578             :    * a quotient, and have an extra word of each operand, we pull out two whole
     579             :    * word's worth of dividend bits, and divisor bits of matching significance;
     580             :    * to these we apply our partial matrix (disregarding overflow because the
     581             :    * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
     582             :    * re-extract one word's worth of the current dividend and a matching amount
     583             :    * of divisor bits.  The affair will normally terminate with matrix entries
     584             :    * just short of a whole word.  (We terminate the inner loop before these can
     585             :    * possibly overflow.)
     586             :    */
     587             :   ulong dd,dd1,ddlo,dd1lo, sh,shc;        /* `digits', shift count */
     588             :   ulong xu,xu1, xv,xv1, q,res;        /* recurrences, partial quotient, count */
     589             :   ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
     590             :   ulong dm1,dm2,d1m1,d1m2;
     591             :   long ld, ld1, lz;                /* t_INT effective lengths */
     592     6670174 :   int skip = 0;                        /* a boolean flag */
     593             :   LOCAL_OVERFLOW;
     594             :   LOCAL_HIREMAINDER;
     595             : 
     596             : #ifdef DEBUG_LEHMER
     597             :   voir(d, -1); voir(d1, -1);
     598             : #endif
     599             :   /* following is just for convenience: vmax==0 means no bound */
     600     6670174 :   if (vmax == 0) vmax = ULONG_MAX;
     601     6670174 :   ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
     602     6670174 :   if (lz > 1) return 0;                /* rare, quick and desperate exit */
     603             : 
     604     6644893 :   d = int_MSW(d); d1 = int_MSW(d1);                /* point at the leading `digits' */
     605     6644893 :   dm1 = *int_precW(d); d1m1 = *int_precW(d1);
     606     6644893 :   dm2 = *int_precW(int_precW(d)); d1m2 = *int_precW(int_precW(d1));
     607     6644893 :   dd1lo = 0;                        /* unless we find something better */
     608     6644893 :   sh = bfffo(*d);                /* obtain dividend left shift count */
     609             : 
     610     6644893 :   if (sh)
     611             :   {                                /* do the shifting */
     612     6340798 :     shc = BITS_IN_LONG - sh;
     613     6340798 :     if (lz)
     614             :     {                                /* dividend longer than divisor */
     615     1588122 :       dd1 = (*d1 >> shc);
     616     1588122 :       if (!(HIGHMASK & dd1)) return 0;  /* overflow detected */
     617     1184142 :       if (ld1 > 3)
     618       34250 :         dd1lo = (*d1 << sh) + (d1m1 >> shc);
     619             :       else
     620     1149892 :         dd1lo = (*d1 << sh);
     621             :     }
     622             :     else
     623             :     {                                /* dividend and divisor have the same length */
     624     4752676 :       dd1 = (*d1 << sh);
     625     4752676 :       if (!(HIGHMASK & dd1)) return 0;
     626     4749905 :       if (ld1 > 3)
     627             :       {
     628     4760919 :         dd1 += (d1m1 >> shc);
     629     4760919 :         if (ld1 > 4)
     630     1379052 :           dd1lo = (d1m1 << sh) + (d1m2 >> shc);
     631             :         else
     632     3381867 :           dd1lo = (d1m1 << sh);
     633             :       }
     634             :     }
     635             :     /* following lines assume d to have 2 or more significant words */
     636     5934047 :     dd = (*d << sh) + (dm1 >> shc);
     637     5934047 :     if (ld > 4)
     638     1405929 :       ddlo = (dm1 << sh) + (dm2 >> shc);
     639             :     else
     640     4528118 :       ddlo = (dm1 << sh);
     641             :   }
     642             :   else
     643             :   {                                /* no shift needed */
     644      304095 :     if (lz) return 0;                /* div'd longer than div'r: o'flow automatic */
     645      298450 :     dd1 = *d1;
     646      298450 :     if (!(HIGHMASK & dd1)) return 0;
     647      298066 :     if(ld1 > 3) dd1lo = d1m1;
     648             :     /* assume again that d has another significant word */
     649      298066 :     dd = *d; ddlo = dm1;
     650             :   }
     651             : #ifdef DEBUG_LEHMER
     652             :   err_printf("  %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
     653             : #endif
     654             : 
     655             :   /* First subtraction/division stage.  (If a subtraction initially suffices,
     656             :    * we don't divide at all.)  If a Jebelean condition is violated, and we
     657             :    * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
     658             :    * give up and ask for a full division.  Otherwise we commit the result,
     659             :    * possibly deciding to re-shift immediately afterwards.
     660             :    */
     661     6232113 :   dd -= dd1;
     662     6232113 :   if (dd < dd1)
     663             :   {                                /* first quotient known to be == 1 */
     664     2447555 :     xv1 = 1UL;
     665     2447555 :     if (!dd)                        /* !(Jebelean condition), extraspecial case */
     666             :     {                                /* note this can actually happen...  Now
     667             :                                      * q==1 is known, but we underflow already.
     668             :                                  * OTOH we've just shortened d by a whole word.
     669             :                                  * Thus we feel pleased with ourselves and
     670             :                                  * return.  (The re-shift code below would
     671             :                                  * do so anyway.) */
     672      105441 :       *u = 0; *v = *u1 = *v1 = 1UL;
     673      105441 :       return -1;                /* Next step will be a full division. */
     674             :     } /* if !(Jebelean) then */
     675             :   }
     676             :   else
     677             :   {                                /* division indicated */
     678     3784558 :     hiremainder = 0;
     679     3784558 :     xv1 = 1 + divll(dd, dd1);        /* xv1: alternative spelling of `q', here ;) */
     680     3784558 :     dd = hiremainder;
     681     3784558 :     if (dd < xv1)                /* !(Jebelean cond'), non-extra special case */
     682             :     {                                /* Attempt to complete the division using the
     683             :                                  * less significant bits, before skipping right
     684             :                                  * past the 1st loop to the reshift stage. */
     685      142427 :       ddlo = subll(ddlo, mulll(xv1, dd1lo));
     686      142427 :       dd = subllx(dd, hiremainder);
     687             : 
     688             :       /* If we now have an overflow, q was _certainly_ too large.  Thanks to
     689             :        * our decision not to get here unless the original dd1 had bits set in
     690             :        * the upper half of the word, however, we now do know that the correct
     691             :        * quotient is in fact q-1.  Adjust our data accordingly. */
     692      142427 :       if (overflow)
     693             :       {
     694       51805 :         xv1--;
     695       51805 :         ddlo = addll(ddlo,dd1lo);
     696       51805 :         dd = addllx(dd,dd1);        /* overflows again which cancels the borrow */
     697             :         /* ...and fall through to skip=1 below */
     698             :       }
     699             :       else
     700             :       /* Test Jebelean condition anew, at this point using _all_ the extracted
     701             :        * bits we have.  This is clutching at straws; we have a more or less
     702             :        * even chance of succeeding this time.  Note that if we fail, we really
     703             :        * do not know whether the correct quotient would have been q or some
     704             :        * smaller value. */
     705       90622 :         if (!dd && ddlo < xv1) return 0;
     706             : 
     707             :       /* Otherwise, we now know that q is correct, but we cannot go into the
     708             :        * 1st loop.  Raise a flag so we'll remember to skip past the loop.
     709             :        * Get here also after the q-1 adjustment case. */
     710      102911 :       skip = 1;
     711             :     } /* if !(Jebelean) then */
     712             :   }
     713     6087156 :   res = 1;
     714             : #ifdef DEBUG_LEHMER
     715             :   err_printf("  q = %ld, %lx, %lx\n", xv1, dd1, dd);
     716             : #endif
     717     6087156 :   if (xv1 > vmax)
     718             :   {                                /* gone past the bound already */
     719        1002 :     *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
     720        1002 :     return res;
     721             :   }
     722     6086154 :   xu = 0UL; xv = xu1 = 1UL;
     723             : 
     724             :   /* Some invariants from here across the first loop:
     725             :    *
     726             :    * At this point, and again after we are finished with the first loop and
     727             :    * subsequent conditional, a division and the attached update of the
     728             :    * recurrence matrix have just been carried out completely.  The matrix
     729             :    * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
     730             :    * columns), and the current remainder == next divisor (dd at the moment)
     731             :    * is nonzero (it might be zero here, but then skip will have been set).
     732             :    *
     733             :    * After the first loop, or when skip is set already, it will also be the
     734             :    * case that there aren't sufficiently many bits to continue without re-
     735             :    * shifting.  If the divisor after reshifting is zero, or indeed if it
     736             :    * doesn't have more than half a word's worth of bits, we will have to
     737             :    * return at that point.  Otherwise, we proceed into the second loop.
     738             :    *
     739             :    * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
     740             :    * already reflect the result of applying the current matrix to the old
     741             :    * ddorig:ddlo and dd1orig:dd1lo.  (For the first iteration above, this
     742             :    * was easy to achieve, and we didn't even need to peek into the (now
     743             :    * no longer existent!) saved words.  After the loop, we'll stop for a
     744             :    * moment to merge in the ddlo,dd1lo contributions.)
     745             :    *
     746             :    * Note that after the first division, even an a priori quotient of 1 cannot
     747             :    * be trusted until we've checked Jebelean's condition -- it cannot be too
     748             :    * large, of course, but it might be too small.
     749             :    */
     750             : 
     751     6086154 :   if (!skip)
     752             :   {
     753             :     for(;;)
     754             :     {
     755             :       /* First half of loop divides dd into dd1, and leaves the recurrence
     756             :        * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     757             :        * entries) when successful. */
     758    50715690 :       tmpd = dd1 - dd;
     759    50715690 :       if (tmpd < dd)
     760             :       {                                /* quotient suspected to be 1 */
     761             : #ifdef DEBUG_LEHMER
     762             :         q = 1;
     763             : #endif
     764    21156975 :         tmpu = xu + xu1;        /* cannot overflow -- everything bounded by
     765             :                                  * the original dd during first loop */
     766    21156975 :         tmpv = xv + xv1;
     767             :       }
     768             :       else
     769             :       {                                /* division indicated */
     770    29558715 :         hiremainder = 0;
     771    29558715 :         q = 1 + divll(tmpd, dd);
     772    29558715 :         tmpd = hiremainder;
     773    29558715 :         tmpu = xu + q*xu1;        /* can't overflow, but may need to be undone */
     774    29558715 :         tmpv = xv + q*xv1;
     775             :       }
     776             : 
     777    50715690 :       tmp0 = addll(tmpv, xv1);
     778   100044330 :       if ((tmpd < tmpu) || overflow ||
     779    49328640 :           (dd - tmpd < tmp0))        /* !(Jebelean cond.) */
     780             :         break;                        /* skip ahead to reshift stage */
     781             :       else
     782             :       {                                /* commit dd1, xu, xv */
     783    47696310 :         res++;
     784    47696310 :         dd1 = tmpd; xu = tmpu; xv = tmpv;
     785             : #ifdef DEBUG_LEHMER
     786             :         err_printf("  q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
     787             :                    q, dd, dd1, xu1, xu, xv1, xv);
     788             : #endif
     789    47696310 :         if (xv > vmax)
     790             :         {                        /* time to return */
     791       10919 :           *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     792       10919 :           return res;
     793             :         }
     794             :       }
     795             : 
     796             :       /* Second half of loop divides dd1 into dd, and the matrix returns to its
     797             :        * normal arrangement. */
     798    47685391 :       tmpd = dd - dd1;
     799    47685391 :       if (tmpd < dd1)
     800             :       {                                /* quotient suspected to be 1 */
     801             : #ifdef DEBUG_LEHMER
     802             :         q = 1;
     803             : #endif
     804    19394025 :         tmpu = xu1 + xu;        /* cannot overflow */
     805    19394025 :         tmpv = xv1 + xv;
     806             :       }
     807             :       else
     808             :       {                                /* division indicated */
     809    28291366 :         hiremainder = 0;
     810    28291366 :         q = 1 + divll(tmpd, dd1);
     811    28291366 :         tmpd = hiremainder;
     812    28291366 :         tmpu = xu1 + q*xu;
     813    28291366 :         tmpv = xv1 + q*xv;
     814             :       }
     815             : 
     816    47685391 :       tmp0 = addll(tmpu, xu);
     817    92810609 :       if ((tmpd < tmpv) || overflow ||
     818    45125218 :           (dd1 - tmpd < tmp0))        /* !(Jebelean cond.) */
     819             :         break;                        /* skip ahead to reshift stage */
     820             :       else
     821             :       {                                /* commit dd, xu1, xv1 */
     822    44738806 :         res++;
     823    44738806 :         dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     824             : #ifdef DEBUG_LEHMER
     825             :         err_printf("  q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
     826             :                 q, dd1, dd, xu, xu1, xv, xv1);
     827             : #endif
     828    44738806 :         if (xv1 > vmax)
     829             :         {                        /* time to return */
     830        9921 :           *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     831        9921 :           return res;
     832             :         }
     833             :       }
     834             : 
     835    44728885 :     } /* end of first loop */
     836             : 
     837             :     /* Intermezzo: update dd:ddlo, dd1:dd1lo.  (But not if skip is set.) */
     838             : 
     839     5965965 :     if (res&1)
     840             :     {
     841             :       /* after failed division in 1st half of loop:
     842             :        * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
     843             :        *                       * [ -xu, xu1 ; xv, -xv1 ]
     844             :        * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
     845             :        * add the high-order remainders + overflows onto [dd1,dd].)
     846             :        */
     847     3017192 :       tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
     848     3017192 :       tmp1 = subll(mulll(dd1lo,xv), tmp1);
     849     3017192 :       dd1 += subllx(hiremainder, tmp0);
     850     3017192 :       tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
     851     3017192 :       ddlo = subll(tmp2, mulll(dd1lo,xv1));
     852     3017192 :       dd += subllx(tmp0, hiremainder);
     853     3017192 :       dd1lo = tmp1;
     854             :     }
     855             :     else
     856             :     {
     857             :       /* after failed division in 2nd half of loop:
     858             :        * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
     859             :        *                       * [ xu1, -xu ; -xv1, xv ]
     860             :        * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
     861             :        * add the high-order remainders + overflows onto [dd,dd1].)
     862             :        */
     863     2948773 :       tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
     864     2948773 :       tmp1 = subll(tmp1, mulll(dd1lo,xv1));
     865     2948773 :       dd += subllx(tmp0, hiremainder);
     866     2948773 :       tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
     867     2948773 :       dd1lo = subll(mulll(dd1lo,xv), tmp2);
     868     2948773 :       dd1 += subllx(hiremainder, tmp0);
     869     2948773 :       ddlo = tmp1;
     870             :     }
     871             : #ifdef DEBUG_LEHMER
     872             :     err_printf("  %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
     873             : #endif
     874             :   } /* end of skip-pable section:  get here also, with res==1, when there
     875             :      * was a problem immediately after the very first division. */
     876             : 
     877             :   /* Re-shift.  Note:  the shift count _can_ be zero, viz. under the following
     878             :    * precise conditions:  The original dd1 had its topmost bit set, so the 1st
     879             :    * q was 1, and after subtraction, dd had its topmost bit unset.  If now
     880             :    * dd==0, we'd have taken the return exit already, so we couldn't have got
     881             :    * here.  If not, then it must have been the second division which has gone
     882             :    * amiss  (because dd1 was very close to an exact multiple of the remainder
     883             :    * dd value, so this will be very rare).  At this point, we'd have a fairly
     884             :    * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
     885             :    * this is not guaranteed to work.  Instead of trying, we return at once.
     886             :    * The caller will see to it that the initial subtraction is re-done using
     887             :    * _all_ the bits of both operands, which already helps, and the next round
     888             :    * will either be a full division  (if dd occupied a halfword or less),  or
     889             :    * another llgcdii() first step.  In the latter case, since we try a little
     890             :    * harder during our first step, we may actually be able to fix the problem,
     891             :    * and get here again with improved low-order bits and with another step
     892             :    * under our belt.  Otherwise we'll have given up above and forced a full-
     893             :    * blown division.
     894             :    *
     895             :    * If res is even, the shift count _cannot_ be zero.  (The first step forces
     896             :    * a zero into the remainder's MSB, and all subsequent remainders will have
     897             :    * inherited it.)
     898             :    *
     899             :    * The re-shift stage exits if the next divisor has at most half a word's
     900             :    * worth of bits.
     901             :    *
     902             :    * For didactic reasons, the second loop will be arranged in the same way
     903             :    * as the first -- beginning with the division of dd into dd1, as if res
     904             :    * was odd.  To cater for this, if res is actually even, we swap things
     905             :    * around during reshifting.  (During the second loop, the parity of res
     906             :    * does not matter;  we know in which half of the loop we are when we decide
     907             :    * to return.)
     908             :    */
     909             : #ifdef DEBUG_LEHMER
     910             :   err_printf("(sh)");
     911             : #endif
     912             : 
     913     6065314 :   if (res&1)
     914             :   {                                /* after odd number of division(s) */
     915     3115983 :     if (dd1 && (sh = bfffo(dd1)))
     916             :     {
     917     3061052 :       shc = BITS_IN_LONG - sh;
     918     3061052 :       dd = (ddlo >> shc) + (dd << sh);
     919     3061052 :       if (!(HIGHMASK & dd))
     920             :       {
     921       86547 :         *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     922       86547 :         return -res;                /* full division asked for */
     923             :       }
     924     2974505 :       dd1 = (dd1lo >> shc) + (dd1 << sh);
     925             :     }
     926             :     else
     927             :     {                                /* time to return: <= 1 word left, or sh==0 */
     928       54931 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     929       54931 :       return res;
     930             :     }
     931             :   }
     932             :   else
     933             :   {                                /* after even number of divisions */
     934     2949331 :     if (dd)
     935             :     {
     936     2949331 :       sh = bfffo(dd);                /* Known to be positive. */
     937     2949331 :       shc = BITS_IN_LONG - sh;
     938             :                                 /* dd:ddlo will become the new dd1, and v.v. */
     939     2949331 :       tmpd = (ddlo >> shc) + (dd << sh);
     940     2949331 :       dd = (dd1lo >> shc) + (dd1 << sh);
     941     2949331 :       dd1 = tmpd;
     942             :       /* This has completed the swap;  now dd is again the current divisor.
     943             :        * The following test originally inspected dd1 -- a most subtle and
     944             :        * most annoying bug. The Management. */
     945     2949331 :       if (HIGHMASK & dd)
     946             :       {
     947             :         /* recurrence matrix is the wrong way round;  swap it. */
     948     2915639 :         tmp0 = xu; xu = xu1; xu1 = tmp0;
     949     2915639 :         tmp0 = xv; xv = xv1; xv1 = tmp0;
     950             :       }
     951             :       else
     952             :       {
     953             :         /* recurrence matrix is the wrong way round;  fix this. */
     954       33692 :         *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     955       33692 :         return -res;                /* full division asked for */
     956             :       }
     957             :     }
     958             :     else
     959             :     {                                /* time to return: <= 1 word left */
     960           0 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     961           0 :       return res;
     962             :     }
     963             :   } /* end reshift */
     964             : 
     965             : #ifdef DEBUG_LEHMER
     966             :   err_printf("  %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
     967             : #endif
     968             : 
     969             :   /* The Second Loop.  Rip-off of the first, but we now check for overflow
     970             :    * in the recurrences.  Returns instead of breaking when we cannot fix the
     971             :    * quotient any longer.
     972             :    */
     973             : 
     974             :   for(;;)
     975             :   {
     976             :     /* First half of loop divides dd into dd1, and leaves the recurrence
     977             :      * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     978             :      * entries) when successful. */
     979    27698673 :     tmpd = dd1 - dd;
     980    27698673 :     if (tmpd < dd)
     981             :     {                                /* quotient suspected to be 1 */
     982             : #ifdef DEBUG_LEHMER
     983             :       q = 1;
     984             : #endif
     985     9568801 :       tmpu = xu + xu1;
     986     9568801 :       tmpv = addll(xv, xv1);        /* xv,xv1 will overflow first */
     987     9568801 :       tmp1 = overflow;
     988             :     }
     989             :     else
     990             :     {                                /* division indicated */
     991    18129872 :       hiremainder = 0;
     992    18129872 :       q = 1 + divll(tmpd, dd);
     993    18129872 :       tmpd = hiremainder;
     994    18129872 :       tmpu = xu + q*xu1;
     995    18129872 :       tmpv = addll(xv, mulll(q,xv1));
     996    18129872 :       tmp1 = overflow | hiremainder;
     997             :     }
     998             : 
     999    27698673 :     tmp0 = addll(tmpv, xv1);
    1000    53934279 :     if ((tmpd < tmpu) || overflow || tmp1 ||
    1001    26235606 :         (dd - tmpd < tmp0))        /* !(Jebelean cond.) */
    1002             :     {
    1003             :       /* The recurrence matrix has not yet been warped... */
    1004     3002101 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
    1005     3002101 :       break;
    1006             :     }
    1007             :     /* commit dd1, xu, xv */
    1008    24696572 :     res++;
    1009    24696572 :     dd1 = tmpd; xu = tmpu; xv = tmpv;
    1010             : #ifdef DEBUG_LEHMER
    1011             :     err_printf("  q = %ld, %lx, %lx\n", q, dd, dd1);
    1012             : #endif
    1013    24696572 :     if (xv > vmax)
    1014             :     {                                /* time to return */
    1015       52367 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
    1016       52367 :       return res;
    1017             :     }
    1018             : 
    1019             :     /* Second half of loop divides dd1 into dd, and the matrix returns to its
    1020             :      * normal arrangement. */
    1021    24644205 :     tmpd = dd - dd1;
    1022    24644205 :     if (tmpd < dd1)
    1023             :     {                                /* quotient suspected to be 1 */
    1024             : #ifdef DEBUG_LEHMER
    1025             :       q = 1;
    1026             : #endif
    1027    10449040 :       tmpu = xu1 + xu;
    1028    10449040 :       tmpv = addll(xv1, xv);
    1029    10449040 :       tmp1 = overflow;
    1030             :     }
    1031             :     else
    1032             :     {                                /* division indicated */
    1033    14195165 :       hiremainder = 0;
    1034    14195165 :       q = 1 + divll(tmpd, dd1);
    1035    14195165 :       tmpd = hiremainder;
    1036    14195165 :       tmpu = xu1 + q*xu;
    1037    14195165 :       tmpv = addll(xv1, mulll(q, xv));
    1038    14195165 :       tmp1 = overflow | hiremainder;
    1039             :     }
    1040             : 
    1041    24644205 :     tmp0 = addll(tmpu, xu);
    1042    46918418 :     if ((tmpd < tmpv) || overflow || tmp1 ||
    1043    22274213 :         (dd1 - tmpd < tmp0))        /* !(Jebelean cond.) */
    1044             :     {
    1045             :       /* The recurrence matrix has not yet been unwarped, so it is
    1046             :        * the wrong way round;  fix this. */
    1047     2810282 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
    1048     2810282 :       break;
    1049             :     }
    1050             : 
    1051    21833923 :     res++; /* commit dd, xu1, xv1 */
    1052    21833923 :     dd = tmpd; xu1 = tmpu; xv1 = tmpv;
    1053             : #ifdef DEBUG_LEHMER
    1054             :     err_printf("  q = %ld, %lx, %lx\n", q, dd1, dd);
    1055             : #endif
    1056    21833923 :     if (xv1 > vmax)
    1057             :     {                                /* time to return */
    1058       25394 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
    1059       25394 :       return res;
    1060             :     }
    1061    21808529 :   } /* end of second loop */
    1062             : 
    1063     5812383 :   return res;
    1064             : }
    1065             : 
    1066             : /* 1 / Mod(x,p). Assume x < p */
    1067             : ulong
    1068   166511453 : Fl_invsafe(ulong x, ulong p)
    1069             : {
    1070             :   long s;
    1071   166511453 :   ulong xv, xv1, g = xgcduu(p, x, 1, &xv, &xv1, &s);
    1072   166616599 :   if (g != 1UL) return 0UL;
    1073   165986870 :   xv = xv1 % p; if (s < 0) xv = p - xv;
    1074   165986870 :   return xv;
    1075             : }
    1076             : 
    1077             : /* assume 0 < x < p; return u such that u x = gcd(x,p) (mod p);
    1078             :  * set *pg = gcd(x,p) */
    1079             : ulong
    1080      743323 : Fl_invgen(ulong x, ulong p, ulong *pg)
    1081             : {
    1082             :   ulong v, v1;
    1083             :   long s;
    1084      743323 :   *pg = xgcduu(p, x, 0, &v, &v1, &s);
    1085      743323 :   if (s > 0) v = p - v;
    1086      743323 :   return v;
    1087             : }
    1088             : 
    1089             : /* 1 / Mod(x,p). Assume x < p */
    1090             : ulong
    1091   147911754 : Fl_inv(ulong x, ulong p)
    1092             : {
    1093   147911754 :   ulong xv  = Fl_invsafe(x, p);
    1094   148017718 :   if (!xv && p!=1UL) pari_err_INV("Fl_inv", mkintmod(utoi(x), utoi(p)));
    1095   148018255 :   return xv;
    1096             : }

Generated by: LCOV version 1.11