Code coverage tests

This page documents the degree to which the PARI/GP source code is tested by our public test suite, distributed with the source distribution in directory src/test/. This is measured by the gcov utility; we then process gcov output using the lcov frond-end.

We test a few variants depending on Configure flags on the pari.math.u-bordeaux.fr machine (x86_64 architecture), and agregate them in the final report:

The target is 90% coverage for all mathematical modules (given that branches depending on DEBUGLEVEL or DEBUGMEM are not covered). This script is run to produce the results below.

LCOV - code coverage report
Current view: top level - kernel/none - gcdll.c (source / functions) Hit Total Coverage
Test: PARI/GP v2.10.0 lcov report (development 19825-b77c7f8) Lines: 356 358 99.4 %
Date: 2016-12-05 05:49:04 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #line 2 "../src/kernel/none/gcdll.c"
       2             : /* Copyright (C) 2000  The PARI group.
       3             : 
       4             : This file is part of the PARI/GP package.
       5             : 
       6             : PARI/GP is free software; you can redistribute it and/or modify it under the
       7             : terms of the GNU General Public License as published by the Free Software
       8             : Foundation. It is distributed in the hope that it will be useful, but WITHOUT
       9             : ANY WARRANTY WHATSOEVER.
      10             : 
      11             : Check the License for details. You should have received a copy of it, along
      12             : with the package; see the file 'COPYING'. If not, write to the Free Software
      13             : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
      14             : 
      15             : /***********************************************************************/
      16             : /**                                                                   **/
      17             : /**                          GCD                                      **/
      18             : /**                                                                   **/
      19             : /***********************************************************************/
      20             : /* Fast ulong gcd.  Called with y odd; x can be arbitrary (but will most of
      21             :  * the time be smaller than y). */
      22             : 
      23             : /* Gotos are Harmful, and Programming is a Science.  E.W.Dijkstra. */
      24             : ulong
      25    77670600 : gcduodd(ulong x, ulong y)         /* assume y&1==1, y > 1 */
      26             : {
      27    77670600 :   if (!x) return y;
      28             :   /* fix up x */
      29    77670600 :   while (!(x&1)) x>>=1;
      30    77670600 :   if (x==1) return 1;
      31    67319835 :   if (x==y) return y;
      32    66285341 :   else if (x>y) goto xislarger;/* will be rare, given how we'll use this */
      33             :   /* loop invariants: x,y odd and distinct. */
      34             :  yislarger:
      35   554815922 :   if ((x^y)&2)                 /* ...01, ...11 or vice versa */
      36   281697126 :     y=(x>>2)+(y>>2)+1;         /* ==(x+y)>>2 except it can't overflow */
      37             :   else                         /* ...01,...01 or ...11,...11 */
      38   273118796 :     y=(y-x)>>2;                /* now y!=0 in either case */
      39   554815922 :   while (!(y&1)) y>>=1;        /* kill any windfall-gained powers of 2 */
      40   554815922 :   if (y==1) return 1;          /* comparand == return value... */
      41   526448240 :   if (x==y) return y;          /* this and the next is just one comparison */
      42   518058011 :   else if (x<y) goto yislarger;/* else fall through to xislarger */
      43             : 
      44             :  xislarger:                    /* same as above, seen through a mirror */
      45   352771574 :   if ((x^y)&2)
      46   180568337 :     x=(x>>2)+(y>>2)+1;
      47             :   else
      48   172203237 :     x=(x-y)>>2;                /* x!=0 */
      49   352771574 :   while (!(x&1)) x>>=1;
      50   352771574 :   if (x==1) return 1;
      51   329818186 :   if (x==y) return y;
      52   323244144 :   else if (x>y) goto xislarger;
      53             : 
      54   217148997 :   goto yislarger;
      55             : }
      56             : /* Gotos are useful, and Programming is an Art.  D.E.Knuth. */
      57             : /* PS: Of course written with Dijkstra's lessons firmly in mind... --GN */
      58             : 
      59             : /* at least one of a or b is odd, return gcd(a,b) */
      60             : INLINE ulong
      61    70514525 : mygcduodd(ulong a, ulong b)
      62             : {
      63             :   ulong c;
      64    70514525 :   if (b&1)
      65             :   {
      66    45488397 :     if (a==1 || b==1)
      67     6376863 :       c = 1;
      68             :     else
      69    39111534 :      c = gcduodd(a, b);
      70             :   }
      71             :   else
      72             :   {
      73    25026128 :     if (a==1)
      74     9219285 :       c = 1;
      75             :     else
      76    15806843 :       c = gcduodd(b, a);
      77             :   }
      78    70514525 :   return c;
      79             : }
      80             : 
      81             : /* modified right shift binary algorithm with at most one division */
      82             : long
      83     5744742 : cgcd(long a,long b)
      84             : {
      85             :   long v;
      86     5744742 :   a=labs(a); if (!b) return a;
      87     4066813 :   b=labs(b); if (!a) return b;
      88     4028228 :   if (a>b) { a %= b; if (!a) return b; }
      89     1969966 :   else     { b %= a; if (!b) return a; }
      90     1538437 :   v = vals(a|b);
      91     1538437 :   return (long)(mygcduodd((ulong)(a>>v), (ulong)(b>>v)) << v);
      92             : }
      93             : ulong
      94     9075477 : ugcd(ulong a,ulong b)
      95             : {
      96             :   long v;
      97     9075477 :   if (!b) return a;
      98     9072880 :   if (!a) return b;
      99     2744787 :   if (a>b) { a %= b; if (!a) return b; }
     100     1022777 :   else     { b %= a; if (!b) return a; }
     101      540000 :   v = vals(a|b);
     102      540000 :   return mygcduodd(a>>v, b>>v) << v;
     103             : }
     104             : 
     105             : /* For gcdii(): assume a>b>0, return gcd(a,b) as a GEN */
     106             : static GEN
     107   129488775 : igcduu(ulong a, ulong b)
     108             : {
     109             :   long v;
     110   129488775 :   a %= b; if (!a) return utoipos(b);
     111    68436088 :   v = vals(a|b);
     112    68436088 :   return utoipos( mygcduodd(a>>v, b>>v) << v );
     113             : }
     114             : 
     115             : /*Warning: overflows silently if lcm does not fit*/
     116             : long
     117      760019 : clcm(long a,long b)
     118             : {
     119      760019 :   long d = cgcd(a,b);
     120      760019 :   if (!d) return 0;
     121      760019 :   return d == 1? a*b: a*(b/d);
     122             : }
     123             : 
     124             : /********************************************************************/
     125             : /**                                                                **/
     126             : /**               INTEGER EXTENDED GCD  (AND INVMOD)               **/
     127             : /**                                                                **/
     128             : /********************************************************************/
     129             : 
     130             : /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
     131             :  * in the context of trying to improve elliptic curve cryptosystem attacking
     132             :  * algorithms.  2001Jan02 -- added bezout() functionality.
     133             :  *
     134             :  * Two basic ideas - (1) avoid many integer divisions, especially when the
     135             :  * quotient is 1 (which happens more than 40% of the time).  (2) Use Lehmer's
     136             :  * trick as modified by Jebelean of extracting a couple of words' worth of
     137             :  * leading bits from both operands, and compute partial quotients from them
     138             :  * as long as we can be sure of their values.  The Jebelean modifications
     139             :  * consist in reliable inequalities from which we can decide fast whether
     140             :  * to carry on or to return to the outer loop, and in re-shifting after the
     141             :  * first word's worth of bits has been used up.  All of this is described
     142             :  * in R. Lercier's these [pp148-153 & 163f.], except his outer loop isn't
     143             :  * quite right  (the catch-up divisions needed when one partial quotient is
     144             :  * larger than a word are missing).
     145             :  *
     146             :  * The API consists of invmod() and bezout() below;  the single-word routines
     147             :  * xgcduu and xxgcduu may be called directly if desired;  lgcdii() probably
     148             :  * doesn't make much sense out of context.
     149             :  *
     150             :  * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
     151             :  * ptotically about a factor 2.5 .. 3, depending on processor architecture,
     152             :  * than the naive continued-division code.  Unfortunately, thanks to the
     153             :  * unrolled loops and all, the code is a bit lengthy.
     154             :  */
     155             : 
     156             : /*==================================
     157             :  * xgcduu(d,d1,f,v,v1,s)
     158             :  * xxgcduu(d,d1,f,u,u1,v,v1,s)
     159             :  * rgcduu(d,d1,vmax,u,u1,v,v1,s)
     160             :  *==================================*/
     161             : /*
     162             :  * Fast `final' extended gcd algorithm, acting on two ulongs.  Ideally this
     163             :  * should be replaced with assembler versions wherever possible.  The present
     164             :  * code essentially does `subtract, compare, and possibly divide' at each step,
     165             :  * which is reasonable when hardware division (a) exists, (b) is a bit slowish
     166             :  * and (c) does not depend a lot on the operand values (as on i486).  When
     167             :  * wordsize division is in fact an assembler routine based on subtraction,
     168             :  * this strategy may not be the most efficient one.
     169             :  *
     170             :  * xxgcduu() should be called with  d > d1 > 0, returns gcd(d,d1), and assigns
     171             :  * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1]  (i.e.,
     172             :  * the product of all the [0, 1; 1 q_j] where the leftmost factor arises from
     173             :  * the quotient of the first division step),  and the information about the
     174             :  * implied signs to s  (-1 when an odd number of divisions has been done,
     175             :  * 1 otherwise).  xgcduu() is exactly the same except that u,u1 are not com-
     176             :  * puted (and not returned, of course).
     177             :  *
     178             :  * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
     179             :  * (so we can stop the chain division one step early:  as soon as the remainder
     180             :  * equals 1).  Use this when you intend to use only what would be v, or only
     181             :  * what would be u and v, after that final division step, but not u1 and v1.
     182             :  * With the flag in force and thus without that final step, the interesting
     183             :  * quantity/ies will still sit in [u1 and] v1, of course.
     184             :  *
     185             :  * For computing the inverse of a single-word INTMOD known to exist, pass f=1
     186             :  * to xgcduu(), and obtain the result from s and v1.  (The routine does the
     187             :  * right thing when d1==1 already.)  For finishing a multiword modinv known
     188             :  * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix  (with
     189             :  * properly adjusted signs)  onto the values v' and v1' previously obtained
     190             :  * from the multiword division steps.  Actually, just take the scalar product
     191             :  * of [v',v1'] with [u1,-v1], and change the sign if s==-1.  (If the final
     192             :  * step had been carried out, it would be [-u,v], and s would also change.)
     193             :  * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
     194             :  * Finally, f=0 with xxgcduu() is useful for Bezout computations.
     195             :  * [Harrumph.  In the above prescription, the sign turns out to be precisely
     196             :  * wrong.]
     197             :  * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
     198             :  * make a difference when gcd(d,d1)>1.  The speedup is negligible.)
     199             :  *
     200             :  * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
     201             :  * recover the final u,u1 given only v,v1 and s.  However, it probably isn't
     202             :  * worthwhile, as it trades a few multiplications for a division.
     203             :  *
     204             :  * Note that these routines do not know and do not need to know about the
     205             :  * PARI stack.
     206             :  *
     207             :  * Added 2001Jan15:
     208             :  * rgcduu() is a variant of xxgcduu() which does not have f  (the effect is
     209             :  * that of f=0),  but instead has a ulong vmax parameter, for use in rational
     210             :  * reconstruction below.  It returns when v1 exceeds vmax;  v will never
     211             :  * exceed vmax.  (vmax=0 is taken as a synonym of ULONG_MAX i.e. unlimited,
     212             :  * in which case rgcduu behaves exactly like xxgcduu with f=0.)  The return
     213             :  * value of rgcduu() is typically meaningless;  the interesting part is the
     214             :  * matrix.
     215             :  */
     216             : 
     217             : ulong
     218   148471289 : xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
     219             : {
     220             :   ulong xv,xv1, xs, q,res;
     221             :   LOCAL_HIREMAINDER;
     222             : 
     223             :   /* The above blurb contained a lie.  The main loop always stops when d1
     224             :    * has become equal to 1.  If (d1 == 1 && !(f&1)) after the loop, we do
     225             :    * the final `division' of d by 1 `by hand' as it were.
     226             :    *
     227             :    * The loop has already been unrolled once.  Aggressive optimization could
     228             :    * well lead to a totally unrolled assembler version...
     229             :    *
     230             :    * On modern x86 architectures, this loop is a pig anyway.  The division
     231             :    * instruction always puts its result into the same pair of registers, and
     232             :    * we always want to use one of them straight away, so pipeline performance
     233             :    * will suck big time.  An assembler version should probably do a first loop
     234             :    * computing and storing all the quotients -- their number is bounded in
     235             :    * advance -- and then assembling the matrix in a second pass.  On other
     236             :    * architectures where we can cycle through four or so groups of registers
     237             :    * and exploit a fast ALU result-to-operand feedback path, this is much less
     238             :    * of an issue.  (Intel sucks.  See http://www.x86.org/ ...)
     239             :    */
     240   148471289 :   xs = res = 0;
     241   148471289 :   xv = 0UL; xv1 = 1UL;
     242   964489580 :   while (d1 > 1UL)
     243             :   {
     244   744771865 :     d -= d1;                        /* no need to use subll */
     245   744771865 :     if (d >= d1)
     246             :     {
     247   405410560 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     248   405410560 :       xv += q * xv1;
     249             :     }
     250             :     else
     251   339361305 :       xv += xv1;
     252             :                                 /* possible loop exit */
     253   744771865 :     if (d <= 1UL) { xs=1; break; }
     254             :                                 /* repeat with inverted roles */
     255   667547002 :     d1 -= d;
     256   667547002 :     if (d1 >= d)
     257             :     {
     258   377601754 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     259   377601754 :       xv1 += q * xv;
     260             :     }
     261             :     else
     262   289945248 :       xv1 += xv;
     263             :   } /* while */
     264             : 
     265   148471289 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     266             :   {
     267      322427 :     if (xs && d==1)
     268       96873 :     { xv1 += d1 * xv; xs = 0; res = 1UL; }
     269      225554 :     else if (!xs && d1==1)
     270       95641 :     { xv += d * xv1; xs = 1; res = 1UL; }
     271             :   }
     272             : 
     273   148471289 :   if (xs)
     274             :   {
     275    78406839 :     *s = -1; *v = xv1; *v1 = xv;
     276    78406839 :     return (res ? res : (d==1 ? 1UL : d1));
     277             :   }
     278             :   else
     279             :   {
     280    70064450 :     *s = 1; *v = xv; *v1 = xv1;
     281    70064450 :     return (res ? res : (d1==1 ? 1UL : d));
     282             :   }
     283             : }
     284             : 
     285             : 
     286             : ulong
     287   140596003 : xxgcduu(ulong d, ulong d1, int f,
     288             :         ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     289             : {
     290             :   ulong xu,xu1, xv,xv1, xs, q,res;
     291             :   LOCAL_HIREMAINDER;
     292             : 
     293   140596003 :   xs = res = 0;
     294   140596003 :   xu = xv1 = 1UL;
     295   140596003 :   xu1 = xv = 0UL;
     296   456866086 :   while (d1 > 1UL)
     297             :   {
     298   244064227 :     d -= d1;                        /* no need to use subll */
     299   244064227 :     if (d >= d1)
     300             :     {
     301   141475284 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     302   141475284 :       xv += q * xv1;
     303   141475284 :       xu += q * xu1;
     304             :     }
     305             :     else
     306   102588943 :     { xv += xv1; xu += xu1; }
     307             :                                 /* possible loop exit */
     308   244064227 :     if (d <= 1UL) { xs=1; break; }
     309             :                                 /* repeat with inverted roles */
     310   175674080 :     d1 -= d;
     311   175674080 :     if (d1 >= d)
     312             :     {
     313    96988495 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     314    96988495 :       xv1 += q * xv;
     315    96988495 :       xu1 += q * xu;
     316             :     }
     317             :     else
     318    78685585 :     { xv1 += xv; xu1 += xu; }
     319             :   } /* while */
     320             : 
     321   140596003 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     322             :   {
     323   136320004 :     if (xs && d==1)
     324             :     {
     325    40619054 :       xv1 += d1 * xv;
     326    40619054 :       xu1 += d1 * xu;
     327    40619054 :       xs = 0; res = 1UL;
     328             :     }
     329    95700950 :     else if (!xs && d1==1)
     330             :     {
     331    60813585 :       xv += d * xv1;
     332    60813585 :       xu += d * xu1;
     333    60813585 :       xs = 1; res = 1UL;
     334             :     }
     335             :   }
     336             : 
     337   140596003 :   if (xs)
     338             :   {
     339    88846213 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     340    88846213 :     return (res ? res : (d==1 ? 1UL : d1));
     341             :   }
     342             :   else
     343             :   {
     344    51749790 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     345    51749790 :     return (res ? res : (d1==1 ? 1UL : d));
     346             :   }
     347             : }
     348             : 
     349             : ulong
     350      846424 : rgcduu(ulong d, ulong d1, ulong vmax,
     351             :        ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     352             : {
     353      846424 :   ulong xu,xu1, xv,xv1, xs, q, res=0;
     354      846424 :   int f = 0;
     355             :   LOCAL_HIREMAINDER;
     356             : 
     357      846424 :   if (vmax == 0) vmax = ULONG_MAX;
     358      846424 :   xs = res = 0;
     359      846424 :   xu = xv1 = 1UL;
     360      846424 :   xu1 = xv = 0UL;
     361     3845292 :   while (d1 > 1UL)
     362             :   {
     363     2967774 :     d -= d1;                        /* no need to use subll */
     364     2967774 :     if (d >= d1)
     365             :     {
     366     1620883 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     367     1620883 :       xv += q * xv1;
     368     1620883 :       xu += q * xu1;
     369             :     }
     370             :     else
     371     1346891 :     { xv += xv1; xu += xu1; }
     372             :                                 /* possible loop exit */
     373     2967774 :     if (xv > vmax) { f=xs=1; break; }
     374     2626490 :     if (d <= 1UL) { xs=1; break; }
     375             :                                 /* repeat with inverted roles */
     376     2559530 :     d1 -= d;
     377     2559530 :     if (d1 >= d)
     378             :     {
     379     1561683 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     380     1561683 :       xv1 += q * xv;
     381     1561683 :       xu1 += q * xu;
     382             :     }
     383             :     else
     384      997847 :     { xv1 += xv; xu1 += xu; }
     385             :                                 /* possible loop exit */
     386     2559530 :     if (xv1 > vmax) { f=1; break; }
     387             :   } /* while */
     388             : 
     389      846424 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     390             :   {
     391       98054 :     if (xs && d==1)
     392             :     {
     393       66960 :       xv1 += d1 * xv;
     394       66960 :       xu1 += d1 * xu;
     395       66960 :       xs = 0; res = 1UL;
     396             :     }
     397       31094 :     else if (!xs && d1==1)
     398             :     {
     399       31094 :       xv += d * xv1;
     400       31094 :       xu += d * xu1;
     401       31094 :       xs = 1; res = 1UL;
     402             :     }
     403             :   }
     404             : 
     405      846424 :   if (xs)
     406             :   {
     407      372378 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     408      372378 :     return (res ? res : (d==1 ? 1UL : d1));
     409             :   }
     410             :   else
     411             :   {
     412      474046 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     413      474046 :     return (res ? res : (d1==1 ? 1UL : d));
     414             :   }
     415             : }
     416             : 
     417             : /*==================================
     418             :  * cbezout(a,b,uu,vv)
     419             :  *==================================
     420             :  * Same as bezout() but for C longs.
     421             :  *    Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
     422             :  *    such that g = u*a + v*b.
     423             :  * Special cases:
     424             :  *    a == b == 0 ==> pick u=1, v=0 (and return 1, surprisingly)
     425             :  *    a != 0 == b ==> keep v=0
     426             :  *    a == 0 != b ==> keep u=0
     427             :  *    |a| == |b| != 0 ==> keep u=0, set v=+-1
     428             :  * Assignments through uu,vv happen unconditionally;  non-NULL pointers
     429             :  * _must_ be used.
     430             :  */
     431             : long
     432       18991 : cbezout(long a,long b,long *uu,long *vv)
     433             : {
     434             :   long s,*t;
     435       18991 :   ulong d = labs(a), d1 = labs(b);
     436             :   ulong r,u,u1,v,v1;
     437             : 
     438       18991 :   if (!b)
     439             :   {
     440         231 :     *vv=0L;
     441         231 :     if (!a) { *uu=1L; return 0L; }
     442         231 :     *uu = a < 0 ? -1L : 1L;
     443         231 :     return (long)d;
     444             :   }
     445       18760 :   else if (!a || (d == d1))
     446             :   {
     447         581 :     *uu = 0L; *vv = b < 0 ? -1L : 1L;
     448         581 :     return (long)d1;
     449             :   }
     450       18179 :   else if (d == 1)                /* frequently used by nfinit */
     451             :   {
     452       13489 :     *uu = a; *vv = 0L;
     453       13489 :     return 1L;
     454             :   }
     455        4690 :   else if (d < d1)
     456             :   {
     457             : /* bug in gcc-2.95.3:
     458             :  * s = a; a = b; b = s; produces wrong result a = b. This is OK:  */
     459        3668 :     { long _x = a; a = b; b = _x; }        /* in order to keep the right signs */
     460        3668 :     r = d; d = d1; d1 = r;
     461        3668 :     t = uu; uu = vv; vv = t;
     462             :   }
     463             :   /* d > d1 > 0 */
     464        4690 :   r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
     465        4690 :   if (s < 0)
     466             :   {
     467         840 :     *uu = a < 0 ? (long)u : -(long)u;
     468         840 :     *vv = b < 0 ? -(long)v : (long)v;
     469             :   }
     470             :   else
     471             :   {
     472        3850 :     *uu = a < 0 ? -(long)u : (long)u;
     473        3850 :     *vv = b < 0 ? (long)v : -(long)v;
     474             :   }
     475        4690 :   return (long)r;
     476             : }
     477             : 
     478             : /*==================================
     479             :  * lgcdii(d,d1,u,u1,v,v1,vmax)
     480             :  *==================================*/
     481             : /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
     482             :  *
     483             :  * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
     484             :  * and a quantity of bits from d1 obtained by a shift of the same displacement,
     485             :  * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
     486             :  * the product of all the [0, 1; 1, q_j] thus obtained, where the leftmost
     487             :  * factor arises from the quotient of the first division step.
     488             :  *
     489             :  * For use in rational reconstruction, input param vmax can be given a
     490             :  * nonzero value.  In this case, we will return early as soon as v1 > vmax
     491             :  * (i.e. it is guaranteed that v <= vmax). --2001Jan15
     492             :  *
     493             :  * MUST be called with  d > d1 > 0, and with  d  occupying more than one
     494             :  * significant word  (if it doesn't, the caller has no business with us;
     495             :  * he/she/it should use xgcduu() instead).  Returns the number of reduction/
     496             :  * swap steps carried out, possibly zero, or under certain conditions minus
     497             :  * that number.  When the return value is nonzero, the caller should use the
     498             :  * returned recurrence matrix to update its own copies of d,d1.  When the
     499             :  * return value is non-positive, and the latest remainder after updating
     500             :  * turns out to be nonzero, the caller should at once attempt a full division,
     501             :  * rather than first trying lgcdii() again -- this typically happens when we
     502             :  * are about to encounter a quotient larger than half a word.  (This is not
     503             :  * detected infallibly -- after a positive return value, it is perfectly
     504             :  * possible that the next stage will end up needing a full division.  After
     505             :  * a negative return value, however, this is certain, and should be acted
     506             :  * upon.)
     507             :  *
     508             :  * (The sign information, for which xgcduu() has its return argument s, is now
     509             :  * implicit in the LSB of our return value, and the caller may take advantage
     510             :  * of the fact that a return value of +-1 implies u==0,u1==v==1  [only v1 pro-
     511             :  * vides interesting information in this case].  One might also use the fact
     512             :  * that if the return value is +-2, then u==1, but this is rather marginal.)
     513             :  *
     514             :  * If it was not possible to determine even the first quotient, either because
     515             :  * we're too close to an integer quotient or because the quotient would be
     516             :  * larger than one word  (if the `leading digit' of d1 after shifting is all
     517             :  * zeros),  we return 0 and do not bother to assign anything to the last four
     518             :  * args.
     519             :  *
     520             :  * The division chain might (sometimes) even run to completion.  It will be
     521             :  * up to the caller to detect this case.
     522             :  *
     523             :  * This routine does _not_ change d or d1;  this will also be up to the caller.
     524             :  *
     525             :  * Note that this routine does not know and does not need to know about the
     526             :  * PARI stack.
     527             :  */
     528             : int
     529     8861528 : lgcdii(ulong* d, ulong* d1,
     530             :        ulong* u, ulong* u1, ulong* v, ulong* v1,
     531             :        ulong vmax)
     532             : {
     533             :   /* Strategy:  (1) Extract/shift most significant bits.  We assume that d
     534             :    * has at least two significant words, but we can cope with a one-word d1.
     535             :    * Let dd,dd1 be the most significant dividend word and matching part of the
     536             :    * divisor.
     537             :    * (2) Check for overflow on the first division.  For our purposes, this
     538             :    * happens when the upper half of dd1 is zero.  (Actually this is detected
     539             :    * during extraction.)
     540             :    * (3) Get a fix on the first quotient.  We compute q = floor(dd/dd1), which
     541             :    * is an upper bound for floor(d/d1), and which gives the true value of the
     542             :    * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
     543             :    * (If it isn't, we give up.  This is annoying because the subsequent full
     544             :    * division will repeat some work already done, but it happens very infre-
     545             :    * quently.  Doing the extra-bit-fetch in this case would be awkward.)
     546             :    * (4) Finish initializations.
     547             :    *
     548             :    * The remainder of the action is comparatively boring... The main loop has
     549             :    * been unrolled once  (so we don't swap things and we can apply Jebelean's
     550             :    * termination conditions which alternatingly take two different forms during
     551             :    * successive iterations).  When we first run out of sufficient bits to form
     552             :    * a quotient, and have an extra word of each operand, we pull out two whole
     553             :    * word's worth of dividend bits, and divisor bits of matching significance;
     554             :    * to these we apply our partial matrix (disregarding overflow because the
     555             :    * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
     556             :    * re-extract one word's worth of the current dividend and a matching amount
     557             :    * of divisor bits.  The affair will normally terminate with matrix entries
     558             :    * just short of a whole word.  (We terminate the inner loop before these can
     559             :    * possibly overflow.)
     560             :    */
     561             :   ulong dd,dd1,ddlo,dd1lo, sh,shc;        /* `digits', shift count */
     562             :   ulong xu,xu1, xv,xv1, q,res;        /* recurrences, partial quotient, count */
     563             :   ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
     564             :   ulong dm1,dm2,d1m1,d1m2;
     565             :   long ld, ld1, lz;                /* t_INT effective lengths */
     566     8861528 :   int skip = 0;                        /* a boolean flag */
     567             :   LOCAL_OVERFLOW;
     568             :   LOCAL_HIREMAINDER;
     569             : 
     570             :   /* following is just for convenience: vmax==0 means no bound */
     571     8861528 :   if (vmax == 0) vmax = ULONG_MAX;
     572     8861528 :   ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
     573     8861528 :   if (lz > 1) return 0;                /* rare, quick and desperate exit */
     574             : 
     575     8824283 :   d = int_MSW(d); d1 = int_MSW(d1);                /* point at the leading `digits' */
     576     8824283 :   dm1 = *int_precW(d); d1m1 = *int_precW(d1);
     577     8824283 :   dm2 = *int_precW(int_precW(d)); d1m2 = *int_precW(int_precW(d1));
     578     8824283 :   dd1lo = 0;                        /* unless we find something better */
     579     8824283 :   sh = bfffo(*d);                /* obtain dividend left shift count */
     580             : 
     581     8824283 :   if (sh)
     582             :   {                                /* do the shifting */
     583     8501204 :     shc = BITS_IN_LONG - sh;
     584     8501204 :     if (lz)
     585             :     {                                /* dividend longer than divisor */
     586     1707985 :       dd1 = (*d1 >> shc);
     587     1707985 :       if (!(HIGHMASK & dd1)) return 0;  /* overflow detected */
     588     1233033 :       if (ld1 > 3)
     589       78994 :         dd1lo = (*d1 << sh) + (d1m1 >> shc);
     590             :       else
     591     1154039 :         dd1lo = (*d1 << sh);
     592             :     }
     593             :     else
     594             :     {                                /* dividend and divisor have the same length */
     595     6793219 :       dd1 = (*d1 << sh);
     596     6793219 :       if (!(HIGHMASK & dd1)) return 0;
     597     6791352 :       if (ld1 > 3)
     598             :       {
     599     6804571 :         dd1 += (d1m1 >> shc);
     600     6804571 :         if (ld1 > 4)
     601     2701940 :           dd1lo = (d1m1 << sh) + (d1m2 >> shc);
     602             :         else
     603     4102631 :           dd1lo = (d1m1 << sh);
     604             :       }
     605             :     }
     606             :     /* following lines assume d to have 2 or more significant words */
     607     8024385 :     dd = (*d << sh) + (dm1 >> shc);
     608     8024385 :     if (ld > 4)
     609     2770533 :       ddlo = (dm1 << sh) + (dm2 >> shc);
     610             :     else
     611     5253852 :       ddlo = (dm1 << sh);
     612             :   }
     613             :   else
     614             :   {                                /* no shift needed */
     615      323079 :     if (lz) return 0;                /* div'd longer than div'r: o'flow automatic */
     616      317355 :     dd1 = *d1;
     617      317355 :     if (!(HIGHMASK & dd1)) return 0;
     618      316964 :     if(ld1 > 3) dd1lo = d1m1;
     619             :     /* assume again that d has another significant word */
     620      316964 :     dd = *d; ddlo = dm1;
     621             :   }
     622             : 
     623             :   /* First subtraction/division stage.  (If a subtraction initially suffices,
     624             :    * we don't divide at all.)  If a Jebelean condition is violated, and we
     625             :    * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
     626             :    * give up and ask for a full division.  Otherwise we commit the result,
     627             :    * possibly deciding to re-shift immediately afterwards.
     628             :    */
     629     8341349 :   dd -= dd1;
     630     8341349 :   if (dd < dd1)
     631             :   {                                /* first quotient known to be == 1 */
     632     3159045 :     xv1 = 1UL;
     633     3159045 :     if (!dd)                        /* !(Jebelean condition), extraspecial case */
     634             :     {                                /* note this can actually happen...  Now
     635             :                                      * q==1 is known, but we underflow already.
     636             :                                  * OTOH we've just shortened d by a whole word.
     637             :                                  * Thus we feel pleased with ourselves and
     638             :                                  * return.  (The re-shift code below would
     639             :                                  * do so anyway.) */
     640      394189 :       *u = 0; *v = *u1 = *v1 = 1UL;
     641      394189 :       return -1;                /* Next step will be a full division. */
     642             :     } /* if !(Jebelean) then */
     643             :   }
     644             :   else
     645             :   {                                /* division indicated */
     646     5182304 :     hiremainder = 0;
     647     5182304 :     xv1 = 1 + divll(dd, dd1);        /* xv1: alternative spelling of `q', here ;) */
     648     5182304 :     dd = hiremainder;
     649     5182304 :     if (dd < xv1)                /* !(Jebelean cond'), non-extra special case */
     650             :     {                                /* Attempt to complete the division using the
     651             :                                  * less significant bits, before skipping right
     652             :                                  * past the 1st loop to the reshift stage. */
     653      723036 :       ddlo = subll(ddlo, mulll(xv1, dd1lo));
     654      723036 :       dd = subllx(dd, hiremainder);
     655             : 
     656             :       /* If we now have an overflow, q was _certainly_ too large.  Thanks to
     657             :        * our decision not to get here unless the original dd1 had bits set in
     658             :        * the upper half of the word, however, we now do know that the correct
     659             :        * quotient is in fact q-1.  Adjust our data accordingly. */
     660      723036 :       if (overflow)
     661             :       {
     662      222784 :         xv1--;
     663      222784 :         ddlo = addll(ddlo,dd1lo);
     664      222784 :         dd = addllx(dd,dd1);        /* overflows again which cancels the borrow */
     665             :         /* ...and fall through to skip=1 below */
     666             :       }
     667             :       else
     668             :       /* Test Jebelean condition anew, at this point using _all_ the extracted
     669             :        * bits we have.  This is clutching at straws; we have a more or less
     670             :        * even chance of succeeding this time.  Note that if we fail, we really
     671             :        * do not know whether the correct quotient would have been q or some
     672             :        * smaller value. */
     673      500252 :         if (!dd && ddlo < xv1) return 0;
     674             : 
     675             :       /* Otherwise, we now know that q is correct, but we cannot go into the
     676             :        * 1st loop.  Raise a flag so we'll remember to skip past the loop.
     677             :        * Get here also after the q-1 adjustment case. */
     678      447943 :       skip = 1;
     679             :     } /* if !(Jebelean) then */
     680             :   }
     681     7672067 :   res = 1;
     682     7672067 :   if (xv1 > vmax)
     683             :   {                                /* gone past the bound already */
     684       11136 :     *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
     685       11136 :     return res;
     686             :   }
     687     7660931 :   xu = 0UL; xv = xu1 = 1UL;
     688             : 
     689             :   /* Some invariants from here across the first loop:
     690             :    *
     691             :    * At this point, and again after we are finished with the first loop and
     692             :    * subsequent conditional, a division and the attached update of the
     693             :    * recurrence matrix have just been carried out completely.  The matrix
     694             :    * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
     695             :    * columns), and the current remainder == next divisor (dd at the moment)
     696             :    * is nonzero (it might be zero here, but then skip will have been set).
     697             :    *
     698             :    * After the first loop, or when skip is set already, it will also be the
     699             :    * case that there aren't sufficiently many bits to continue without re-
     700             :    * shifting.  If the divisor after reshifting is zero, or indeed if it
     701             :    * doesn't have more than half a word's worth of bits, we will have to
     702             :    * return at that point.  Otherwise, we proceed into the second loop.
     703             :    *
     704             :    * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
     705             :    * already reflect the result of applying the current matrix to the old
     706             :    * ddorig:ddlo and dd1orig:dd1lo.  (For the first iteration above, this
     707             :    * was easy to achieve, and we didn't even need to peek into the (now
     708             :    * no longer existent!) saved words.  After the loop, we'll stop for a
     709             :    * moment to merge in the ddlo,dd1lo contributions.)
     710             :    *
     711             :    * Note that after the first division, even an a priori quotient of 1 cannot
     712             :    * be trusted until we've checked Jebelean's condition -- it cannot be too
     713             :    * large, of course, but it might be too small.
     714             :    */
     715             : 
     716     7660931 :   if (!skip)
     717             :   {
     718             :     for(;;)
     719             :     {
     720             :       /* First half of loop divides dd into dd1, and leaves the recurrence
     721             :        * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     722             :        * entries) when successful. */
     723    58763638 :       tmpd = dd1 - dd;
     724    58763638 :       if (tmpd < dd)
     725             :       {                                /* quotient suspected to be 1 */
     726    24544599 :         tmpu = xu + xu1;        /* cannot overflow -- everything bounded by
     727             :                                  * the original dd during first loop */
     728    24544599 :         tmpv = xv + xv1;
     729             :       }
     730             :       else
     731             :       {                                /* division indicated */
     732    34219039 :         hiremainder = 0;
     733    34219039 :         q = 1 + divll(tmpd, dd);
     734    34219039 :         tmpd = hiremainder;
     735    34219039 :         tmpu = xu + q*xu1;        /* can't overflow, but may need to be undone */
     736    34219039 :         tmpv = xv + q*xv1;
     737             :       }
     738             : 
     739    58763638 :       tmp0 = addll(tmpv, xv1);
     740   115980694 :       if ((tmpd < tmpu) || overflow ||
     741    57217056 :           (dd - tmpd < tmp0))        /* !(Jebelean cond.) */
     742             :         break;                        /* skip ahead to reshift stage */
     743             :       else
     744             :       {                                /* commit dd1, xu, xv */
     745    55138924 :         res++;
     746    55138924 :         dd1 = tmpd; xu = tmpu; xv = tmpv;
     747    55138924 :         if (xv > vmax)
     748             :         {                        /* time to return */
     749       13196 :           *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     750       13196 :           return res;
     751             :         }
     752             :       }
     753             : 
     754             :       /* Second half of loop divides dd1 into dd, and the matrix returns to its
     755             :        * normal arrangement. */
     756    55125728 :       tmpd = dd - dd1;
     757    55125728 :       if (tmpd < dd1)
     758             :       {                                /* quotient suspected to be 1 */
     759    22379900 :         tmpu = xu1 + xu;        /* cannot overflow */
     760    22379900 :         tmpv = xv1 + xv;
     761             :       }
     762             :       else
     763             :       {                                /* division indicated */
     764    32745828 :         hiremainder = 0;
     765    32745828 :         q = 1 + divll(tmpd, dd1);
     766    32745828 :         tmpd = hiremainder;
     767    32745828 :         tmpu = xu1 + q*xu;
     768    32745828 :         tmpv = xv1 + q*xv;
     769             :       }
     770             : 
     771    55125728 :       tmp0 = addll(tmpu, xu);
     772   107127551 :       if ((tmpd < tmpv) || overflow ||
     773    52001823 :           (dd1 - tmpd < tmp0))        /* !(Jebelean cond.) */
     774             :         break;                        /* skip ahead to reshift stage */
     775             :       else
     776             :       {                                /* commit dd, xu1, xv1 */
     777    51548143 :         res++;
     778    51548143 :         dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     779    51548143 :         if (xv1 > vmax)
     780             :         {                        /* time to return */
     781       11772 :           *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     782       11772 :           return res;
     783             :         }
     784             :       }
     785             : 
     786    51536371 :     } /* end of first loop */
     787             : 
     788             :     /* Intermezzo: update dd:ddlo, dd1:dd1lo.  (But not if skip is set.) */
     789             : 
     790     7202299 :     if (res&1)
     791             :     {
     792             :       /* after failed division in 1st half of loop:
     793             :        * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
     794             :        *                       * [ -xu, xu1 ; xv, -xv1 ]
     795             :        * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
     796             :        * add the high-order remainders + overflows onto [dd1,dd].)
     797             :        */
     798     3629216 :       tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
     799     3629216 :       tmp1 = subll(mulll(dd1lo,xv), tmp1);
     800     3629216 :       dd1 += subllx(hiremainder, tmp0);
     801     3629216 :       tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
     802     3629216 :       ddlo = subll(tmp2, mulll(dd1lo,xv1));
     803     3629216 :       dd += subllx(tmp0, hiremainder);
     804     3629216 :       dd1lo = tmp1;
     805             :     }
     806             :     else
     807             :     {
     808             :       /* after failed division in 2nd half of loop:
     809             :        * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
     810             :        *                       * [ xu1, -xu ; -xv1, xv ]
     811             :        * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
     812             :        * add the high-order remainders + overflows onto [dd,dd1].)
     813             :        */
     814     3573083 :       tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
     815     3573083 :       tmp1 = subll(tmp1, mulll(dd1lo,xv1));
     816     3573083 :       dd += subllx(tmp0, hiremainder);
     817     3573083 :       tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
     818     3573083 :       dd1lo = subll(mulll(dd1lo,xv), tmp2);
     819     3573083 :       dd1 += subllx(hiremainder, tmp0);
     820     3573083 :       ddlo = tmp1;
     821             :     }
     822             :   } /* end of skip-pable section:  get here also, with res==1, when there
     823             :      * was a problem immediately after the very first division. */
     824             : 
     825             :   /* Re-shift.  Note:  the shift count _can_ be zero, viz. under the following
     826             :    * precise conditions:  The original dd1 had its topmost bit set, so the 1st
     827             :    * q was 1, and after subtraction, dd had its topmost bit unset.  If now
     828             :    * dd==0, we'd have taken the return exit already, so we couldn't have got
     829             :    * here.  If not, then it must have been the second division which has gone
     830             :    * amiss  (because dd1 was very close to an exact multiple of the remainder
     831             :    * dd value, so this will be very rare).  At this point, we'd have a fairly
     832             :    * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
     833             :    * this is not guaranteed to work.  Instead of trying, we return at once.
     834             :    * The caller will see to it that the initial subtraction is re-done using
     835             :    * _all_ the bits of both operands, which already helps, and the next round
     836             :    * will either be a full division  (if dd occupied a halfword or less),  or
     837             :    * another llgcdii() first step.  In the latter case, since we try a little
     838             :    * harder during our first step, we may actually be able to fix the problem,
     839             :    * and get here again with improved low-order bits and with another step
     840             :    * under our belt.  Otherwise we'll have given up above and forced a full-
     841             :    * blown division.
     842             :    *
     843             :    * If res is even, the shift count _cannot_ be zero.  (The first step forces
     844             :    * a zero into the remainder's MSB, and all subsequent remainders will have
     845             :    * inherited it.)
     846             :    *
     847             :    * The re-shift stage exits if the next divisor has at most half a word's
     848             :    * worth of bits.
     849             :    *
     850             :    * For didactic reasons, the second loop will be arranged in the same way
     851             :    * as the first -- beginning with the division of dd into dd1, as if res
     852             :    * was odd.  To cater for this, if res is actually even, we swap things
     853             :    * around during reshifting.  (During the second loop, the parity of res
     854             :    * does not matter;  we know in which half of the loop we are when we decide
     855             :    * to return.) */
     856     7635963 :   if (res&1)
     857             :   {                                /* after odd number of division(s) */
     858     4063796 :     if (dd1 && (sh = bfffo(dd1)))
     859             :     {
     860     4003892 :       shc = BITS_IN_LONG - sh;
     861     4003892 :       dd = (ddlo >> shc) + (dd << sh);
     862     4003892 :       if (!(HIGHMASK & dd))
     863             :       {
     864      260645 :         *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     865      260645 :         return -res;                /* full division asked for */
     866             :       }
     867     3743247 :       dd1 = (dd1lo >> shc) + (dd1 << sh);
     868             :     }
     869             :     else
     870             :     {                                /* time to return: <= 1 word left, or sh==0 */
     871       59904 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     872       59904 :       return res;
     873             :     }
     874             :   }
     875             :   else
     876             :   {                                /* after even number of divisions */
     877     3572167 :     if (dd)
     878             :     {
     879     3572167 :       sh = bfffo(dd);                /* Known to be positive. */
     880     3572167 :       shc = BITS_IN_LONG - sh;
     881             :                                 /* dd:ddlo will become the new dd1, and v.v. */
     882     3572167 :       tmpd = (ddlo >> shc) + (dd << sh);
     883     3572167 :       dd = (dd1lo >> shc) + (dd1 << sh);
     884     3572167 :       dd1 = tmpd;
     885             :       /* This has completed the swap;  now dd is again the current divisor.
     886             :        * The following test originally inspected dd1 -- a most subtle and
     887             :        * most annoying bug. The Management. */
     888     3572167 :       if (HIGHMASK & dd)
     889             :       {
     890             :         /* recurrence matrix is the wrong way round;  swap it. */
     891     3531305 :         tmp0 = xu; xu = xu1; xu1 = tmp0;
     892     3531305 :         tmp0 = xv; xv = xv1; xv1 = tmp0;
     893             :       }
     894             :       else
     895             :       {
     896             :         /* recurrence matrix is the wrong way round;  fix this. */
     897       40862 :         *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     898       40862 :         return -res;                /* full division asked for */
     899             :       }
     900             :     }
     901             :     else
     902             :     {                                /* time to return: <= 1 word left */
     903           0 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     904           0 :       return res;
     905             :     }
     906             :   } /* end reshift */
     907             : 
     908             :   /* The Second Loop.  Rip-off of the first, but we now check for overflow
     909             :    * in the recurrences.  Returns instead of breaking when we cannot fix the
     910             :    * quotient any longer. */
     911             :   for(;;)
     912             :   {
     913             :     /* First half of loop divides dd into dd1, and leaves the recurrence
     914             :      * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     915             :      * entries) when successful. */
     916    31483939 :     tmpd = dd1 - dd;
     917    31483939 :     if (tmpd < dd)
     918             :     {                                /* quotient suspected to be 1 */
     919    10789938 :       tmpu = xu + xu1;
     920    10789938 :       tmpv = addll(xv, xv1);        /* xv,xv1 will overflow first */
     921    10789938 :       tmp1 = overflow;
     922             :     }
     923             :     else
     924             :     {                                /* division indicated */
     925    20694001 :       hiremainder = 0;
     926    20694001 :       q = 1 + divll(tmpd, dd);
     927    20694001 :       tmpd = hiremainder;
     928    20694001 :       tmpu = xu + q*xu1;
     929    20694001 :       tmpv = addll(xv, mulll(q,xv1));
     930    20694001 :       tmp1 = overflow | hiremainder;
     931             :     }
     932             : 
     933    31483939 :     tmp0 = addll(tmpv, xv1);
     934    60731322 :     if ((tmpd < tmpu) || overflow || tmp1 ||
     935    29247383 :         (dd - tmpd < tmp0))        /* !(Jebelean cond.) */
     936             :     {
     937             :       /* The recurrence matrix has not yet been warped... */
     938     3982774 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     939     3982774 :       break;
     940             :     }
     941             :     /* commit dd1, xu, xv */
     942    27501165 :     res++;
     943    27501165 :     dd1 = tmpd; xu = tmpu; xv = tmpv;
     944    27501165 :     if (xv > vmax)
     945             :     {                                /* time to return */
     946       55547 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     947       55547 :       return res;
     948             :     }
     949             : 
     950             :     /* Second half of loop divides dd1 into dd, and the matrix returns to its
     951             :      * normal arrangement. */
     952    27445618 :     tmpd = dd - dd1;
     953    27445618 :     if (tmpd < dd1)
     954             :     {                                /* quotient suspected to be 1 */
     955    11694553 :       tmpu = xu1 + xu;
     956    11694553 :       tmpv = addll(xv1, xv);
     957    11694553 :       tmp1 = overflow;
     958             :     }
     959             :     else
     960             :     {                                /* division indicated */
     961    15751065 :       hiremainder = 0;
     962    15751065 :       q = 1 + divll(tmpd, dd1);
     963    15751065 :       tmpd = hiremainder;
     964    15751065 :       tmpu = xu1 + q*xu;
     965    15751065 :       tmpv = addll(xv1, mulll(q, xv));
     966    15751065 :       tmp1 = overflow | hiremainder;
     967             :     }
     968             : 
     969    27445618 :     tmp0 = addll(tmpu, xu);
     970    52242435 :     if ((tmpd < tmpv) || overflow || tmp1 ||
     971    24796817 :         (dd1 - tmpd < tmp0))        /* !(Jebelean cond.) */
     972             :     {
     973             :       /* The recurrence matrix has not yet been unwarped, so it is
     974             :        * the wrong way round;  fix this. */
     975     3209060 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     976     3209060 :       break;
     977             :     }
     978             : 
     979    24236558 :     res++; /* commit dd, xu1, xv1 */
     980    24236558 :     dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     981    24236558 :     if (xv1 > vmax)
     982             :     {                                /* time to return */
     983       27171 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     984       27171 :       return res;
     985             :     }
     986    24209387 :   } /* end of second loop */
     987             : 
     988     7191834 :   return res;
     989             : }
     990             : 
     991             : /* 1 / Mod(x,p). Assume x < p */
     992             : ulong
     993   145014418 : Fl_invsafe(ulong x, ulong p)
     994             : {
     995             :   long s;
     996   145014418 :   ulong xv, xv1, g = xgcduu(p, x, 1, &xv, &xv1, &s);
     997   145459727 :   if (g != 1UL) return 0UL;
     998   145319076 :   xv = xv1 % p; if (s < 0) xv = p - xv;
     999   145319076 :   return xv;
    1000             : }
    1001             : 
    1002             : /* assume 0 < x < p; return u such that u x = gcd(x,p) (mod p);
    1003             :  * set *pg = gcd(x,p) */
    1004             : ulong
    1005      322427 : Fl_invgen(ulong x, ulong p, ulong *pg)
    1006             : {
    1007             :   ulong v, v1;
    1008             :   long s;
    1009      322427 :   *pg = xgcduu(p, x, 0, &v, &v1, &s);
    1010      322427 :   if (s > 0) v = p - v;
    1011      322427 :   return v;
    1012             : }
    1013             : 
    1014             : /* 1 / Mod(x,p). Assume x < p */
    1015             : ulong
    1016   128853593 : Fl_inv(ulong x, ulong p)
    1017             : {
    1018   128853593 :   ulong xv  = Fl_invsafe(x, p);
    1019   128973961 :   if (!xv && p!=1UL) pari_err_INV("Fl_inv", mkintmod(utoi(x), utoi(p)));
    1020   128974724 :   return xv;
    1021             : }

Generated by: LCOV version 1.11