Code coverage tests

This page documents the degree to which the PARI/GP source code is tested by our public test suite, distributed with the source distribution in directory src/test/. This is measured by the gcov utility; we then process gcov output using the lcov frond-end.

We test a few variants depending on Configure flags on the pari.math.u-bordeaux.fr machine (x86_64 architecture), and agregate them in the final report:

The target is to exceed 90% coverage for all mathematical modules (given that branches depending on DEBUGLEVEL or DEBUGMEM are not covered). This script is run to produce the results below.

LCOV - code coverage report
Current view: top level - kernel/none - gcdll.c (source / functions) Hit Total Coverage
Test: PARI/GP v2.12.0 lcov report (development 23022-5b494f78e) Lines: 345 348 99.1 %
Date: 2018-09-24 05:37:44 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #line 2 "../src/kernel/none/gcdll.c"
       2             : /* Copyright (C) 2000  The PARI group.
       3             : 
       4             : This file is part of the PARI/GP package.
       5             : 
       6             : PARI/GP is free software; you can redistribute it and/or modify it under the
       7             : terms of the GNU General Public License as published by the Free Software
       8             : Foundation. It is distributed in the hope that it will be useful, but WITHOUT
       9             : ANY WARRANTY WHATSOEVER.
      10             : 
      11             : Check the License for details. You should have received a copy of it, along
      12             : with the package; see the file 'COPYING'. If not, write to the Free Software
      13             : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
      14             : 
      15             : /***********************************************************************/
      16             : /**                                                                   **/
      17             : /**                          GCD                                      **/
      18             : /**                                                                   **/
      19             : /***********************************************************************/
      20             : /* Fast ulong gcd.  Called with y odd; x can be arbitrary (but will most of
      21             :  * the time be smaller than y). */
      22             : 
      23             : /* Gotos are Harmful, and Programming is a Science.  E.W.Dijkstra. */
      24             : INLINE ulong
      25   146259792 : gcduodd(ulong x, ulong y)         /* assume y&1==1, y > 1 */
      26             : {
      27   146259792 :   if (!x) return y;
      28             :   /* fix up x */
      29   146259792 :   while (!(x&1)) x>>=1;
      30   146259792 :   if (x==1) return 1;
      31   113092835 :   if (x==y) return y;
      32   108635485 :   else if (x>y) goto xislarger;/* will be rare, given how we'll use this */
      33             :   /* loop invariants: x,y odd and distinct. */
      34             :  yislarger:
      35   565886393 :   if ((x^y)&2)                 /* ...01, ...11 or vice versa */
      36   294447325 :     y=(x>>2)+(y>>2)+1;         /* ==(x+y)>>2 except it can't overflow */
      37             :   else                         /* ...01,...01 or ...11,...11 */
      38   271439068 :     y=(y-x)>>2;                /* now y!=0 in either case */
      39   565886393 :   while (!(y&1)) y>>=1;        /* kill any windfall-gained powers of 2 */
      40   565886393 :   if (y==1) return 1;          /* comparand == return value... */
      41   521137086 :   if (x==y) return y;          /* this and the next is just one comparison */
      42   505788226 :   else if (x<y) goto yislarger;/* else fall through to xislarger */
      43             : 
      44             :  xislarger:                    /* same as above, seen through a mirror */
      45   513295523 :   if ((x^y)&2)
      46   262665882 :     x=(x>>2)+(y>>2)+1;
      47             :   else
      48   250629641 :     x=(x-y)>>2;                /* x!=0 */
      49   513295523 :   while (!(x&1)) x>>=1;
      50   513295523 :   if (x==1) return 1;
      51   477263029 :   if (x==y) return y;
      52   464758205 :   else if (x>y) goto xislarger;
      53             : 
      54   304750467 :   goto yislarger;
      55             : }
      56             : /* Gotos are useful, and Programming is an Art.  D.E.Knuth. */
      57             : /* PS: Of course written with Dijkstra's lessons firmly in mind... --GN */
      58             : 
      59             : /* at least one of a or b is odd, return gcd(a,b) */
      60             : INLINE ulong
      61   187008288 : mygcduodd(ulong a, ulong b)
      62             : {
      63             :   ulong c;
      64   187008288 :   if (b&1)
      65             :   {
      66   100403726 :     if (a==1 || b==1)
      67    18519849 :       c = 1;
      68             :     else
      69    81883877 :      c = gcduodd(a, b);
      70             :   }
      71             :   else
      72             :   {
      73    86604562 :     if (a==1)
      74    24879349 :       c = 1;
      75             :     else
      76    61725213 :       c = gcduodd(b, a);
      77             :   }
      78   186765859 :   return c;
      79             : }
      80             : 
      81             : /* modified right shift binary algorithm with at most one division */
      82             : ulong
      83   164418161 : ugcd(ulong a,ulong b)
      84             : {
      85             :   long v;
      86   164418161 :   if (!b) return a;
      87   162081517 :   if (!a) return b;
      88   151285907 :   if (a>b) { a %= b; if (!a) return b; }
      89    64952500 :   else     { b %= a; if (!b) return a; }
      90   110618515 :   v = vals(a|b);
      91   110705636 :   return mygcduodd(a>>v, b>>v) << v;
      92             : }
      93             : long
      94     7304909 : cgcd(long a,long b) { return (long)ugcd(labs(a), labs(b)); }
      95             : 
      96             : /* For gcdii(): assume a>b>0, return gcd(a,b) as a GEN */
      97             : static GEN
      98   180393421 : igcduu(ulong a, ulong b)
      99             : {
     100             :   long v;
     101   180393421 :   a %= b; if (!a) return utoipos(b);
     102    75878745 :   v = vals(a|b);
     103    75878745 :   return utoipos( mygcduodd(a>>v, b>>v) << v );
     104             : }
     105             : 
     106             : /*Warning: overflows silently if lcm does not fit*/
     107             : ulong
     108     2179724 : ulcm(ulong a, ulong b)
     109             : {
     110     2179724 :   ulong d = ugcd(a,b);
     111     2179724 :   if (!d) return 0;
     112     2179724 :   return d == 1? a*b: a*(b/d);
     113             : }
     114             : long
     115           0 : clcm(long a,long b) { return ulcm(labs(a), labs(b)); }
     116             : 
     117             : /********************************************************************/
     118             : /**                                                                **/
     119             : /**               INTEGER EXTENDED GCD  (AND INVMOD)               **/
     120             : /**                                                                **/
     121             : /********************************************************************/
     122             : 
     123             : /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
     124             :  * in the context of trying to improve elliptic curve cryptosystem attacking
     125             :  * algorithms.  2001Jan02 -- added bezout() functionality.
     126             :  *
     127             :  * Two basic ideas - (1) avoid many integer divisions, especially when the
     128             :  * quotient is 1 (which happens more than 40% of the time).  (2) Use Lehmer's
     129             :  * trick as modified by Jebelean of extracting a couple of words' worth of
     130             :  * leading bits from both operands, and compute partial quotients from them
     131             :  * as long as we can be sure of their values.  The Jebelean modifications
     132             :  * consist in reliable inequalities from which we can decide fast whether
     133             :  * to carry on or to return to the outer loop, and in re-shifting after the
     134             :  * first word's worth of bits has been used up.  All of this is described
     135             :  * in R. Lercier's these [pp148-153 & 163f.], except his outer loop isn't
     136             :  * quite right  (the catch-up divisions needed when one partial quotient is
     137             :  * larger than a word are missing).
     138             :  *
     139             :  * The API consists of invmod() and bezout() below;  the single-word routines
     140             :  * xgcduu and xxgcduu may be called directly if desired;  lgcdii() probably
     141             :  * doesn't make much sense out of context.
     142             :  *
     143             :  * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
     144             :  * ptotically about a factor 2.5 .. 3, depending on processor architecture,
     145             :  * than the naive continued-division code.  Unfortunately, thanks to the
     146             :  * unrolled loops and all, the code is a bit lengthy.
     147             :  */
     148             : 
     149             : /*==================================
     150             :  * xgcduu(d,d1,f,v,v1,s)
     151             :  * xxgcduu(d,d1,f,u,u1,v,v1,s)
     152             :  * rgcduu(d,d1,vmax,u,u1,v,v1,s)
     153             :  *==================================*/
     154             : /*
     155             :  * Fast `final' extended gcd algorithm, acting on two ulongs.  Ideally this
     156             :  * should be replaced with assembler versions wherever possible.  The present
     157             :  * code essentially does `subtract, compare, and possibly divide' at each step,
     158             :  * which is reasonable when hardware division (a) exists, (b) is a bit slowish
     159             :  * and (c) does not depend a lot on the operand values (as on i486).  When
     160             :  * wordsize division is in fact an assembler routine based on subtraction,
     161             :  * this strategy may not be the most efficient one.
     162             :  *
     163             :  * xxgcduu() should be called with  d > d1 > 0, returns gcd(d,d1), and assigns
     164             :  * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1]  (i.e.,
     165             :  * the product of all the [0, 1; 1 q_j] where the leftmost factor arises from
     166             :  * the quotient of the first division step),  and the information about the
     167             :  * implied signs to s  (-1 when an odd number of divisions has been done,
     168             :  * 1 otherwise).  xgcduu() is exactly the same except that u,u1 are not com-
     169             :  * puted (and not returned, of course).
     170             :  *
     171             :  * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
     172             :  * (so we can stop the chain division one step early:  as soon as the remainder
     173             :  * equals 1).  Use this when you intend to use only what would be v, or only
     174             :  * what would be u and v, after that final division step, but not u1 and v1.
     175             :  * With the flag in force and thus without that final step, the interesting
     176             :  * quantity/ies will still sit in [u1 and] v1, of course.
     177             :  *
     178             :  * For computing the inverse of a single-word INTMOD known to exist, pass f=1
     179             :  * to xgcduu(), and obtain the result from s and v1.  (The routine does the
     180             :  * right thing when d1==1 already.)  For finishing a multiword modinv known
     181             :  * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix  (with
     182             :  * properly adjusted signs)  onto the values v' and v1' previously obtained
     183             :  * from the multiword division steps.  Actually, just take the scalar product
     184             :  * of [v',v1'] with [u1,-v1], and change the sign if s==-1.  (If the final
     185             :  * step had been carried out, it would be [-u,v], and s would also change.)
     186             :  * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
     187             :  * Finally, f=0 with xxgcduu() is useful for Bezout computations.
     188             :  * [Harrumph.  In the above prescription, the sign turns out to be precisely
     189             :  * wrong.]
     190             :  * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
     191             :  * make a difference when gcd(d,d1)>1.  The speedup is negligible.)
     192             :  *
     193             :  * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
     194             :  * recover the final u,u1 given only v,v1 and s.  However, it probably isn't
     195             :  * worthwhile, as it trades a few multiplications for a division.
     196             :  *
     197             :  * Note that these routines do not know and do not need to know about the
     198             :  * PARI stack.
     199             :  *
     200             :  * Added 2001Jan15:
     201             :  * rgcduu() is a variant of xxgcduu() which does not have f  (the effect is
     202             :  * that of f=0),  but instead has a ulong vmax parameter, for use in rational
     203             :  * reconstruction below.  It returns when v1 exceeds vmax;  v will never
     204             :  * exceed vmax.  (vmax=0 is taken as a synonym of ULONG_MAX i.e. unlimited,
     205             :  * in which case rgcduu behaves exactly like xxgcduu with f=0.)  The return
     206             :  * value of rgcduu() is typically meaningless;  the interesting part is the
     207             :  * matrix.
     208             :  */
     209             : 
     210             : ulong
     211   208263590 : xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
     212             : {
     213             :   ulong xv,xv1, xs, q,res;
     214             :   LOCAL_HIREMAINDER;
     215             : 
     216             :   /* The above blurb contained a lie.  The main loop always stops when d1
     217             :    * has become equal to 1.  If (d1 == 1 && !(f&1)) after the loop, we do
     218             :    * the final `division' of d by 1 `by hand' as it were.
     219             :    *
     220             :    * The loop has already been unrolled once.  Aggressive optimization could
     221             :    * well lead to a totally unrolled assembler version...
     222             :    *
     223             :    * On modern x86 architectures, this loop is a pig anyway.  The division
     224             :    * instruction always puts its result into the same pair of registers, and
     225             :    * we always want to use one of them straight away, so pipeline performance
     226             :    * will suck big time.  An assembler version should probably do a first loop
     227             :    * computing and storing all the quotients -- their number is bounded in
     228             :    * advance -- and then assembling the matrix in a second pass.  On other
     229             :    * architectures where we can cycle through four or so groups of registers
     230             :    * and exploit a fast ALU result-to-operand feedback path, this is much less
     231             :    * of an issue.  (Intel sucks.  See http://www.x86.org/ ...)
     232             :    */
     233   208263590 :   xs = res = 0;
     234   208263590 :   xv = 0UL; xv1 = 1UL;
     235  1443726754 :   while (d1 > 1UL)
     236             :   {
     237  1130837356 :     d -= d1;                        /* no need to use subll */
     238  1130837356 :     if (d >= d1)
     239             :     {
     240   619384233 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     241   619384233 :       xv += q * xv1;
     242             :     }
     243             :     else
     244   511453123 :       xv += xv1;
     245             :                                 /* possible loop exit */
     246  1130837356 :     if (d <= 1UL) { xs=1; break; }
     247             :                                 /* repeat with inverted roles */
     248  1027199574 :     d1 -= d;
     249  1027199574 :     if (d1 >= d)
     250             :     {
     251   584886385 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     252   584886385 :       xv1 += q * xv;
     253             :     }
     254             :     else
     255   442313189 :       xv1 += xv;
     256             :   } /* while */
     257             : 
     258   208263590 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     259             :   {
     260     3736042 :     if (xs && d==1)
     261      573462 :     { xv1 += d1 * xv; xs = 0; res = 1UL; }
     262     3162580 :     else if (!xs && d1==1)
     263      434553 :     { xv += d * xv1; xs = 1; res = 1UL; }
     264             :   }
     265             : 
     266   208263590 :   if (xs)
     267             :   {
     268   108590810 :     *s = -1; *v = xv1; *v1 = xv;
     269   108590810 :     return (res ? res : (d==1 ? 1UL : d1));
     270             :   }
     271             :   else
     272             :   {
     273    99672780 :     *s = 1; *v = xv; *v1 = xv1;
     274    99672780 :     return (res ? res : (d1==1 ? 1UL : d));
     275             :   }
     276             : }
     277             : 
     278             : 
     279             : ulong
     280   169273295 : xxgcduu(ulong d, ulong d1, int f,
     281             :         ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     282             : {
     283             :   ulong xu,xu1, xv,xv1, xs, q,res;
     284             :   LOCAL_HIREMAINDER;
     285             : 
     286   169273295 :   xs = res = 0;
     287   169273295 :   xu = xv1 = 1UL;
     288   169273295 :   xu1 = xv = 0UL;
     289   510569018 :   while (d1 > 1UL)
     290             :   {
     291   256242046 :     d -= d1;                        /* no need to use subll */
     292   256242046 :     if (d >= d1)
     293             :     {
     294   148139444 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     295   148139444 :       xv += q * xv1;
     296   148139444 :       xu += q * xu1;
     297             :     }
     298             :     else
     299   108102602 :     { xv += xv1; xu += xu1; }
     300             :                                 /* possible loop exit */
     301   256242046 :     if (d <= 1UL) { xs=1; break; }
     302             :                                 /* repeat with inverted roles */
     303   172022428 :     d1 -= d;
     304   172022428 :     if (d1 >= d)
     305             :     {
     306    94363324 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     307    94363324 :       xv1 += q * xv;
     308    94363324 :       xu1 += q * xu;
     309             :     }
     310             :     else
     311    77659104 :     { xv1 += xv; xu1 += xu; }
     312             :   } /* while */
     313             : 
     314   169273295 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     315             :   {
     316   166649754 :     if (xs && d==1)
     317             :     {
     318    48027265 :       xv1 += d1 * xv;
     319    48027265 :       xu1 += d1 * xu;
     320    48027265 :       xs = 0; res = 1UL;
     321             :     }
     322   118622489 :     else if (!xs && d1==1)
     323             :     {
     324    73346403 :       xv += d * xv1;
     325    73346403 :       xu += d * xu1;
     326    73346403 :       xs = 1; res = 1UL;
     327             :     }
     328             :   }
     329             : 
     330   169273295 :   if (xs)
     331             :   {
     332   109538780 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     333   109538780 :     return (res ? res : (d==1 ? 1UL : d1));
     334             :   }
     335             :   else
     336             :   {
     337    59734515 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     338    59734515 :     return (res ? res : (d1==1 ? 1UL : d));
     339             :   }
     340             : }
     341             : 
     342             : ulong
     343      739103 : rgcduu(ulong d, ulong d1, ulong vmax,
     344             :        ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     345             : {
     346      739103 :   ulong xu,xu1, xv,xv1, xs, q, res=0;
     347      739103 :   int f = 0;
     348             :   LOCAL_HIREMAINDER;
     349             : 
     350      739103 :   if (vmax == 0) vmax = ULONG_MAX;
     351      739103 :   xs = res = 0;
     352      739103 :   xu = xv1 = 1UL;
     353      739103 :   xu1 = xv = 0UL;
     354     3370033 :   while (d1 > 1UL)
     355             :   {
     356     2509990 :     d -= d1;                        /* no need to use subll */
     357     2509990 :     if (d >= d1)
     358             :     {
     359     1391381 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     360     1391381 :       xv += q * xv1;
     361     1391381 :       xu += q * xu1;
     362             :     }
     363             :     else
     364     1118609 :     { xv += xv1; xu += xu1; }
     365             :                                 /* possible loop exit */
     366     2509990 :     if (xv > vmax) { f=xs=1; break; }
     367     2258708 :     if (d <= 1UL) { xs=1; break; }
     368             :                                 /* repeat with inverted roles */
     369     2153919 :     d1 -= d;
     370     2153919 :     if (d1 >= d)
     371             :     {
     372     1276473 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     373     1276473 :       xv1 += q * xv;
     374     1276473 :       xu1 += q * xu;
     375             :     }
     376             :     else
     377      877446 :     { xv1 += xv; xu1 += xu; }
     378             :                                 /* possible loop exit */
     379     2153919 :     if (xv1 > vmax) { f=1; break; }
     380             :   } /* while */
     381             : 
     382      739103 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     383             :   {
     384      225729 :     if (xs && d==1)
     385             :     {
     386      104789 :       xv1 += d1 * xv;
     387      104789 :       xu1 += d1 * xu;
     388      104789 :       xs = 0; res = 1UL;
     389             :     }
     390      120940 :     else if (!xs && d1==1)
     391             :     {
     392      120940 :       xv += d * xv1;
     393      120940 :       xu += d * xu1;
     394      120940 :       xs = 1; res = 1UL;
     395             :     }
     396             :   }
     397             : 
     398      739103 :   if (xs)
     399             :   {
     400      372222 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     401      372222 :     return (res ? res : (d==1 ? 1UL : d1));
     402             :   }
     403             :   else
     404             :   {
     405      366881 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     406      366881 :     return (res ? res : (d1==1 ? 1UL : d));
     407             :   }
     408             : }
     409             : 
     410             : /*==================================
     411             :  * cbezout(a,b,uu,vv)
     412             :  *==================================
     413             :  * Same as bezout() but for C longs.
     414             :  *    Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
     415             :  *    such that g = u*a + v*b.
     416             :  * Special cases:
     417             :  *    a == b == 0 ==> pick u=1, v=0 (and return 1, surprisingly)
     418             :  *    a != 0 == b ==> keep v=0
     419             :  *    a == 0 != b ==> keep u=0
     420             :  *    |a| == |b| != 0 ==> keep u=0, set v=+-1
     421             :  * Assignments through uu,vv happen unconditionally;  non-NULL pointers
     422             :  * _must_ be used.
     423             :  */
     424             : long
     425      241696 : cbezout(long a,long b,long *uu,long *vv)
     426             : {
     427             :   long s,*t;
     428      241696 :   ulong d = labs(a), d1 = labs(b);
     429             :   ulong r,u,u1,v,v1;
     430             : 
     431      241696 :   if (!b)
     432             :   {
     433         973 :     *vv=0L;
     434         973 :     if (!a) { *uu=1L; return 0L; }
     435         973 :     *uu = a < 0 ? -1L : 1L;
     436         973 :     return (long)d;
     437             :   }
     438      240723 :   else if (!a || (d == d1))
     439             :   {
     440       43883 :     *uu = 0L; *vv = b < 0 ? -1L : 1L;
     441       43883 :     return (long)d1;
     442             :   }
     443      196840 :   else if (d == 1)                /* frequently used by nfinit */
     444             :   {
     445      104853 :     *uu = a; *vv = 0L;
     446      104853 :     return 1L;
     447             :   }
     448       91987 :   else if (d < d1)
     449             :   {
     450             : /* bug in gcc-2.95.3:
     451             :  * s = a; a = b; b = s; produces wrong result a = b. This is OK:  */
     452       81886 :     { long _x = a; a = b; b = _x; }        /* in order to keep the right signs */
     453       81886 :     r = d; d = d1; d1 = r;
     454       81886 :     t = uu; uu = vv; vv = t;
     455             :   }
     456             :   /* d > d1 > 0 */
     457       91987 :   r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
     458       91987 :   if (s < 0)
     459             :   {
     460       52535 :     *uu = a < 0 ? (long)u : -(long)u;
     461       52535 :     *vv = b < 0 ? -(long)v : (long)v;
     462             :   }
     463             :   else
     464             :   {
     465       39452 :     *uu = a < 0 ? -(long)u : (long)u;
     466       39452 :     *vv = b < 0 ? (long)v : -(long)v;
     467             :   }
     468       91987 :   return (long)r;
     469             : }
     470             : 
     471             : /*==================================
     472             :  * lgcdii(d,d1,u,u1,v,v1,vmax)
     473             :  *==================================*/
     474             : /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
     475             :  *
     476             :  * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
     477             :  * and a quantity of bits from d1 obtained by a shift of the same displacement,
     478             :  * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
     479             :  * the product of all the [0,1; 1,qj] thus obtained, where the leftmost
     480             :  * factor arises from the quotient of the first division step.
     481             :  *
     482             :  * For use in rational reconstruction, vmax can be given a nonzero value.
     483             :  * In this case, we will return early as soon as v1 > vmax (i.e. v <= vmax)
     484             :  *
     485             :  * MUST be called with  d > d1 > 0, and with  d occupying more than one
     486             :  * significant word.  Returns the number of reduction/swap steps carried out,
     487             :  * possibly zero, or under certain conditions minus that number.  When the
     488             :  * return value is nonzero, the caller should use the returned recurrence
     489             :  * matrix to update its own copies of d,d1.  When the return value is
     490             :  * non-positive, and the latest remainder after updating turns out to be
     491             :  * nonzero, the caller should at once attempt a full division, rather than
     492             :  * trying lgcdii() again -- this typically happens when we are about to
     493             :  * encounter a quotient larger than half a word. (This is not detected
     494             :  * infallibly -- after a positive return value, it is possible that the next
     495             :  * stage will end up needing a full division.  After a negative return value,
     496             :  * however, this is certain, and should be acted upon.)
     497             :  *
     498             :  * (The sign information, for which xgcduu() has its return argument s, is now
     499             :  * implicit in the LSB of our return value, and the caller may take advantage
     500             :  * of the fact that a return value of +-1 implies u==0,u1==v==1  [only v1 pro-
     501             :  * vides interesting information in this case].  One might also use the fact
     502             :  * that if the return value is +-2, then u==1, but this is rather marginal.)
     503             :  *
     504             :  * If it was not possible to determine even the first quotient, either because
     505             :  * we're too close to an integer quotient or because the quotient would be
     506             :  * larger than one word  (if the `leading digit' of d1 after shifting is all
     507             :  * zeros), we return 0 and do not assign anything to the last four args.
     508             :  *
     509             :  * The division chain might even run to completion. It is up to the caller to
     510             :  * detect this case. This routine does not change d or d1; this is also up to
     511             :  * the caller */
     512             : int
     513     7513464 : lgcdii(ulong* d, ulong* d1, ulong* u, ulong* u1, ulong* v, ulong* v1,
     514             :        ulong vmax)
     515             : {
     516             :   /* Strategy:  (1) Extract/shift most significant bits.  We assume that d
     517             :    * has at least two significant words, but we can cope with a one-word d1.
     518             :    * Let dd,dd1 be the most significant dividend word and matching part of the
     519             :    * divisor.
     520             :    * (2) Check for overflow on the first division.  For our purposes, this
     521             :    * happens when the upper half of dd1 is zero.  (Actually this is detected
     522             :    * during extraction.)
     523             :    * (3) Get a fix on the first quotient.  We compute q = floor(dd/dd1), which
     524             :    * is an upper bound for floor(d/d1), and which gives the true value of the
     525             :    * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
     526             :    * (If it isn't, we give up.  This is annoying because the subsequent full
     527             :    * division will repeat some work already done, but it happens infrequently.
     528             :    * Doing the extra-bit-fetch in this case would be awkward.)
     529             :    * (4) Finish initializations.
     530             :    *
     531             :    * The remainder of the action is comparatively boring... The main loop has
     532             :    * been unrolled once (so we don't swap things and we can apply Jebelean's
     533             :    * termination conditions which alternatingly take two different forms during
     534             :    * successive iterations).  When we first run out of sufficient bits to form
     535             :    * a quotient, and have an extra word of each operand, we pull out two whole
     536             :    * word's worth of dividend bits, and divisor bits of matching significance;
     537             :    * to these we apply our partial matrix (disregarding overflow because the
     538             :    * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
     539             :    * re-extract one word's worth of the current dividend and a matching amount
     540             :    * of divisor bits.  The affair will normally terminate with matrix entries
     541             :    * just short of a whole word.  (We terminate the inner loop before these can
     542             :    * possibly overflow.) */
     543             :   ulong dd,dd1,ddlo,dd1lo, sh,shc; /* `digits', shift count */
     544             :   ulong xu,xu1, xv,xv1, q,res; /* recurrences, partial quotient, count */
     545             :   ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
     546             :   ulong dm1, d1m1;
     547             :   long ld, ld1, lz;
     548     7513464 :   int skip = 0;
     549             :   LOCAL_OVERFLOW;
     550             :   LOCAL_HIREMAINDER;
     551             : 
     552             :   /* following is just for convenience: vmax==0 means no bound */
     553     7513464 :   if (vmax == 0) vmax = ULONG_MAX;
     554     7513464 :   ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
     555     7513464 :   if (lz > 1) return 0; /* rare */
     556             : 
     557     7449511 :   d = int_MSW(d);  dm1  = *int_precW(d);
     558     7449511 :   d1 = int_MSW(d1);d1m1 = *int_precW(d1);
     559     7449511 :   dd1lo = 0; /* unless we find something better */
     560     7449511 :   sh = bfffo(*d);
     561             : 
     562     7449511 :   if (sh)
     563             :   { /* do the shifting */
     564     7157977 :     shc = BITS_IN_LONG - sh;
     565     7157977 :     if (lz)
     566             :     { /* dividend longer than divisor */
     567     1691763 :       dd1 = (*d1 >> shc);
     568     1691763 :       if (!(HIGHMASK & dd1)) return 0;  /* overflow detected */
     569     1033342 :       if (ld1 > 3)
     570       93387 :         dd1lo = (*d1 << sh) + (d1m1 >> shc);
     571             :       else
     572      939955 :         dd1lo = (*d1 << sh);
     573             :     }
     574             :     else
     575             :     { /* dividend and divisor have the same length */
     576     5466214 :       dd1 = (*d1 << sh);
     577     5466214 :       if (!(HIGHMASK & dd1)) return 0;
     578     5444348 :       if (ld1 > 3)
     579             :       {
     580     5444348 :         dd1 += (d1m1 >> shc);
     581     5444348 :         if (ld1 > 4)
     582     2919460 :           dd1lo = (d1m1 << sh) + (*int_precW(int_precW(d1)) >> shc);
     583             :         else
     584     2524888 :           dd1lo = (d1m1 << sh);
     585             :       }
     586             :     }
     587             :     /* following lines assume d to have 2 or more significant words */
     588     6477690 :     dd = (*d << sh) + (dm1 >> shc);
     589     6477690 :     if (ld > 4)
     590     3012847 :       ddlo = (dm1 << sh) + (*int_precW(int_precW(d)) >> shc);
     591             :     else
     592     3464843 :       ddlo = (dm1 << sh);
     593             :   }
     594             :   else
     595             :   { /* no shift needed */
     596      291534 :     if (lz) return 0; /* dividend longer than divisor: overflow */
     597      285143 :     dd1 = *d1;
     598      285143 :     if (!(HIGHMASK & dd1)) return 0;
     599      284386 :     if(ld1 > 3) dd1lo = d1m1;
     600             :     /* assume again that d has another significant word */
     601      284386 :     dd = *d; ddlo = dm1;
     602             :   }
     603             : 
     604             :   /* First subtraction/division stage.  (If a subtraction initially suffices,
     605             :    * we don't divide at all.)  If a Jebelean condition is violated, and we
     606             :    * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
     607             :    * give up and ask for a full division.  Otherwise we commit the result,
     608             :    * possibly deciding to re-shift immediately afterwards.
     609             :    */
     610     6762076 :   dd -= dd1;
     611     6762076 :   if (dd < dd1)
     612             :   { /* first quotient known to be == 1 */
     613     2148527 :     xv1 = 1UL;
     614     2148527 :     if (!dd) /* !(Jebelean condition), extraspecial case */
     615             :     { /* note this can actually happen...  Now q==1 is known, but we underflow
     616             :        * already. OTOH we've just shortened d by a whole word. Thus we are
     617             :        * happy and return. */
     618      216402 :       *u = 0; *v = *u1 = *v1 = 1UL;
     619      216402 :       return -1; /* Next step will be a full division. */
     620             :     }
     621             :   }
     622             :   else
     623             :   { /* division indicated */
     624     4613549 :     hiremainder = 0;
     625     4613549 :     xv1 = 1 + divll(dd, dd1); /* xv1: alternative spelling of `q', here ;) */
     626     4613549 :     dd = hiremainder;
     627     4613549 :     if (dd < xv1) /* !(Jebelean cond'), non-extra special case */
     628             :     { /* Attempt to complete the division using the less significant bits,
     629             :        * before skipping right past the 1st loop to the reshift stage. */
     630      400040 :       ddlo = subll(ddlo, mulll(xv1, dd1lo));
     631      400040 :       dd = subllx(dd, hiremainder);
     632             : 
     633             :       /* If we now have an overflow, q was too large. Thanks to our decision
     634             :        * not to get here unless the original dd1 had bits set in the upper half
     635             :        * of the word, we now know that the correct quotient is in fact q-1. */
     636      400040 :       if (overflow)
     637             :       {
     638      103170 :         xv1--;
     639      103170 :         ddlo = addll(ddlo,dd1lo);
     640      103170 :         dd = addllx(dd,dd1); /* overflows again which cancels the borrow */
     641             :         /* ...and fall through to skip=1 below */
     642             :       }
     643             :       else
     644             :       /* Test Jebelean condition anew, at this point using _all_ the extracted
     645             :        * bits we have.  This is clutching at straws; we have a more or less
     646             :        * even chance of succeeding this time.  Note that if we fail, we really
     647             :        * do not know whether the correct quotient would have been q or some
     648             :        * smaller value. */
     649      296870 :         if (!dd && ddlo < xv1) return 0;
     650             : 
     651             :       /* Otherwise, we now know that q is correct, but we cannot go into the
     652             :        * 1st loop.  Raise a flag so we'll remember to skip past the loop.
     653             :        * Get here also after the q-1 adjustment case. */
     654      208309 :       skip = 1;
     655             :     } /* if !(Jebelean) then */
     656             :   }
     657     6353943 :   res = 1;
     658     6353943 :   if (xv1 > vmax)
     659             :   { /* gone past the bound already */
     660       32103 :     *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
     661       32103 :     return res;
     662             :   }
     663     6321840 :   xu = 0UL; xv = xu1 = 1UL;
     664             : 
     665             :   /* Some invariants from here across the first loop:
     666             :    *
     667             :    * At this point, and again after we are finished with the first loop and
     668             :    * subsequent conditional, a division and the attached update of the
     669             :    * recurrence matrix have just been carried out completely.  The matrix
     670             :    * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
     671             :    * columns), and the current remainder == next divisor (dd at the moment)
     672             :    * is nonzero (it might be zero here, but then skip will have been set).
     673             :    *
     674             :    * After the first loop, or when skip is set already, it will also be the
     675             :    * case that there aren't sufficiently many bits to continue without re-
     676             :    * shifting.  If the divisor after reshifting is zero, or indeed if it
     677             :    * doesn't have more than half a word's worth of bits, we will have to
     678             :    * return at that point.  Otherwise, we proceed into the second loop.
     679             :    *
     680             :    * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
     681             :    * already reflect the result of applying the current matrix to the old
     682             :    * ddorig:ddlo and dd1orig:dd1lo.  (For the first iteration above, this
     683             :    * was easy to achieve, and we didn't even need to peek into the (now
     684             :    * no longer existent!) saved words.  After the loop, we'll stop for a
     685             :    * moment to merge in the ddlo,dd1lo contributions.)
     686             :    *
     687             :    * Note that after the 1st division, even an a priori quotient of 1 cannot be
     688             :    * trusted until we've checked Jebelean's condition: it might be too small */
     689             : 
     690     6321840 :   if (!skip)
     691             :   {
     692             :     for(;;)
     693             :     { /* First half of loop divides dd into dd1, and leaves the recurrence
     694             :        * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     695             :        * entries) when successful. */
     696    89611866 :       tmpd = dd1 - dd;
     697    47863521 :       if (tmpd < dd)
     698             :       { /* quotient suspected to be 1 */
     699    20156756 :         tmpu = xu + xu1; /* cannot overflow -- everything bounded by
     700             :                           * the original dd during first loop */
     701    20156756 :         tmpv = xv + xv1;
     702             :       }
     703             :       else
     704             :       { /* division indicated */
     705    27706765 :         hiremainder = 0;
     706    27706765 :         q = 1 + divll(tmpd, dd);
     707    27706765 :         tmpd = hiremainder;
     708    27706765 :         tmpu = xu + q*xu1; /* can't overflow, but may need to be undone */
     709    27706765 :         tmpv = xv + q*xv1;
     710             :       }
     711             : 
     712    47863521 :       tmp0 = addll(tmpv, xv1);
     713    94500637 :       if ((tmpd < tmpu) || overflow ||
     714    46637116 :           (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
     715             :         break; /* skip ahead to reshift stage */
     716             :       else
     717             :       { /* commit dd1, xu, xv */
     718    44878966 :         res++;
     719    44878966 :         dd1 = tmpd; xu = tmpu; xv = tmpv;
     720    44878966 :         if (xv > vmax) { *u = xu1; *u1 = xu; *v = xv1; *v1 = xv; return res; }
     721             :       }
     722             : 
     723             :       /* Second half of loop divides dd1 into dd, and the matrix returns to its
     724             :        * normal arrangement. */
     725    44749169 :       tmpd = dd - dd1;
     726    44749169 :       if (tmpd < dd1)
     727             :       { /* quotient suspected to be 1 */
     728    17959263 :         tmpu = xu1 + xu; /* cannot overflow */
     729    17959263 :         tmpv = xv1 + xv;
     730             :       }
     731             :       else
     732             :       { /* division indicated */
     733    26789906 :         hiremainder = 0;
     734    26789906 :         q = 1 + divll(tmpd, dd1);
     735    26789906 :         tmpd = hiremainder;
     736    26789906 :         tmpu = xu1 + q*xu;
     737    26789906 :         tmpv = xv1 + q*xv;
     738             :       }
     739             : 
     740    44749169 :       tmp0 = addll(tmpu, xu);
     741    86931137 :       if ((tmpd < tmpv) || overflow ||
     742    42181968 :           (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
     743             :         break; /* skip ahead to reshift stage */
     744             :       else
     745             :       { /* commit dd, xu1, xv1 */
     746    41835358 :         res++;
     747    41835358 :         dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     748    41835358 :         if (xv1 > vmax) { *u = xu; *u1 = xu1; *v = xv; *v1 = xv1; return res; }
     749             :       }
     750             :     } /* end of first loop */
     751             : 
     752             :     /* Intermezzo: update dd:ddlo, dd1:dd1lo.  (But not if skip is set.) */
     753     5898366 :     if (res&1)
     754             :     { /* after failed division in 1st half of loop:
     755             :        * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
     756             :        *                       * [ -xu, xu1 ; xv, -xv1 ]
     757             :        * Actually, we only multiply [ddlo,dd1lo] onto the matrix and add the
     758             :        * high-order remainders + overflows onto [dd1,dd] */
     759     2984555 :       tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
     760     2984555 :       tmp1 = subll(mulll(dd1lo,xv), tmp1);
     761     2984555 :       dd1 += subllx(hiremainder, tmp0);
     762     2984555 :       tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
     763     2984555 :       ddlo = subll(tmp2, mulll(dd1lo,xv1));
     764     2984555 :       dd += subllx(tmp0, hiremainder);
     765     2984555 :       dd1lo = tmp1;
     766             :     }
     767             :     else
     768             :     { /* after failed division in 2nd half of loop:
     769             :        * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
     770             :        *                       * [ xu1, -xu ; -xv1, xv ]
     771             :        * Actually, we only multiply [ddlo,dd1lo] onto the matrix and add the
     772             :        * high-order remainders + overflows onto [dd,dd1] */
     773     2913811 :       tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
     774     2913811 :       tmp1 = subll(tmp1, mulll(dd1lo,xv1));
     775     2913811 :       dd += subllx(tmp0, hiremainder);
     776     2913811 :       tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
     777     2913811 :       dd1lo = subll(mulll(dd1lo,xv), tmp2);
     778     2913811 :       dd1 += subllx(hiremainder, tmp0);
     779     2913811 :       ddlo = tmp1;
     780             :     }
     781             :   } /* end of skip-pable section:  get here also, with res==1, when there
     782             :      * was a problem immediately after the very first division. */
     783             : 
     784             :   /* Re-shift.  Note:  the shift count _can_ be zero, viz. under the following
     785             :    * precise conditions:  The original dd1 had its topmost bit set, so the 1st
     786             :    * q was 1, and after subtraction, dd had its topmost bit unset.  If now
     787             :    * dd==0, we'd have taken the return exit already, so we couldn't have got
     788             :    * here.  If not, then it must have been the second division which has gone
     789             :    * amiss  (because dd1 was very close to an exact multiple of the remainder
     790             :    * dd value, so this will be very rare).  At this point, we'd have a fairly
     791             :    * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
     792             :    * this is not guaranteed to work.  Instead of trying, we return at once.
     793             :    * The caller will see to it that the initial subtraction is re-done using
     794             :    * _all_ the bits of both operands, which already helps, and the next round
     795             :    * will either be a full division  (if dd occupied a halfword or less),  or
     796             :    * another llgcdii() first step.  In the latter case, since we try a little
     797             :    * harder during our first step, we may actually be able to fix the problem,
     798             :    * and get here again with improved low-order bits and with another step
     799             :    * under our belt.  Otherwise we'll have given up above and forced a full-
     800             :    * blown division.
     801             :    *
     802             :    * If res is even, the shift count _cannot_ be zero.  (The first step forces
     803             :    * a zero into the remainder's MSB, and all subsequent remainders will have
     804             :    * inherited it.)
     805             :    *
     806             :    * The re-shift stage exits if the next divisor has at most half a word's
     807             :    * worth of bits.
     808             :    *
     809             :    * For didactic reasons, the second loop will be arranged in the same way
     810             :    * as the first -- beginning with the division of dd into dd1, as if res
     811             :    * was odd.  To cater for this, if res is actually even, we swap things
     812             :    * around during reshifting.  (During the second loop, the parity of res
     813             :    * does not matter;  we know in which half of the loop we are when we decide
     814             :    * to return.) */
     815     6105030 :   if (res&1)
     816             :   { /* after odd number of division(s) */
     817     3191219 :     if (dd1 && (sh = bfffo(dd1)))
     818             :     {
     819     3121093 :       shc = BITS_IN_LONG - sh;
     820     3121093 :       dd = (ddlo >> shc) + (dd << sh);
     821     3121093 :       if (!(HIGHMASK & dd))
     822             :       {
     823      138660 :         *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     824      138660 :         return -res; /* full division asked for */
     825             :       }
     826     2982433 :       dd1 = (dd1lo >> shc) + (dd1 << sh);
     827             :     }
     828             :     else
     829             :     { /* time to return: <= 1 word left, or sh==0 */
     830       70126 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     831       70126 :       return res;
     832             :     }
     833             :   }
     834             :   else
     835             :   { /* after even number of divisions */
     836     2913811 :     if (dd)
     837             :     {
     838     2913811 :       sh = bfffo(dd); /* > 0 */
     839     2913811 :       shc = BITS_IN_LONG - sh;
     840             :       /* dd:ddlo will become the new dd1, and v.v. */
     841     2913811 :       tmpd = (ddlo >> shc) + (dd << sh);
     842     2913811 :       dd = (dd1lo >> shc) + (dd1 << sh);
     843     2913811 :       dd1 = tmpd;
     844             :       /* This completes the swap; now dd is again the current divisor */
     845     2913811 :       if (HIGHMASK & dd)
     846             :       { /* recurrence matrix is the wrong way round; swap it */
     847     2876836 :         tmp0 = xu; xu = xu1; xu1 = tmp0;
     848     2876836 :         tmp0 = xv; xv = xv1; xv1 = tmp0;
     849             :       }
     850             :       else
     851             :       { /* recurrence matrix is the wrong way round; fix this */
     852       36975 :         *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     853       36975 :         return -res;                /* full division asked for */
     854             :       }
     855             :     }
     856             :     else
     857             :     { /* time to return: <= 1 word left */
     858           0 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     859           0 :       return res;
     860             :     }
     861             :   } /* end reshift */
     862             : 
     863             :   /* The Second Loop.  Rip-off of the first, but we now check for overflow
     864             :    * in the recurrences.  Returns instead of breaking when we cannot fix the
     865             :    * quotient any longer. */
     866             :   for(;;)
     867             :   { /* First half of loop divides dd into dd1, and leaves the recurrence
     868             :      * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     869             :      * entries) when successful */
     870    45625531 :     tmpd = dd1 - dd;
     871    25742400 :     if (tmpd < dd)
     872             :     { /* quotient suspected to be 1 */
     873     8820275 :       tmpu = xu + xu1;
     874     8820275 :       tmpv = addll(xv, xv1); /* xv,xv1 will overflow first */
     875     8820275 :       tmp1 = overflow;
     876             :     }
     877             :     else
     878             :     { /* division indicated */
     879    16922125 :       hiremainder = 0;
     880    16922125 :       q = 1 + divll(tmpd, dd);
     881    16922125 :       tmpd = hiremainder;
     882    16922125 :       tmpu = xu + q*xu1;
     883    16922125 :       tmpv = addll(xv, mulll(q,xv1));
     884    16922125 :       tmp1 = overflow | hiremainder;
     885             :     }
     886             : 
     887    25742400 :     tmp0 = addll(tmpv, xv1);
     888    49988922 :     if ((tmpd < tmpu) || overflow || tmp1 ||
     889    24246522 :         (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
     890             :     { /* The recurrence matrix has not yet been warped... */
     891     3063947 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     892     3063947 :       break;
     893             :     }
     894             :     /* commit dd1, xu, xv */
     895    22678453 :     res++;
     896    22678453 :     dd1 = tmpd; xu = tmpu; xv = tmpv;
     897    22678453 :     if (xv > vmax) { *u = xu1; *u1 = xu; *v = xv1; *v1 = xv; return res; }
     898             : 
     899             :     /* Second half of loop divides dd1 into dd, and the matrix returns to its
     900             :      * normal arrangement */
     901    22596674 :     tmpd = dd - dd1;
     902    22596674 :     if (tmpd < dd1)
     903             :     { /* quotient suspected to be 1 */
     904     9610295 :       tmpu = xu1 + xu;
     905     9610295 :       tmpv = addll(xv1, xv);
     906     9610295 :       tmp1 = overflow;
     907             :     }
     908             :     else
     909             :     { /* division indicated */
     910    12986379 :       hiremainder = 0;
     911    12986379 :       q = 1 + divll(tmpd, dd1);
     912    12986379 :       tmpd = hiremainder;
     913    12986379 :       tmpu = xu1 + q*xu;
     914    12986379 :       tmpv = addll(xv1, mulll(q, xv));
     915    12986379 :       tmp1 = overflow | hiremainder;
     916             :     }
     917             : 
     918    22596674 :     tmp0 = addll(tmpu, xu);
     919    42896667 :     if ((tmpd < tmpv) || overflow || tmp1 ||
     920    20299993 :         (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
     921             :     { /* The recurrence matrix has not yet been unwarped, so it is
     922             :        * the wrong way round;  fix this. */
     923     2652544 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     924     2652544 :       break;
     925             :     }
     926             : 
     927    19944130 :     res++; /* commit dd, xu1, xv1 */
     928    19944130 :     dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     929    19944130 :     if (xv1 > vmax) { *u = xu; *u1 = xu1; *v = xv; *v1 = xv1; return res; }
     930             :   } /* end of second loop */
     931             : 
     932     5716491 :   return res;
     933             : }
     934             : 
     935             : /* 1 / Mod(x,p). Assume x < p */
     936             : ulong
     937   197061251 : Fl_invsafe(ulong x, ulong p)
     938             : {
     939             :   long s;
     940   197061251 :   ulong xv, xv1, g = xgcduu(p, x, 1, &xv, &xv1, &s);
     941   198712112 :   if (g != 1UL) return 0UL;
     942   198267222 :   xv = xv1 % p; if (s < 0) xv = p - xv;
     943   198267222 :   return xv;
     944             : }
     945             : 
     946             : /* result known to be representable as an ulong */
     947             : static ulong
     948     2159133 : lcmuu(ulong a, ulong b) { ulong d = ugcd(a,b); return (a/d) * b; }
     949             : /* assume 0 < x < N; return u in (Z/NZ)^* such that u x = gcd(x,N) (mod N);
     950             :  * set *pd = gcd(x,N) */
     951             : ulong
     952     3736042 : Fl_invgen(ulong x, ulong N, ulong *pd)
     953             : {
     954             :   ulong d, d0, e, v, v1;
     955             :   long s;
     956     3736042 :   *pd = d = xgcduu(N, x, 0, &v, &v1, &s);
     957     3736042 :   if (s > 0) v = N - v;
     958     3736042 :   if (d == 1) return v;
     959             :   /* vx = gcd(x,N) (mod N), v coprime to N/d but need not be coprime to N */
     960     2728027 :   e = N / d;
     961     2728027 :   d0 = u_ppo(d, e); /* d = d0 d1, d0 coprime to N/d, rad(d1) | N/d */
     962     2728027 :   if (d0 == 1) return v;
     963     2159133 :   e = lcmuu(e, d / d0);
     964     2159133 :   return u_chinese_coprime(v, 1, e, d0, e*d0);
     965             : }
     966             : 
     967             : /* 1 / Mod(x,p). Assume x < p */
     968             : ulong
     969   180242305 : Fl_inv(ulong x, ulong p)
     970             : {
     971   180242305 :   ulong xv  = Fl_invsafe(x, p);
     972   180854328 :   if (!xv && p!=1UL) pari_err_INV("Fl_inv", mkintmod(utoi(x), utoi(p)));
     973   180857269 :   return xv;
     974             : }

Generated by: LCOV version 1.13