Code coverage tests

This page documents the degree to which the PARI/GP source code is tested by our public test suite, distributed with the source distribution in directory src/test/. This is measured by the gcov utility; we then process gcov output using the lcov frond-end.

We test a few variants depending on Configure flags on the pari.math.u-bordeaux.fr machine (x86_64 architecture), and agregate them in the final report:

The target is to exceed 90% coverage for all mathematical modules (given that branches depending on DEBUGLEVEL or DEBUGMEM are not covered). This script is run to produce the results below.

LCOV - code coverage report
Current view: top level - kernel/none - gcdll.c (source / functions) Hit Total Coverage
Test: PARI/GP v2.18.0 lcov report (development 29789-eb252ba62d) Lines: 338 340 99.4 %
Date: 2024-12-03 09:08:48 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #line 2 "../src/kernel/none/gcdll.c"
       2             : /* Copyright (C) 2000  The PARI group.
       3             : 
       4             : This file is part of the PARI/GP package.
       5             : 
       6             : PARI/GP is free software; you can redistribute it and/or modify it under the
       7             : terms of the GNU General Public License as published by the Free Software
       8             : Foundation; either version 2 of the License, or (at your option) any later
       9             : version. It is distributed in the hope that it will be useful, but WITHOUT
      10             : ANY WARRANTY WHATSOEVER.
      11             : 
      12             : Check the License for details. You should have received a copy of it, along
      13             : with the package; see the file 'COPYING'. If not, write to the Free Software
      14             : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
      15             : 
      16             : /***********************************************************************/
      17             : /**                                                                   **/
      18             : /**                          GCD                                      **/
      19             : /**                                                                   **/
      20             : /***********************************************************************/
      21             : /* Fast ulong gcd.  Called with y odd; x can be arbitrary (but will most of
      22             :  * the time be smaller than y). */
      23             : 
      24             : /* Gotos are Harmful, and Programming is a Science.  E.W.Dijkstra. */
      25             : INLINE ulong
      26   290634982 : gcduodd(ulong x, ulong y)         /* assume y&1==1, y > 1 */
      27             : {
      28   290634982 :   if (!x) return y;
      29             :   /* fix up x */
      30   681942003 :   while (!(x&1)) x>>=1;
      31   290634982 :   if (x==1) return 1;
      32   227292897 :   if (x==y) return y;
      33   218665956 :   else if (x>y) goto xislarger;/* will be rare, given how we'll use this */
      34             :   /* loop invariants: x,y odd and distinct. */
      35   174606014 :  yislarger:
      36   810530651 :   if ((x^y)&2)                 /* ...01, ...11 or vice versa */
      37   424447623 :     y=(x>>2)+(y>>2)+1;         /* ==(x+y)>>2 except it can't overflow */
      38             :   else                         /* ...01,...01 or ...11,...11 */
      39   386083028 :     y=(y-x)>>2;                /* now y!=0 in either case */
      40  1628475362 :   while (!(y&1)) y>>=1;        /* kill any windfall-gained powers of 2 */
      41   810530651 :   if (y==1) return 1;          /* comparand == return value... */
      42   722210850 :   if (x==y) return y;          /* this and the next is just one comparison */
      43   688181210 :   else if (x<y) goto yislarger;/* else fall through to xislarger */
      44             : 
      45   450213374 :  xislarger:                    /* same as above, seen through a mirror */
      46   702265771 :   if ((x^y)&2)
      47   363156198 :     x=(x>>2)+(y>>2)+1;
      48             :   else
      49   339109573 :     x=(x-y)>>2;                /* x!=0 */
      50  1361012392 :   while (!(x&1)) x>>=1;
      51   702265771 :   if (x==1) return 1;
      52   632068315 :   if (x==y) return y;
      53   605949256 :   else if (x>y) goto xislarger;
      54             : 
      55   397956801 :   goto yislarger;
      56             : }
      57             : /* Gotos are useful, and Programming is an Art.  D.E.Knuth. */
      58             : /* PS: Of course written with Dijkstra's lessons firmly in mind... --GN */
      59             : 
      60             : /* at least one of a or b is odd, return gcd(a,b) */
      61             : INLINE ulong
      62   374574620 : mygcduodd(ulong a, ulong b)
      63             : {
      64             :   ulong c;
      65   374574620 :   if (b&1)
      66             :   {
      67   223129678 :     if (a==1 || b==1)
      68    39313668 :       c = 1;
      69             :     else
      70   183816010 :      c = gcduodd(a, b);
      71             :   }
      72             :   else
      73             :   {
      74   151444942 :     if (a==1)
      75    51371976 :       c = 1;
      76             :     else
      77   100072966 :       c = gcduodd(b, a);
      78             :   }
      79   374591423 :   return c;
      80             : }
      81             : 
      82             : /* modified right shift binary algorithm with at most one division */
      83             : ulong
      84   348019638 : ugcd(ulong a,ulong b)
      85             : {
      86             :   long v;
      87   348019638 :   if (!b) return a;
      88   337498487 :   if (!a) return b;
      89   322768547 :   if (a>b) { a %= b; if (!a) return b; }
      90    96798048 :   else     { b %= a; if (!b) return a; }
      91   209316533 :   v = vals(a|b);
      92   209648129 :   return mygcduodd(a>>v, b>>v) << v;
      93             : }
      94             : long
      95    28666859 : cgcd(long a,long b) { return (long)ugcd(labs(a), labs(b)); }
      96             : 
      97             : /* For gcdii(): assume a>b>0, return gcd(a,b) as a GEN */
      98             : static GEN
      99   529895650 : igcduu(ulong a, ulong b)
     100             : {
     101             :   long v;
     102   529895650 :   a %= b; if (!a) return utoipos(b);
     103   164866901 :   v = vals(a|b);
     104   164920882 :   return utoipos( mygcduodd(a>>v, b>>v) << v );
     105             : }
     106             : 
     107             : /*Warning: overflows silently if lcm does not fit*/
     108             : ulong
     109     3010957 : ulcm(ulong a, ulong b)
     110             : {
     111     3010957 :   ulong d = ugcd(a,b);
     112     3010951 :   if (!d) return 0;
     113     3010951 :   return d == 1? a*b: a*(b/d);
     114             : }
     115             : long
     116       36952 : clcm(long a,long b) { return ulcm(labs(a), labs(b)); }
     117             : 
     118             : /********************************************************************/
     119             : /**                                                                **/
     120             : /**               INTEGER EXTENDED GCD  (AND INVMOD)               **/
     121             : /**                                                                **/
     122             : /********************************************************************/
     123             : /* Two basic ideas - (1) avoid many integer divisions, especially when the
     124             :  * quotient is 1 which happens ~ 40% of the time.  (2) Use Lehmer's trick as
     125             :  * modified by Jebelean of extracting a couple of words' worth of leading bits
     126             :  * from both operands, and compute partial quotients from them as long as we
     127             :  * can be sure of their values.  Jebelean's modifications consist in
     128             :  * inequalities from which we can quickly decide whether to carry on or to
     129             :  * return to the outer loop, and in re-shifting after the first word's worth of
     130             :  * bits has been used up.  All of this is described in R. Lercier's thesis
     131             :  * [pp148-153 & 163f.], except his outer loop isn't quite right: the catch-up
     132             :  * divisions needed when one partial quotient is larger than a word are missing.
     133             :  *
     134             :  * The API consists of invmod() and bezout() below; the single-word routines
     135             :  * xgcduu and xxgcduu may be called directly if desired; lgcdii() probably
     136             :  * doesn't make much sense out of context.
     137             :  *
     138             :  * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
     139             :  * ptotically about a factor 2.5 .. 3, depending on processor architecture,
     140             :  * than the naive continued-division code.  Unfortunately, thanks to the
     141             :  * unrolled loops and all, the code is lengthy. */
     142             : 
     143             : /*==================================
     144             :  * xgcduu(d,d1,f,v,v1,s)
     145             :  * xxgcduu(d,d1,f,u,u1,v,v1,s)
     146             :  * rgcduu(d,d1,vmax,u,u1,v,v1,s)
     147             :  *==================================*/
     148             : /*
     149             :  * Fast `final' extended gcd algorithm, acting on two ulongs.  Ideally this
     150             :  * should be replaced with assembler versions wherever possible.  The present
     151             :  * code essentially does `subtract, compare, and possibly divide' at each step,
     152             :  * which is reasonable when hardware division (a) exists, (b) is a bit slowish
     153             :  * and (c) does not depend a lot on the operand values (as on i486).  When
     154             :  * wordsize division is in fact an assembler routine based on subtraction,
     155             :  * this strategy may not be the most efficient one.
     156             :  *
     157             :  * xxgcduu() should be called with  d > d1 > 0, returns gcd(d,d1), and assigns
     158             :  * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1]  (i.e.,
     159             :  * the product of all the [0, 1; 1 q_j] where the leftmost factor arises from
     160             :  * the quotient of the first division step),  and the information about the
     161             :  * implied signs to s  (-1 when an odd number of divisions has been done,
     162             :  * 1 otherwise).  xgcduu() is exactly the same except that u,u1 are not com-
     163             :  * puted (and not returned, of course).
     164             :  *
     165             :  * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
     166             :  * (so we can stop the chain division one step early:  as soon as the remainder
     167             :  * equals 1).  Use this when you intend to use only what would be v, or only
     168             :  * what would be u and v, after that final division step, but not u1 and v1.
     169             :  * With the flag in force and thus without that final step, the interesting
     170             :  * quantity/ies will still sit in [u1 and] v1, of course.
     171             :  *
     172             :  * For computing the inverse of a single-word INTMOD known to exist, pass f=1
     173             :  * to xgcduu(), and obtain the result from s and v1.  (The routine does the
     174             :  * right thing when d1==1 already.)  For finishing a multiword modinv known
     175             :  * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix  (with
     176             :  * properly adjusted signs)  onto the values v' and v1' previously obtained
     177             :  * from the multiword division steps.  Actually, just take the scalar product
     178             :  * of [v',v1'] with [u1,-v1], and change the sign if s==-1.  (If the final
     179             :  * step had been carried out, it would be [-u,v], and s would also change.)
     180             :  * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
     181             :  * Finally, f=0 with xxgcduu() is useful for Bezout computations.
     182             :  * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
     183             :  * make a difference when gcd(d,d1)>1.  The speedup is negligible.)
     184             :  *
     185             :  * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
     186             :  * recover the final u,u1 given only v,v1 and s.  However, it probably isn't
     187             :  * worthwhile, as it trades a few multiplications for a division.
     188             :  *
     189             :  * rgcduu() is a variant of xxgcduu() which does not have f  (the effect is
     190             :  * that of f=0),  but instead has a ulong vmax parameter, for use in rational
     191             :  * reconstruction below.  It returns when v1 exceeds vmax;  v will never
     192             :  * exceed vmax.  (vmax=0 is taken as a synonym of ULONG_MAX i.e. unlimited,
     193             :  * in which case rgcduu behaves exactly like xxgcduu with f=0.)  The return
     194             :  * value of rgcduu() is typically meaningless;  the interesting part is the
     195             :  * matrix. */
     196             : 
     197             : ulong
     198   505725728 : xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
     199             : {
     200             :   ulong xv,xv1, xs, q,res;
     201             :   LOCAL_HIREMAINDER;
     202             : 
     203             :   /* The above blurb contained a lie.  The main loop always stops when d1
     204             :    * has become equal to 1.  If (d1 == 1 && !(f&1)) after the loop, we do
     205             :    * the final `division' of d by 1 `by hand' as it were.
     206             :    *
     207             :    * The loop has already been unrolled once.  Aggressive optimization could
     208             :    * well lead to a totally unrolled assembler version.
     209             :    *
     210             :    * On modern x86 architectures, this loop is a pig anyway.  The division
     211             :    * instruction always puts its result into the same pair of registers, and
     212             :    * we always want to use one of them straight away, so pipeline performance
     213             :    * will suck big time.  An assembler version should probably do a first loop
     214             :    * computing and storing all the quotients -- their number is bounded in
     215             :    * advance -- and then assembling the matrix in a second pass.  On other
     216             :    * architectures where we can cycle through four or so groups of registers
     217             :    * and exploit a fast ALU result-to-operand feedback path, this is much less
     218             :    * of an issue. */
     219   505725728 :   xs = res = 0;
     220   505725728 :   xv = 0UL; xv1 = 1UL;
     221  3512889526 :   while (d1 > 1UL)
     222             :   {
     223  3267032202 :     d -= d1; /* no need to use subll */
     224  3267032202 :     if (d >= d1)
     225             :     {
     226  1825678391 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     227  1825678391 :       xv += q * xv1;
     228             :     }
     229             :     else
     230  1441353811 :       xv += xv1;
     231  3267032202 :     if (d <= 1UL) { xs=1; break; } /* possible loop exit */
     232             :     /* repeat with inverted roles */
     233  3007163798 :     d1 -= d;
     234  3007163798 :     if (d1 >= d)
     235             :     {
     236  1720395198 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     237  1720395198 :       xv1 += q * xv;
     238             :     }
     239             :     else
     240  1286768600 :       xv1 += xv;
     241             :   }
     242             : 
     243   505725728 :   if (!(f&1))
     244             :   { /* division by 1 postprocessing if needed */
     245     5745385 :     if (xs && d==1)
     246     1779216 :     { xv1 += d1 * xv; xs = 0; res = 1UL; }
     247     3966169 :     else if (!xs && d1==1)
     248     1256619 :     { xv += d * xv1; xs = 1; res = 1UL; }
     249             :   }
     250             : 
     251   505725728 :   if (xs)
     252             :   {
     253   259311099 :     *s = -1; *v = xv1; *v1 = xv;
     254   259311099 :     return (res ? res : (d==1 ? 1UL : d1));
     255             :   }
     256             :   else
     257             :   {
     258   246414629 :     *s = 1; *v = xv; *v1 = xv1;
     259   246414629 :     return (res ? res : (d1==1 ? 1UL : d));
     260             :   }
     261             : }
     262             : 
     263             : ulong
     264   119216197 : xxgcduu(ulong d, ulong d1, int f,
     265             :         ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     266             : {
     267             :   ulong xu,xu1, xv,xv1, xs, q,res;
     268             :   LOCAL_HIREMAINDER;
     269             : 
     270   119216197 :   xs = res = 0;
     271   119216197 :   xu = xv1 = 1UL;
     272   119216197 :   xu1 = xv = 0UL;
     273   259882417 :   while (d1 > 1UL)
     274             :   {
     275             :     /* no need to use subll */
     276   201198228 :     d -= d1;
     277   201198228 :     if (d >= d1)
     278             :     {
     279   130251770 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     280   130251770 :       xv += q * xv1;
     281   130251770 :       xu += q * xu1;
     282             :     }
     283             :     else
     284    70946458 :     { xv += xv1; xu += xu1; }
     285   201198228 :     if (d <= 1UL) { xs=1; break; } /* possible loop exit */
     286             :     /* repeat with inverted roles */
     287   140666220 :     d1 -= d;
     288   140666220 :     if (d1 >= d)
     289             :     {
     290    77577960 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     291    77577960 :       xv1 += q * xv;
     292    77577960 :       xu1 += q * xu;
     293             :     }
     294             :     else
     295    63088260 :     { xv1 += xv; xu1 += xu; }
     296             :   }
     297             : 
     298   119216197 :   if (!(f&1))
     299             :   { /* division by 1 postprocessing if needed */
     300   113769508 :     if (xs && d==1)
     301             :     {
     302    24630636 :       xv1 += d1 * xv;
     303    24630636 :       xu1 += d1 * xu;
     304    24630636 :       xs = 0; res = 1UL;
     305             :     }
     306    89138872 :     else if (!xs && d1==1)
     307             :     {
     308    49297381 :       xv += d * xv1;
     309    49297381 :       xu += d * xu1;
     310    49297381 :       xs = 1; res = 1UL;
     311             :     }
     312             :   }
     313             : 
     314   119216197 :   if (xs)
     315             :   {
     316    85201583 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     317    85201583 :     return (res ? res : (d==1 ? 1UL : d1));
     318             :   }
     319             :   else
     320             :   {
     321    34014614 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     322    34014614 :     return (res ? res : (d1==1 ? 1UL : d));
     323             :   }
     324             : }
     325             : 
     326             : ulong
     327       41190 : rgcduu(ulong d, ulong d1, ulong vmax,
     328             :        ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     329             : {
     330       41190 :   ulong xu,xu1, xv,xv1, xs, q, res=0;
     331       41190 :   int f = 0;
     332             :   LOCAL_HIREMAINDER;
     333             : 
     334       41190 :   if (vmax == 0) vmax = ULONG_MAX;
     335       41190 :   xs = res = 0;
     336       41190 :   xu = xv1 = 1UL;
     337       41190 :   xu1 = xv = 0UL;
     338       78790 :   while (d1 > 1UL)
     339             :   {
     340       77924 :     d -= d1; /* no need to use subll */
     341       77924 :     if (d >= d1)
     342             :     {
     343       35728 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     344       35728 :       xv += q * xv1;
     345       35728 :       xu += q * xu1;
     346             :     }
     347             :     else
     348       42196 :     { xv += xv1; xu += xu1; }
     349             :     /* possible loop exit */
     350       77924 :     if (xv > vmax) { f=xs=1; break; }
     351       65038 :     if (d <= 1UL) { xs=1; break; }
     352             :     /* repeat with inverted roles */
     353       58076 :     d1 -= d;
     354       58076 :     if (d1 >= d)
     355             :     {
     356       37020 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     357       37020 :       xv1 += q * xv;
     358       37020 :       xu1 += q * xu;
     359             :     }
     360             :     else
     361       21056 :     { xv1 += xv; xu1 += xu; }
     362             :     /* possible loop exit */
     363       58076 :     if (xv1 > vmax) { f=1; break; }
     364             :   }
     365             : 
     366       41190 :   if (!(f&1))
     367             :   { /* division by 1 postprocessing if needed */
     368        7828 :     if (xs && d==1)
     369             :     {
     370        6962 :       xv1 += d1 * xv;
     371        6962 :       xu1 += d1 * xu;
     372        6962 :       xs = 0; res = 1UL;
     373             :     }
     374         866 :     else if (!xs && d1==1)
     375             :     {
     376         866 :       xv += d * xv1;
     377         866 :       xu += d * xu1;
     378         866 :       xs = 1; res = 1UL;
     379             :     }
     380             :   }
     381             : 
     382       41190 :   if (xs)
     383             :   {
     384       13752 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     385       13752 :     return (res ? res : (d==1 ? 1UL : d1));
     386             :   }
     387             :   else
     388             :   {
     389       27438 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     390       27438 :     return (res ? res : (d1==1 ? 1UL : d));
     391             :   }
     392             : }
     393             : 
     394             : /*==================================
     395             :  * cbezout(a,b,uu,vv)
     396             :  *==================================
     397             :  * Same as bezout() but for C longs.
     398             :  *    Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
     399             :  *    such that g = u*a + v*b.
     400             :  * Special cases:
     401             :  *    a = b = 0 ==> pick u=1, v=0 (and return 1, surprisingly)
     402             :  *    a != 0 = b ==> keep v=0
     403             :  *    a = 0 != b ==> keep u=0
     404             :  *    |a| = |b| != 0 ==> keep u=0, set v=+-1
     405             :  * Assignments through uu,vv happen unconditionally. */
     406             : long
     407      667176 : cbezout(long a,long b,long *uu,long *vv)
     408             : {
     409             :   long s,*t;
     410      667176 :   ulong d = labs(a), d1 = labs(b);
     411             :   ulong r,u,u1,v,v1;
     412             : 
     413      667176 :   if (!b)
     414             :   {
     415        2688 :     *vv=0L;
     416        2688 :     if (!a) { *uu=1L; return 0L; }
     417        2688 :     *uu = a < 0 ? -1L : 1L;
     418        2688 :     return (long)d;
     419             :   }
     420      664488 :   else if (!a || (d == d1))
     421             :   {
     422       56756 :     *uu = 0L; *vv = b < 0 ? -1L : 1L;
     423       56756 :     return (long)d1;
     424             :   }
     425      607732 :   else if (d == 1) /* frequently used by nfinit */
     426             :   {
     427      356421 :     *uu = a; *vv = 0L;
     428      356421 :     return 1L;
     429             :   }
     430      251311 :   else if (d < d1)
     431             :   {
     432             : /* bug in gcc-2.95.3:
     433             :  * s = a; a = b; b = s; produces wrong result a = b. This is OK:  */
     434      170736 :     { long _x = a; a = b; b = _x; } /* in order to keep the right signs */
     435      170736 :     r = d; d = d1; d1 = r;
     436      170736 :     t = uu; uu = vv; vv = t;
     437             :   }
     438             :   /* d > d1 > 0 */
     439      251311 :   r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
     440      251315 :   if (s < 0)
     441             :   {
     442       93821 :     *uu = a < 0 ? (long)u : -(long)u;
     443       93821 :     *vv = b < 0 ? -(long)v : (long)v;
     444             :   }
     445             :   else
     446             :   {
     447      157494 :     *uu = a < 0 ? -(long)u : (long)u;
     448      157494 :     *vv = b < 0 ? (long)v : -(long)v;
     449             :   }
     450      251315 :   return (long)r;
     451             : }
     452             : 
     453             : /*==================================
     454             :  * lgcdii(d,d1,u,u1,v,v1,vmax)
     455             :  *==================================*/
     456             : /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
     457             :  *
     458             :  * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
     459             :  * and a quantity of bits from d1 obtained by a shift of the same displacement,
     460             :  * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
     461             :  * the product of all the [0,1; 1,qj] thus obtained, where the leftmost
     462             :  * factor arises from the quotient of the first division step.
     463             :  *
     464             :  * For use in rational reconstruction, vmax can be given a nonzero value.
     465             :  * In this case, we will return early as soon as v1 > vmax (i.e. v <= vmax)
     466             :  *
     467             :  * MUST be called with  d > d1 > 0, and with  d occupying more than one
     468             :  * significant word.  Returns the number of reduction/swap steps carried out,
     469             :  * possibly zero, or under certain conditions minus that number.  When the
     470             :  * return value is nonzero, the caller should use the returned recurrence
     471             :  * matrix to update its own copies of d,d1.  When the return value is
     472             :  * nonpositive, and the latest remainder after updating turns out to be
     473             :  * nonzero, the caller should at once attempt a full division, rather than
     474             :  * trying lgcdii() again -- this typically happens when we are about to
     475             :  * encounter a quotient larger than half a word. (This is not detected
     476             :  * infallibly -- after a positive return value, it is possible that the next
     477             :  * stage will end up needing a full division.  After a negative return value,
     478             :  * however, this is certain, and should be acted upon.)
     479             :  *
     480             :  * The sign information, for which xgcduu() has its return argument s, is now
     481             :  * implicit in the LSB of our return value, and the caller may take advantage
     482             :  * of the fact that a return value of +-1 implies u==0,u1==v==1  [only v1 pro-
     483             :  * vides interesting information in this case].  One might also use the fact
     484             :  * that if the return value is +-2, then u==1, but this is rather marginal.
     485             :  *
     486             :  * If it was not possible to determine even the first quotient, either because
     487             :  * we're too close to an integer quotient or because the quotient would be
     488             :  * larger than one word  (if the `leading digit' of d1 after shifting is all
     489             :  * zeros), we return 0 and do not assign anything to the last four args.
     490             :  *
     491             :  * The division chain might even run to completion. It is up to the caller to
     492             :  * detect this case. This routine does not change d or d1; this is also up to
     493             :  * the caller */
     494             : int
     495    29381548 : lgcdii(ulong* d, ulong* d1, ulong* u, ulong* u1, ulong* v, ulong* v1,
     496             :        ulong vmax)
     497             : {
     498             :   /* Strategy:  (1) Extract/shift most significant bits.  We assume that d
     499             :    * has at least two significant words, but we can cope with a one-word d1.
     500             :    * Let dd,dd1 be the most significant dividend word and matching part of the
     501             :    * divisor.
     502             :    * (2) Check for overflow on the first division.  For our purposes, this
     503             :    * happens when the upper half of dd1 is zero.  (Actually this is detected
     504             :    * during extraction.)
     505             :    * (3) Get a fix on the first quotient.  We compute q = floor(dd/dd1), which
     506             :    * is an upper bound for floor(d/d1), and which gives the true value of the
     507             :    * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
     508             :    * (If it isn't, we give up.  This is annoying because the subsequent full
     509             :    * division will repeat some work already done, but it happens infrequently.
     510             :    * Doing the extra-bit-fetch in this case would be awkward.)
     511             :    * (4) Finish initializations.
     512             :    *
     513             :    * The remainder of the action is comparatively boring... The main loop has
     514             :    * been unrolled once (so we don't swap things and we can apply Jebelean's
     515             :    * termination conditions which alternatingly take two different forms during
     516             :    * successive iterations).  When we first run out of sufficient bits to form
     517             :    * a quotient, and have an extra word of each operand, we pull out two whole
     518             :    * word's worth of dividend bits, and divisor bits of matching significance;
     519             :    * to these we apply our partial matrix (disregarding overflow because the
     520             :    * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
     521             :    * re-extract one word's worth of the current dividend and a matching amount
     522             :    * of divisor bits.  The affair will normally terminate with matrix entries
     523             :    * just short of a whole word.  (We terminate the inner loop before these can
     524             :    * possibly overflow.) */
     525             :   ulong dd,dd1,ddlo,dd1lo, sh,shc; /* `digits', shift count */
     526             :   ulong xu,xu1, xv,xv1, q,res; /* recurrences, partial quotient, count */
     527             :   ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
     528             :   ulong dm1, d1m1;
     529             :   long ld, ld1, lz;
     530    29381548 :   int skip = 0;
     531             :   LOCAL_OVERFLOW;
     532             :   LOCAL_HIREMAINDER;
     533             : 
     534             :   /* following is just for convenience: vmax==0 means no bound */
     535    29381548 :   if (vmax == 0) vmax = ULONG_MAX;
     536    29381548 :   ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
     537    29381548 :   if (lz > 1) return 0; /* rare */
     538             : 
     539    28949417 :   d = int_MSW(d);  dm1  = *int_precW(d);
     540    28949417 :   d1 = int_MSW(d1);d1m1 = *int_precW(d1);
     541    28949417 :   dd1lo = 0; /* unless we find something better */
     542    28949417 :   sh = bfffo(*d);
     543             : 
     544    28949417 :   if (sh)
     545             :   { /* do the shifting */
     546    28404990 :     shc = BITS_IN_LONG - sh;
     547    28404990 :     if (lz)
     548             :     { /* dividend longer than divisor */
     549     4430817 :       dd1 = (*d1 >> shc);
     550     4430817 :       if (!(HIGHMASK & dd1)) return 0; /* overflow detected */
     551     3843033 :       if (ld1 > 3)
     552      866134 :         dd1lo = (*d1 << sh) + (d1m1 >> shc);
     553             :       else
     554     2976899 :         dd1lo = (*d1 << sh);
     555             :     }
     556             :     else
     557             :     { /* dividend and divisor have the same length */
     558    23974173 :       dd1 = (*d1 << sh);
     559    23974173 :       if (!(HIGHMASK & dd1)) return 0;
     560    23648329 :       if (ld1 > 3)
     561             :       {
     562    23649340 :         dd1 += (d1m1 >> shc);
     563    23649340 :         if (ld1 > 4)
     564    20552674 :           dd1lo = (d1m1 << sh) + (*int_precW(int_precW(d1)) >> shc);
     565             :         else
     566     3096666 :           dd1lo = (d1m1 << sh);
     567             :       }
     568             :     }
     569             :     /* following lines assume d to have 2 or more significant words */
     570    27491362 :     dd = (*d << sh) + (dm1 >> shc);
     571    27491362 :     if (ld > 4)
     572    21418940 :       ddlo = (dm1 << sh) + (*int_precW(int_precW(d)) >> shc);
     573             :     else
     574     6072422 :       ddlo = (dm1 << sh);
     575             :   }
     576             :   else
     577             :   { /* no shift needed */
     578      544427 :     if (lz) return 0; /* dividend longer than divisor: overflow */
     579      540212 :     dd1 = *d1;
     580      540212 :     if (!(HIGHMASK & dd1)) return 0;
     581      537002 :     if(ld1 > 3) dd1lo = d1m1;
     582             :     /* assume again that d has another significant word */
     583      537002 :     dd = *d; ddlo = dm1;
     584             :   }
     585             :   /* First subtraction/division stage.  (If a subtraction initially suffices,
     586             :    * we don't divide at all.)  If a Jebelean condition is violated, and we
     587             :    * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
     588             :    * give up and ask for a full division.  Otherwise we commit the result,
     589             :    * possibly deciding to re-shift immediately afterwards. */
     590    28028364 :   dd -= dd1;
     591    28028364 :   if (dd < dd1)
     592             :   { /* first quotient known to be == 1 */
     593     6395702 :     xv1 = 1UL;
     594     6395702 :     if (!dd) /* !(Jebelean condition), extraspecial case */
     595             :     { /* This actually happens. Now q==1 is known, but we underflow already.
     596             :        * OTOH we've just shortened d by a whole word. Thus we are happy and
     597             :        * return. */
     598      406668 :       *u = 0; *v = *u1 = *v1 = 1UL;
     599      406668 :       return -1; /* Next step will be a full division. */
     600             :     }
     601             :   }
     602             :   else
     603             :   { /* division indicated */
     604    21632662 :     hiremainder = 0;
     605    21632662 :     xv1 = 1 + divll(dd, dd1); /* xv1: alternative spelling of `q', here ;) */
     606    21632662 :     dd = hiremainder;
     607    21632662 :     if (dd < xv1) /* !(Jebelean cond'), nonextra special case */
     608             :     { /* Attempt to complete the division using the less significant bits,
     609             :        * before skipping right past the 1st loop to the reshift stage. */
     610     1030746 :       ddlo = subll(ddlo, mulll(xv1, dd1lo));
     611     1030746 :       dd = subllx(dd, hiremainder);
     612             : 
     613             :       /* If we now have an overflow, q was too large. Thanks to our decision
     614             :        * not to get here unless the original dd1 had bits set in the upper half
     615             :        * of the word, we now know that the correct quotient is in fact q-1. */
     616     1030746 :       if (overflow)
     617             :       {
     618       23902 :         xv1--;
     619       23902 :         ddlo = addll(ddlo,dd1lo);
     620       23902 :         dd = addllx(dd,dd1); /* overflows again which cancels the borrow */
     621             :         /* ...and fall through to skip=1 below */
     622             :       }
     623             :       else
     624             :       /* Test Jebelean condition anew, at this point using _all_ the extracted
     625             :        * bits we have.  This is clutching at straws; we have a more or less
     626             :        * even chance of succeeding this time.  Note that if we fail, we really
     627             :        * do not know whether the correct quotient would have been q or some
     628             :        * smaller value. */
     629     1006844 :         if (!dd && ddlo < xv1) return 0;
     630             : 
     631             :       /* Otherwise, we now know that q is correct, but we cannot go into the
     632             :        * 1st loop.  Raise a flag so we'll remember to skip past the loop.
     633             :        * Get here also after the q-1 adjustment case. */
     634       50734 :       skip = 1;
     635             :     } /* if !(Jebelean) then */
     636             :   }
     637    26641684 :   res = 1;
     638    26641684 :   if (xv1 > vmax)
     639             :   { /* gone past the bound already */
     640         746 :     *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
     641         746 :     return res;
     642             :   }
     643    26640938 :   xu = 0UL; xv = xu1 = 1UL;
     644             : 
     645             :   /* Some invariants from here across the first loop:
     646             :    *
     647             :    * At this point, and again after we are finished with the first loop and
     648             :    * subsequent conditional, a division and the attached update of the
     649             :    * recurrence matrix have just been carried out completely.  The matrix
     650             :    * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
     651             :    * columns), and the current remainder == next divisor (dd at the moment)
     652             :    * is nonzero (it might be zero here, but then skip will have been set).
     653             :    *
     654             :    * After the first loop, or when skip is set already, it will also be the
     655             :    * case that there aren't sufficiently many bits to continue without re-
     656             :    * shifting.  If the divisor after reshifting is zero, or indeed if it
     657             :    * doesn't have more than half a word's worth of bits, we will have to
     658             :    * return at that point.  Otherwise, we proceed into the second loop.
     659             :    *
     660             :    * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
     661             :    * already reflect the result of applying the current matrix to the old
     662             :    * ddorig:ddlo and dd1orig:dd1lo.  (For the first iteration above, this
     663             :    * was easy to achieve, and we didn't even need to peek into the (now
     664             :    * no longer existent!) saved words.  After the loop, we'll stop for a
     665             :    * moment to merge in the ddlo,dd1lo contributions.)
     666             :    *
     667             :    * Note that after the 1st division, even an a priori quotient of 1 cannot be
     668             :    * trusted until we've checked Jebelean's condition: it might be too small */
     669    26640938 :   if (!skip)
     670             :   {
     671             :     for(;;)
     672             :     { /* First half of loop divides dd into dd1, and leaves the recurrence
     673             :        * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     674             :        * entries) when successful. */
     675   221890810 :       tmpd = dd1 - dd;
     676   221890810 :       if (tmpd < dd)
     677             :       { /* quotient suspected to be 1 */
     678    93829008 :         tmpu = xu + xu1; /* cannot overflow -- everything bounded by
     679             :                           * the original dd during first loop */
     680    93829008 :         tmpv = xv + xv1;
     681             :       }
     682             :       else
     683             :       { /* division indicated */
     684   128061802 :         hiremainder = 0;
     685   128061802 :         q = 1 + divll(tmpd, dd);
     686   128061802 :         tmpd = hiremainder;
     687   128061802 :         tmpu = xu + q*xu1; /* can't overflow, but may need to be undone */
     688   128061802 :         tmpv = xv + q*xv1;
     689             :       }
     690             : 
     691   221890810 :       tmp0 = addll(tmpv, xv1);
     692   221890810 :       if ((tmpd < tmpu) || overflow ||
     693   216986742 :           (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
     694             :         break; /* skip ahead to reshift stage */
     695             :       else
     696             :       { /* commit dd1, xu, xv */
     697   208564058 :         res++;
     698   208564058 :         dd1 = tmpd; xu = tmpu; xv = tmpv;
     699   208564058 :         if (xv > vmax) { *u = xu1; *u1 = xu; *v = xv1; *v1 = xv; return res; }
     700             :       }
     701             : 
     702             :       /* Second half of loop divides dd1 into dd, and the matrix returns to its
     703             :        * normal arrangement. */
     704   208562192 :       tmpd = dd - dd1;
     705   208562192 :       if (tmpd < dd1)
     706             :       { /* quotient suspected to be 1 */
     707    83443119 :         tmpu = xu1 + xu; /* cannot overflow */
     708    83443119 :         tmpv = xv1 + xv;
     709             :       }
     710             :       else
     711             :       { /* division indicated */
     712   125119073 :         hiremainder = 0;
     713   125119073 :         q = 1 + divll(tmpd, dd1);
     714   125119073 :         tmpd = hiremainder;
     715   125119073 :         tmpu = xu1 + q*xu;
     716   125119073 :         tmpv = xv1 + q*xv;
     717             :       }
     718             : 
     719   208562192 :       tmp0 = addll(tmpu, xu);
     720   208562192 :       if ((tmpd < tmpv) || overflow ||
     721   196685178 :           (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
     722             :         break; /* skip ahead to reshift stage */
     723             :       else
     724             :       { /* commit dd, xu1, xv1 */
     725   195291966 :         res++;
     726   195291966 :         dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     727   195291966 :         if (xv1 > vmax) { *u = xu; *u1 = xu1; *v = xv; *v1 = xv1; return res; }
     728             :       }
     729             :     } /* end of first loop */
     730             : 
     731             :     /* Intermezzo: update dd:ddlo, dd1:dd1lo.  (But not if skip is set.) */
     732    26596978 :     if (res&1)
     733             :     { /* after failed division in 1st half of loop:
     734             :        * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
     735             :        *                       * [ -xu, xu1 ; xv, -xv1 ]
     736             :        * Actually, we only multiply [ddlo,dd1lo] onto the matrix and add the
     737             :        * high-order remainders + overflows onto [dd1,dd] */
     738    13321390 :       tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
     739    13321390 :       tmp1 = subll(mulll(dd1lo,xv), tmp1);
     740    13321390 :       dd1 += subllx(hiremainder, tmp0);
     741    13321390 :       tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
     742    13321390 :       ddlo = subll(tmp2, mulll(dd1lo,xv1));
     743    13321390 :       dd += subllx(tmp0, hiremainder);
     744    13321390 :       dd1lo = tmp1;
     745             :     }
     746             :     else
     747             :     { /* after failed division in 2nd half of loop:
     748             :        * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
     749             :        *                       * [ xu1, -xu ; -xv1, xv ]
     750             :        * Actually, we only multiply [ddlo,dd1lo] onto the matrix and add the
     751             :        * high-order remainders + overflows onto [dd,dd1] */
     752    13275588 :       tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
     753    13275588 :       tmp1 = subll(tmp1, mulll(dd1lo,xv1));
     754    13275588 :       dd += subllx(tmp0, hiremainder);
     755    13275588 :       tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
     756    13275588 :       dd1lo = subll(mulll(dd1lo,xv), tmp2);
     757    13275588 :       dd1 += subllx(hiremainder, tmp0);
     758    13275588 :       ddlo = tmp1;
     759             :     }
     760             :   } /* end of skip-pable section:  get here also, with res==1, when there
     761             :      * was a problem immediately after the very first division. */
     762             : 
     763             :   /* Re-shift.  Note: the shift count _can_ be zero, viz. under the following
     764             :    * precise conditions:  The original dd1 had its topmost bit set, so the 1st
     765             :    * q was 1, and after subtraction, dd had its topmost bit unset.  If now dd=0,
     766             :    * we'd have taken the return exit already, so we couldn't have got here.
     767             :    * If not, then it must have been the second division which has gone amiss
     768             :    * (because dd1 was very close to an exact multiple of the remainder dd value,
     769             :    * so this will be very rare).  At this point, we'd have a fairly slim chance
     770             :    * of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but this is not
     771             :    * guaranteed to work. Instead of trying, we return at once and let caller
     772             :    * see to it that the initial subtraction is re-done usingall the bits of
     773             :    * both operands, which already helps, and the next round will either be a
     774             :    * full division  (if dd occupied a halfword or less), or another llgcdii()
     775             :    * first step.  In the latter case, since we try a little harder during our
     776             :    * first step, we may actually be able to fix the problem, and get here again
     777             :    * with improved low-order bits and with another step under our belt.
     778             :    * Otherwise we'll have given up above and forced a full division.
     779             :    *
     780             :    * If res is even, the shift count _cannot_ be zero.  (The first step forces
     781             :    * a zero into the remainder's MSB, and all subsequent remainders will have
     782             :    * inherited it.)
     783             :    *
     784             :    * The re-shift stage exits if the next divisor has at most half a word's
     785             :    * worth of bits.
     786             :    *
     787             :    * For didactic reasons, the second loop will be arranged in the same way
     788             :    * as the first -- beginning with the division of dd into dd1, as if res
     789             :    * was odd.  To cater for this, if res is actually even, we swap things
     790             :    * around during reshifting.  (During the second loop, the parity of res
     791             :    * does not matter; we know in which half of the loop we are when we decide
     792             :    * to return.) */
     793    26638062 :   if (res&1)
     794             :   { /* after odd number of division(s) */
     795    13372334 :     if (dd1 && (sh = bfffo(dd1)))
     796             :     {
     797    13216715 :       shc = BITS_IN_LONG - sh;
     798    13216715 :       dd = (ddlo >> shc) + (dd << sh);
     799    13216715 :       if (!(HIGHMASK & dd))
     800             :       {
     801       46003 :         *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     802       46003 :         return -res; /* full division asked for */
     803             :       }
     804    13170712 :       dd1 = (dd1lo >> shc) + (dd1 << sh);
     805             :     }
     806             :     else
     807             :     { /* time to return: <= 1 word left, or sh==0 */
     808      155619 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     809      155619 :       return res;
     810             :     }
     811             :   }
     812             :   else
     813             :   { /* after even number of divisions */
     814    13265728 :     if (dd)
     815             :     {
     816    13273739 :       sh = bfffo(dd); /* > 0 */
     817    13273739 :       shc = BITS_IN_LONG - sh;
     818             :       /* dd:ddlo will become the new dd1, and v.v. */
     819    13273739 :       tmpd = (ddlo >> shc) + (dd << sh);
     820    13273739 :       dd = (dd1lo >> shc) + (dd1 << sh);
     821    13273739 :       dd1 = tmpd;
     822             :       /* This completes the swap; now dd is again the current divisor */
     823    13273739 :       if (HIGHMASK & dd)
     824             :       { /* recurrence matrix is the wrong way round; swap it */
     825    13114065 :         tmp0 = xu; xu = xu1; xu1 = tmp0;
     826    13114065 :         tmp0 = xv; xv = xv1; xv1 = tmp0;
     827             :       }
     828             :       else
     829             :       { /* recurrence matrix is the wrong way round; fix this */
     830      159674 :         *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     831      159674 :         return -res;                /* full division asked for */
     832             :       }
     833             :     }
     834             :     else
     835             :     { /* time to return: <= 1 word left */
     836           0 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     837           0 :       return res;
     838             :     }
     839             :   } /* end reshift */
     840             : 
     841             :   /* The Second Loop.  Rip-off of the first, but we now check for overflow
     842             :    * in the recurrences.  Returns instead of breaking when we cannot fix the
     843             :    * quotient any longer. */
     844             :   for(;;)
     845             :   { /* First half of loop divides dd into dd1, and leaves the recurrence
     846             :      * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     847             :      * entries) when successful */
     848   125025796 :     tmpd = dd1 - dd;
     849   125025796 :     if (tmpd < dd)
     850             :     { /* quotient suspected to be 1 */
     851    43251675 :       tmpu = xu + xu1;
     852    43251675 :       tmpv = addll(xv, xv1); /* xv,xv1 will overflow first */
     853    43251675 :       tmp1 = overflow;
     854             :     }
     855             :     else
     856             :     { /* division indicated */
     857    81774121 :       hiremainder = 0;
     858    81774121 :       q = 1 + divll(tmpd, dd);
     859    81774121 :       tmpd = hiremainder;
     860    81774121 :       tmpu = xu + q*xu1;
     861    81774121 :       tmpv = addll(xv, mulll(q,xv1));
     862    81774121 :       tmp1 = overflow | hiremainder;
     863             :     }
     864             : 
     865   125025796 :     tmp0 = addll(tmpv, xv1);
     866   125025796 :     if ((tmpd < tmpu) || overflow || tmp1 ||
     867   119738205 :         (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
     868             :     { /* The recurrence matrix has not yet been warped... */
     869    13337745 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     870    13337745 :       break;
     871             :     }
     872             :     /* commit dd1, xu, xv */
     873   111688051 :     res++;
     874   111688051 :     dd1 = tmpd; xu = tmpu; xv = tmpv;
     875   111688051 :     if (xv > vmax) { *u = xu1; *u1 = xu; *v = xv1; *v1 = xv; return res; }
     876             : 
     877             :     /* Second half of loop divides dd1 into dd, and the matrix returns to its
     878             :      * normal arrangement */
     879   111659853 :     tmpd = dd - dd1;
     880   111659853 :     if (tmpd < dd1)
     881             :     { /* quotient suspected to be 1 */
     882    47135469 :       tmpu = xu1 + xu;
     883    47135469 :       tmpv = addll(xv1, xv);
     884    47135469 :       tmp1 = overflow;
     885             :     }
     886             :     else
     887             :     { /* division indicated */
     888    64524384 :       hiremainder = 0;
     889    64524384 :       q = 1 + divll(tmpd, dd1);
     890    64524384 :       tmpd = hiremainder;
     891    64524384 :       tmpu = xu1 + q*xu;
     892    64524384 :       tmpv = addll(xv1, mulll(q, xv));
     893    64524384 :       tmp1 = overflow | hiremainder;
     894             :     }
     895             : 
     896   111659853 :     tmp0 = addll(tmpu, xu);
     897   111659853 :     if ((tmpd < tmpv) || overflow || tmp1 ||
     898   100139863 :         (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
     899             :     { /* The recurrence matrix has not yet been unwarped, so it is
     900             :        * the wrong way round;  fix this. */
     901    12893877 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     902    12893877 :       break;
     903             :     }
     904             : 
     905    98765976 :     res++; /* commit dd, xu1, xv1 */
     906    98765976 :     dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     907    98765976 :     if (xv1 > vmax) { *u = xu; *u1 = xu1; *v = xv; *v1 = xv1; return res; }
     908             :   } /* end of second loop */
     909             : 
     910    26231622 :   return res;
     911             : }
     912             : 
     913             : /* 1 / Mod(x,p). Assume x < p */
     914             : ulong
     915   469634359 : Fl_invsafe(ulong x, ulong p)
     916             : {
     917             :   long s;
     918   469634359 :   ulong xv, xv1, g = xgcduu(p, x, 1, &xv, &xv1, &s);
     919   471540852 :   if (g != 1UL) return 0UL;
     920   471073569 :   xv = xv1 % p; if (s < 0) xv = p - xv;
     921   471073569 :   return xv;
     922             : }
     923             : 
     924             : /* 1 / Mod(x,p). Assume x < p */
     925             : ulong
     926   456805969 : Fl_inv(ulong x, ulong p)
     927             : {
     928   456805969 :   ulong xv  = Fl_invsafe(x, p);
     929   458280676 :   if (!xv && p!=1UL) pari_err_INV("Fl_inv", mkintmod(utoi(x), utoi(p)));
     930   458186386 :   return xv;
     931             : }

Generated by: LCOV version 1.16