Code coverage tests

This page documents the degree to which the PARI/GP source code is tested by our public test suite, distributed with the source distribution in directory src/test/. This is measured by the gcov utility; we then process gcov output using the lcov frond-end.

We test a few variants depending on Configure flags on the pari.math.u-bordeaux.fr machine (x86_64 architecture), and agregate them in the final report:

The target is to exceed 90% coverage for all mathematical modules (given that branches depending on DEBUGLEVEL or DEBUGMEM are not covered). This script is run to produce the results below.

LCOV - code coverage report
Current view: top level - kernel/none - gcdll.c (source / functions) Hit Total Coverage
Test: PARI/GP v2.12.0 lcov report (development 23344-f0cc1b3f6) Lines: 345 348 99.1 %
Date: 2018-12-12 05:41:43 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #line 2 "../src/kernel/none/gcdll.c"
       2             : /* Copyright (C) 2000  The PARI group.
       3             : 
       4             : This file is part of the PARI/GP package.
       5             : 
       6             : PARI/GP is free software; you can redistribute it and/or modify it under the
       7             : terms of the GNU General Public License as published by the Free Software
       8             : Foundation. It is distributed in the hope that it will be useful, but WITHOUT
       9             : ANY WARRANTY WHATSOEVER.
      10             : 
      11             : Check the License for details. You should have received a copy of it, along
      12             : with the package; see the file 'COPYING'. If not, write to the Free Software
      13             : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
      14             : 
      15             : /***********************************************************************/
      16             : /**                                                                   **/
      17             : /**                          GCD                                      **/
      18             : /**                                                                   **/
      19             : /***********************************************************************/
      20             : /* Fast ulong gcd.  Called with y odd; x can be arbitrary (but will most of
      21             :  * the time be smaller than y). */
      22             : 
      23             : /* Gotos are Harmful, and Programming is a Science.  E.W.Dijkstra. */
      24             : INLINE ulong
      25   150445934 : gcduodd(ulong x, ulong y)         /* assume y&1==1, y > 1 */
      26             : {
      27   150445934 :   if (!x) return y;
      28             :   /* fix up x */
      29   150445934 :   while (!(x&1)) x>>=1;
      30   150445934 :   if (x==1) return 1;
      31   113348438 :   if (x==y) return y;
      32   108688498 :   else if (x>y) goto xislarger;/* will be rare, given how we'll use this */
      33             :   /* loop invariants: x,y odd and distinct. */
      34             :  yislarger:
      35   559289402 :   if ((x^y)&2)                 /* ...01, ...11 or vice versa */
      36   291281991 :     y=(x>>2)+(y>>2)+1;         /* ==(x+y)>>2 except it can't overflow */
      37             :   else                         /* ...01,...01 or ...11,...11 */
      38   268007411 :     y=(y-x)>>2;                /* now y!=0 in either case */
      39   559289402 :   while (!(y&1)) y>>=1;        /* kill any windfall-gained powers of 2 */
      40   559289402 :   if (y==1) return 1;          /* comparand == return value... */
      41   512690488 :   if (x==y) return y;          /* this and the next is just one comparison */
      42   498318226 :   else if (x<y) goto yislarger;/* else fall through to xislarger */
      43             : 
      44             :  xislarger:                    /* same as above, seen through a mirror */
      45   506907518 :   if ((x^y)&2)
      46   259335528 :     x=(x>>2)+(y>>2)+1;
      47             :   else
      48   247571990 :     x=(x-y)>>2;                /* x!=0 */
      49   506907518 :   while (!(x&1)) x>>=1;
      50   506907518 :   if (x==1) return 1;
      51   470581969 :   if (x==y) return y;
      52   459190196 :   else if (x>y) goto xislarger;
      53             : 
      54   300347508 :   goto yislarger;
      55             : }
      56             : /* Gotos are useful, and Programming is an Art.  D.E.Knuth. */
      57             : /* PS: Of course written with Dijkstra's lessons firmly in mind... --GN */
      58             : 
      59             : /* at least one of a or b is odd, return gcd(a,b) */
      60             : INLINE ulong
      61   196835096 : mygcduodd(ulong a, ulong b)
      62             : {
      63             :   ulong c;
      64   196835096 :   if (b&1)
      65             :   {
      66   105367120 :     if (a==1 || b==1)
      67    21169839 :       c = 1;
      68             :     else
      69    84197281 :      c = gcduodd(a, b);
      70             :   }
      71             :   else
      72             :   {
      73    91467976 :     if (a==1)
      74    27934861 :       c = 1;
      75             :     else
      76    63533115 :       c = gcduodd(b, a);
      77             :   }
      78   196681529 :   return c;
      79             : }
      80             : 
      81             : /* modified right shift binary algorithm with at most one division */
      82             : ulong
      83   181833603 : ugcd(ulong a,ulong b)
      84             : {
      85             :   long v;
      86   181833603 :   if (!b) return a;
      87   179515644 :   if (!a) return b;
      88   168696959 :   if (a>b) { a %= b; if (!a) return b; }
      89    67997346 :   else     { b %= a; if (!b) return a; }
      90   118937519 :   v = vals(a|b);
      91   118982544 :   return mygcduodd(a>>v, b>>v) << v;
      92             : }
      93             : long
      94     6396762 : cgcd(long a,long b) { return (long)ugcd(labs(a), labs(b)); }
      95             : 
      96             : /* For gcdii(): assume a>b>0, return gcd(a,b) as a GEN */
      97             : static GEN
      98   181272687 : igcduu(ulong a, ulong b)
      99             : {
     100             :   long v;
     101   181272687 :   a %= b; if (!a) return utoipos(b);
     102    77541460 :   v = vals(a|b);
     103    77541470 :   return utoipos( mygcduodd(a>>v, b>>v) << v );
     104             : }
     105             : 
     106             : /*Warning: overflows silently if lcm does not fit*/
     107             : ulong
     108     2182312 : ulcm(ulong a, ulong b)
     109             : {
     110     2182312 :   ulong d = ugcd(a,b);
     111     2182304 :   if (!d) return 0;
     112     2182304 :   return d == 1? a*b: a*(b/d);
     113             : }
     114             : long
     115           0 : clcm(long a,long b) { return ulcm(labs(a), labs(b)); }
     116             : 
     117             : /********************************************************************/
     118             : /**                                                                **/
     119             : /**               INTEGER EXTENDED GCD  (AND INVMOD)               **/
     120             : /**                                                                **/
     121             : /********************************************************************/
     122             : /* Two basic ideas - (1) avoid many integer divisions, especially when the
     123             :  * quotient is 1 which happens ~ 40% of the time.  (2) Use Lehmer's trick as
     124             :  * modified by Jebelean of extracting a couple of words' worth of leading bits
     125             :  * from both operands, and compute partial quotients from them as long as we
     126             :  * can be sure of their values.  Jebelean's modifications consist in
     127             :  * inequalities from which we can quickly decide whether to carry on or to
     128             :  * return to the outer loop, and in re-shifting after the first word's worth of
     129             :  * bits has been used up.  All of this is described in R. Lercier's thesis
     130             :  * [pp148-153 & 163f.], except his outer loop isn't quite right: the catch-up
     131             :  * divisions needed when one partial quotient is larger than a word are missing.
     132             :  *
     133             :  * The API consists of invmod() and bezout() below; the single-word routines
     134             :  * xgcduu and xxgcduu may be called directly if desired; lgcdii() probably
     135             :  * doesn't make much sense out of context.
     136             :  *
     137             :  * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
     138             :  * ptotically about a factor 2.5 .. 3, depending on processor architecture,
     139             :  * than the naive continued-division code.  Unfortunately, thanks to the
     140             :  * unrolled loops and all, the code is lengthy. */
     141             : 
     142             : /*==================================
     143             :  * xgcduu(d,d1,f,v,v1,s)
     144             :  * xxgcduu(d,d1,f,u,u1,v,v1,s)
     145             :  * rgcduu(d,d1,vmax,u,u1,v,v1,s)
     146             :  *==================================*/
     147             : /*
     148             :  * Fast `final' extended gcd algorithm, acting on two ulongs.  Ideally this
     149             :  * should be replaced with assembler versions wherever possible.  The present
     150             :  * code essentially does `subtract, compare, and possibly divide' at each step,
     151             :  * which is reasonable when hardware division (a) exists, (b) is a bit slowish
     152             :  * and (c) does not depend a lot on the operand values (as on i486).  When
     153             :  * wordsize division is in fact an assembler routine based on subtraction,
     154             :  * this strategy may not be the most efficient one.
     155             :  *
     156             :  * xxgcduu() should be called with  d > d1 > 0, returns gcd(d,d1), and assigns
     157             :  * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1]  (i.e.,
     158             :  * the product of all the [0, 1; 1 q_j] where the leftmost factor arises from
     159             :  * the quotient of the first division step),  and the information about the
     160             :  * implied signs to s  (-1 when an odd number of divisions has been done,
     161             :  * 1 otherwise).  xgcduu() is exactly the same except that u,u1 are not com-
     162             :  * puted (and not returned, of course).
     163             :  *
     164             :  * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
     165             :  * (so we can stop the chain division one step early:  as soon as the remainder
     166             :  * equals 1).  Use this when you intend to use only what would be v, or only
     167             :  * what would be u and v, after that final division step, but not u1 and v1.
     168             :  * With the flag in force and thus without that final step, the interesting
     169             :  * quantity/ies will still sit in [u1 and] v1, of course.
     170             :  *
     171             :  * For computing the inverse of a single-word INTMOD known to exist, pass f=1
     172             :  * to xgcduu(), and obtain the result from s and v1.  (The routine does the
     173             :  * right thing when d1==1 already.)  For finishing a multiword modinv known
     174             :  * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix  (with
     175             :  * properly adjusted signs)  onto the values v' and v1' previously obtained
     176             :  * from the multiword division steps.  Actually, just take the scalar product
     177             :  * of [v',v1'] with [u1,-v1], and change the sign if s==-1.  (If the final
     178             :  * step had been carried out, it would be [-u,v], and s would also change.)
     179             :  * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
     180             :  * Finally, f=0 with xxgcduu() is useful for Bezout computations.
     181             :  * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
     182             :  * make a difference when gcd(d,d1)>1.  The speedup is negligible.)
     183             :  *
     184             :  * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
     185             :  * recover the final u,u1 given only v,v1 and s.  However, it probably isn't
     186             :  * worthwhile, as it trades a few multiplications for a division.
     187             :  *
     188             :  * rgcduu() is a variant of xxgcduu() which does not have f  (the effect is
     189             :  * that of f=0),  but instead has a ulong vmax parameter, for use in rational
     190             :  * reconstruction below.  It returns when v1 exceeds vmax;  v will never
     191             :  * exceed vmax.  (vmax=0 is taken as a synonym of ULONG_MAX i.e. unlimited,
     192             :  * in which case rgcduu behaves exactly like xxgcduu with f=0.)  The return
     193             :  * value of rgcduu() is typically meaningless;  the interesting part is the
     194             :  * matrix. */
     195             : 
     196             : ulong
     197   214766558 : xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
     198             : {
     199             :   ulong xv,xv1, xs, q,res;
     200             :   LOCAL_HIREMAINDER;
     201             : 
     202             :   /* The above blurb contained a lie.  The main loop always stops when d1
     203             :    * has become equal to 1.  If (d1 == 1 && !(f&1)) after the loop, we do
     204             :    * the final `division' of d by 1 `by hand' as it were.
     205             :    *
     206             :    * The loop has already been unrolled once.  Aggressive optimization could
     207             :    * well lead to a totally unrolled assembler version.
     208             :    *
     209             :    * On modern x86 architectures, this loop is a pig anyway.  The division
     210             :    * instruction always puts its result into the same pair of registers, and
     211             :    * we always want to use one of them straight away, so pipeline performance
     212             :    * will suck big time.  An assembler version should probably do a first loop
     213             :    * computing and storing all the quotients -- their number is bounded in
     214             :    * advance -- and then assembling the matrix in a second pass.  On other
     215             :    * architectures where we can cycle through four or so groups of registers
     216             :    * and exploit a fast ALU result-to-operand feedback path, this is much less
     217             :    * of an issue. */
     218   214766558 :   xs = res = 0;
     219   214766558 :   xv = 0UL; xv1 = 1UL;
     220  1463450517 :   while (d1 > 1UL)
     221             :   {
     222  1141542299 :     d -= d1; /* no need to use subll */
     223  1141542299 :     if (d >= d1)
     224             :     {
     225   626258261 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     226   626258261 :       xv += q * xv1;
     227             :     }
     228             :     else
     229   515284038 :       xv += xv1;
     230  1141542299 :     if (d <= 1UL) { xs=1; break; } /* possible loop exit */
     231             :     /* repeat with inverted roles */
     232  1033917401 :     d1 -= d;
     233  1033917401 :     if (d1 >= d)
     234             :     {
     235   588362177 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     236   588362177 :       xv1 += q * xv;
     237             :     }
     238             :     else
     239   445555224 :       xv1 += xv;
     240             :   }
     241             : 
     242   214766558 :   if (!(f&1))
     243             :   { /* division by 1 postprocessing if needed */
     244     3824830 :     if (xs && d==1)
     245      583343 :     { xv1 += d1 * xv; xs = 0; res = 1UL; }
     246     3241487 :     else if (!xs && d1==1)
     247      439672 :     { xv += d * xv1; xs = 1; res = 1UL; }
     248             :   }
     249             : 
     250   214766558 :   if (xs)
     251             :   {
     252   112947864 :     *s = -1; *v = xv1; *v1 = xv;
     253   112947864 :     return (res ? res : (d==1 ? 1UL : d1));
     254             :   }
     255             :   else
     256             :   {
     257   101818694 :     *s = 1; *v = xv; *v1 = xv1;
     258   101818694 :     return (res ? res : (d1==1 ? 1UL : d));
     259             :   }
     260             : }
     261             : 
     262             : ulong
     263   169510130 : xxgcduu(ulong d, ulong d1, int f,
     264             :         ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     265             : {
     266             :   ulong xu,xu1, xv,xv1, xs, q,res;
     267             :   LOCAL_HIREMAINDER;
     268             : 
     269   169510130 :   xs = res = 0;
     270   169510130 :   xu = xv1 = 1UL;
     271   169510130 :   xu1 = xv = 0UL;
     272   510583600 :   while (d1 > 1UL)
     273             :   {
     274             :     /* no need to use subll */
     275   255875380 :     d -= d1;
     276   255875380 :     if (d >= d1)
     277             :     {
     278   148006630 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     279   148006630 :       xv += q * xv1;
     280   148006630 :       xu += q * xu1;
     281             :     }
     282             :     else
     283   107868750 :     { xv += xv1; xu += xu1; }
     284   255875380 :     if (d <= 1UL) { xs=1; break; } /* possible loop exit */
     285             :     /* repeat with inverted roles */
     286   171563340 :     d1 -= d;
     287   171563340 :     if (d1 >= d)
     288             :     {
     289    94121458 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     290    94121458 :       xv1 += q * xv;
     291    94121458 :       xu1 += q * xu;
     292             :     }
     293             :     else
     294    77441882 :     { xv1 += xv; xu1 += xu; }
     295             :   }
     296             : 
     297   169510130 :   if (!(f&1))
     298             :   { /* division by 1 postprocessing if needed */
     299   167176643 :     if (xs && d==1)
     300             :     {
     301    48075600 :       xv1 += d1 * xv;
     302    48075600 :       xu1 += d1 * xu;
     303    48075600 :       xs = 0; res = 1UL;
     304             :     }
     305   119101043 :     else if (!xs && d1==1)
     306             :     {
     307    73628870 :       xv += d * xv1;
     308    73628870 :       xu += d * xu1;
     309    73628870 :       xs = 1; res = 1UL;
     310             :     }
     311             :   }
     312             : 
     313   169510130 :   if (xs)
     314             :   {
     315   109865586 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     316   109865586 :     return (res ? res : (d==1 ? 1UL : d1));
     317             :   }
     318             :   else
     319             :   {
     320    59644544 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     321    59644544 :     return (res ? res : (d1==1 ? 1UL : d));
     322             :   }
     323             : }
     324             : 
     325             : ulong
     326      773583 : rgcduu(ulong d, ulong d1, ulong vmax,
     327             :        ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     328             : {
     329      773583 :   ulong xu,xu1, xv,xv1, xs, q, res=0;
     330      773583 :   int f = 0;
     331             :   LOCAL_HIREMAINDER;
     332             : 
     333      773583 :   if (vmax == 0) vmax = ULONG_MAX;
     334      773583 :   xs = res = 0;
     335      773583 :   xu = xv1 = 1UL;
     336      773583 :   xu1 = xv = 0UL;
     337     3510273 :   while (d1 > 1UL)
     338             :   {
     339     2607388 :     d -= d1; /* no need to use subll */
     340     2607388 :     if (d >= d1)
     341             :     {
     342     1444078 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
     343     1444078 :       xv += q * xv1;
     344     1444078 :       xu += q * xu1;
     345             :     }
     346             :     else
     347     1163310 :     { xv += xv1; xu += xu1; }
     348             :     /* possible loop exit */
     349     2607388 :     if (xv > vmax) { f=xs=1; break; }
     350     2347832 :     if (d <= 1UL) { xs=1; break; }
     351             :     /* repeat with inverted roles */
     352     2233962 :     d1 -= d;
     353     2233962 :     if (d1 >= d)
     354             :     {
     355     1324020 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
     356     1324020 :       xv1 += q * xv;
     357     1324020 :       xu1 += q * xu;
     358             :     }
     359             :     else
     360      909942 :     { xv1 += xv; xu1 += xu; }
     361             :     /* possible loop exit */
     362     2233962 :     if (xv1 > vmax) { f=1; break; }
     363             :   }
     364             : 
     365      773583 :   if (!(f&1))
     366             :   { /* division by 1 postprocessing if needed */
     367      243172 :     if (xs && d==1)
     368             :     {
     369      113870 :       xv1 += d1 * xv;
     370      113870 :       xu1 += d1 * xu;
     371      113870 :       xs = 0; res = 1UL;
     372             :     }
     373      129302 :     else if (!xs && d1==1)
     374             :     {
     375      129302 :       xv += d * xv1;
     376      129302 :       xu += d * xu1;
     377      129302 :       xs = 1; res = 1UL;
     378             :     }
     379             :   }
     380             : 
     381      773583 :   if (xs)
     382             :   {
     383      388858 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     384      388858 :     return (res ? res : (d==1 ? 1UL : d1));
     385             :   }
     386             :   else
     387             :   {
     388      384725 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     389      384725 :     return (res ? res : (d1==1 ? 1UL : d));
     390             :   }
     391             : }
     392             : 
     393             : /*==================================
     394             :  * cbezout(a,b,uu,vv)
     395             :  *==================================
     396             :  * Same as bezout() but for C longs.
     397             :  *    Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
     398             :  *    such that g = u*a + v*b.
     399             :  * Special cases:
     400             :  *    a = b = 0 ==> pick u=1, v=0 (and return 1, surprisingly)
     401             :  *    a != 0 = b ==> keep v=0
     402             :  *    a = 0 != b ==> keep u=0
     403             :  *    |a| = |b| != 0 ==> keep u=0, set v=+-1
     404             :  * Assignments through uu,vv happen unconditionally. */
     405             : long
     406      266190 : cbezout(long a,long b,long *uu,long *vv)
     407             : {
     408             :   long s,*t;
     409      266190 :   ulong d = labs(a), d1 = labs(b);
     410             :   ulong r,u,u1,v,v1;
     411             : 
     412      266190 :   if (!b)
     413             :   {
     414         994 :     *vv=0L;
     415         994 :     if (!a) { *uu=1L; return 0L; }
     416         994 :     *uu = a < 0 ? -1L : 1L;
     417         994 :     return (long)d;
     418             :   }
     419      265196 :   else if (!a || (d == d1))
     420             :   {
     421       48426 :     *uu = 0L; *vv = b < 0 ? -1L : 1L;
     422       48426 :     return (long)d1;
     423             :   }
     424      216770 :   else if (d == 1) /* frequently used by nfinit */
     425             :   {
     426      116621 :     *uu = a; *vv = 0L;
     427      116621 :     return 1L;
     428             :   }
     429      100149 :   else if (d < d1)
     430             :   {
     431             : /* bug in gcc-2.95.3:
     432             :  * s = a; a = b; b = s; produces wrong result a = b. This is OK:  */
     433       89984 :     { long _x = a; a = b; b = _x; } /* in order to keep the right signs */
     434       89984 :     r = d; d = d1; d1 = r;
     435       89984 :     t = uu; uu = vv; vv = t;
     436             :   }
     437             :   /* d > d1 > 0 */
     438      100149 :   r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
     439      100149 :   if (s < 0)
     440             :   {
     441       57323 :     *uu = a < 0 ? (long)u : -(long)u;
     442       57323 :     *vv = b < 0 ? -(long)v : (long)v;
     443             :   }
     444             :   else
     445             :   {
     446       42826 :     *uu = a < 0 ? -(long)u : (long)u;
     447       42826 :     *vv = b < 0 ? (long)v : -(long)v;
     448             :   }
     449      100149 :   return (long)r;
     450             : }
     451             : 
     452             : /*==================================
     453             :  * lgcdii(d,d1,u,u1,v,v1,vmax)
     454             :  *==================================*/
     455             : /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
     456             :  *
     457             :  * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
     458             :  * and a quantity of bits from d1 obtained by a shift of the same displacement,
     459             :  * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
     460             :  * the product of all the [0,1; 1,qj] thus obtained, where the leftmost
     461             :  * factor arises from the quotient of the first division step.
     462             :  *
     463             :  * For use in rational reconstruction, vmax can be given a nonzero value.
     464             :  * In this case, we will return early as soon as v1 > vmax (i.e. v <= vmax)
     465             :  *
     466             :  * MUST be called with  d > d1 > 0, and with  d occupying more than one
     467             :  * significant word.  Returns the number of reduction/swap steps carried out,
     468             :  * possibly zero, or under certain conditions minus that number.  When the
     469             :  * return value is nonzero, the caller should use the returned recurrence
     470             :  * matrix to update its own copies of d,d1.  When the return value is
     471             :  * non-positive, and the latest remainder after updating turns out to be
     472             :  * nonzero, the caller should at once attempt a full division, rather than
     473             :  * trying lgcdii() again -- this typically happens when we are about to
     474             :  * encounter a quotient larger than half a word. (This is not detected
     475             :  * infallibly -- after a positive return value, it is possible that the next
     476             :  * stage will end up needing a full division.  After a negative return value,
     477             :  * however, this is certain, and should be acted upon.)
     478             :  *
     479             :  * The sign information, for which xgcduu() has its return argument s, is now
     480             :  * implicit in the LSB of our return value, and the caller may take advantage
     481             :  * of the fact that a return value of +-1 implies u==0,u1==v==1  [only v1 pro-
     482             :  * vides interesting information in this case].  One might also use the fact
     483             :  * that if the return value is +-2, then u==1, but this is rather marginal.
     484             :  *
     485             :  * If it was not possible to determine even the first quotient, either because
     486             :  * we're too close to an integer quotient or because the quotient would be
     487             :  * larger than one word  (if the `leading digit' of d1 after shifting is all
     488             :  * zeros), we return 0 and do not assign anything to the last four args.
     489             :  *
     490             :  * The division chain might even run to completion. It is up to the caller to
     491             :  * detect this case. This routine does not change d or d1; this is also up to
     492             :  * the caller */
     493             : int
     494     7254897 : lgcdii(ulong* d, ulong* d1, ulong* u, ulong* u1, ulong* v, ulong* v1,
     495             :        ulong vmax)
     496             : {
     497             :   /* Strategy:  (1) Extract/shift most significant bits.  We assume that d
     498             :    * has at least two significant words, but we can cope with a one-word d1.
     499             :    * Let dd,dd1 be the most significant dividend word and matching part of the
     500             :    * divisor.
     501             :    * (2) Check for overflow on the first division.  For our purposes, this
     502             :    * happens when the upper half of dd1 is zero.  (Actually this is detected
     503             :    * during extraction.)
     504             :    * (3) Get a fix on the first quotient.  We compute q = floor(dd/dd1), which
     505             :    * is an upper bound for floor(d/d1), and which gives the true value of the
     506             :    * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
     507             :    * (If it isn't, we give up.  This is annoying because the subsequent full
     508             :    * division will repeat some work already done, but it happens infrequently.
     509             :    * Doing the extra-bit-fetch in this case would be awkward.)
     510             :    * (4) Finish initializations.
     511             :    *
     512             :    * The remainder of the action is comparatively boring... The main loop has
     513             :    * been unrolled once (so we don't swap things and we can apply Jebelean's
     514             :    * termination conditions which alternatingly take two different forms during
     515             :    * successive iterations).  When we first run out of sufficient bits to form
     516             :    * a quotient, and have an extra word of each operand, we pull out two whole
     517             :    * word's worth of dividend bits, and divisor bits of matching significance;
     518             :    * to these we apply our partial matrix (disregarding overflow because the
     519             :    * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
     520             :    * re-extract one word's worth of the current dividend and a matching amount
     521             :    * of divisor bits.  The affair will normally terminate with matrix entries
     522             :    * just short of a whole word.  (We terminate the inner loop before these can
     523             :    * possibly overflow.) */
     524             :   ulong dd,dd1,ddlo,dd1lo, sh,shc; /* `digits', shift count */
     525             :   ulong xu,xu1, xv,xv1, q,res; /* recurrences, partial quotient, count */
     526             :   ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
     527             :   ulong dm1, d1m1;
     528             :   long ld, ld1, lz;
     529     7254897 :   int skip = 0;
     530             :   LOCAL_OVERFLOW;
     531             :   LOCAL_HIREMAINDER;
     532             : 
     533             :   /* following is just for convenience: vmax==0 means no bound */
     534     7254897 :   if (vmax == 0) vmax = ULONG_MAX;
     535     7254897 :   ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
     536     7254897 :   if (lz > 1) return 0; /* rare */
     537             : 
     538     7208921 :   d = int_MSW(d);  dm1  = *int_precW(d);
     539     7208921 :   d1 = int_MSW(d1);d1m1 = *int_precW(d1);
     540     7208921 :   dd1lo = 0; /* unless we find something better */
     541     7208921 :   sh = bfffo(*d);
     542             : 
     543     7208921 :   if (sh)
     544             :   { /* do the shifting */
     545     6925082 :     shc = BITS_IN_LONG - sh;
     546     6925082 :     if (lz)
     547             :     { /* dividend longer than divisor */
     548     1428408 :       dd1 = (*d1 >> shc);
     549     1428408 :       if (!(HIGHMASK & dd1)) return 0; /* overflow detected */
     550      967719 :       if (ld1 > 3)
     551       96611 :         dd1lo = (*d1 << sh) + (d1m1 >> shc);
     552             :       else
     553      871108 :         dd1lo = (*d1 << sh);
     554             :     }
     555             :     else
     556             :     { /* dividend and divisor have the same length */
     557     5496674 :       dd1 = (*d1 << sh);
     558     5496674 :       if (!(HIGHMASK & dd1)) return 0;
     559     5474690 :       if (ld1 > 3)
     560             :       {
     561     5474691 :         dd1 += (d1m1 >> shc);
     562     5474691 :         if (ld1 > 4)
     563     2982369 :           dd1lo = (d1m1 << sh) + (*int_precW(int_precW(d1)) >> shc);
     564             :         else
     565     2492322 :           dd1lo = (d1m1 << sh);
     566             :       }
     567             :     }
     568             :     /* following lines assume d to have 2 or more significant words */
     569     6442409 :     dd = (*d << sh) + (dm1 >> shc);
     570     6442409 :     if (ld > 4)
     571     3078977 :       ddlo = (dm1 << sh) + (*int_precW(int_precW(d)) >> shc);
     572             :     else
     573     3363432 :       ddlo = (dm1 << sh);
     574             :   }
     575             :   else
     576             :   { /* no shift needed */
     577      283839 :     if (lz) return 0; /* dividend longer than divisor: overflow */
     578      282294 :     dd1 = *d1;
     579      282294 :     if (!(HIGHMASK & dd1)) return 0;
     580      281537 :     if(ld1 > 3) dd1lo = d1m1;
     581             :     /* assume again that d has another significant word */
     582      281537 :     dd = *d; ddlo = dm1;
     583             :   }
     584             :   /* First subtraction/division stage.  (If a subtraction initially suffices,
     585             :    * we don't divide at all.)  If a Jebelean condition is violated, and we
     586             :    * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
     587             :    * give up and ask for a full division.  Otherwise we commit the result,
     588             :    * possibly deciding to re-shift immediately afterwards. */
     589     6723946 :   dd -= dd1;
     590     6723946 :   if (dd < dd1)
     591             :   { /* first quotient known to be == 1 */
     592     2064754 :     xv1 = 1UL;
     593     2064754 :     if (!dd) /* !(Jebelean condition), extraspecial case */
     594             :     { /* This actually happens. Now q==1 is known, but we underflow already.
     595             :        * OTOH we've just shortened d by a whole word. Thus we are happy and
     596             :        * return. */
     597      188844 :       *u = 0; *v = *u1 = *v1 = 1UL;
     598      188844 :       return -1; /* Next step will be a full division. */
     599             :     }
     600             :   }
     601             :   else
     602             :   { /* division indicated */
     603     4659192 :     hiremainder = 0;
     604     4659192 :     xv1 = 1 + divll(dd, dd1); /* xv1: alternative spelling of `q', here ;) */
     605     4659192 :     dd = hiremainder;
     606     4659192 :     if (dd < xv1) /* !(Jebelean cond'), non-extra special case */
     607             :     { /* Attempt to complete the division using the less significant bits,
     608             :        * before skipping right past the 1st loop to the reshift stage. */
     609      413273 :       ddlo = subll(ddlo, mulll(xv1, dd1lo));
     610      413273 :       dd = subllx(dd, hiremainder);
     611             : 
     612             :       /* If we now have an overflow, q was too large. Thanks to our decision
     613             :        * not to get here unless the original dd1 had bits set in the upper half
     614             :        * of the word, we now know that the correct quotient is in fact q-1. */
     615      413273 :       if (overflow)
     616             :       {
     617      106382 :         xv1--;
     618      106382 :         ddlo = addll(ddlo,dd1lo);
     619      106382 :         dd = addllx(dd,dd1); /* overflows again which cancels the borrow */
     620             :         /* ...and fall through to skip=1 below */
     621             :       }
     622             :       else
     623             :       /* Test Jebelean condition anew, at this point using _all_ the extracted
     624             :        * bits we have.  This is clutching at straws; we have a more or less
     625             :        * even chance of succeeding this time.  Note that if we fail, we really
     626             :        * do not know whether the correct quotient would have been q or some
     627             :        * smaller value. */
     628      306891 :         if (!dd && ddlo < xv1) return 0;
     629             : 
     630             :       /* Otherwise, we now know that q is correct, but we cannot go into the
     631             :        * 1st loop.  Raise a flag so we'll remember to skip past the loop.
     632             :        * Get here also after the q-1 adjustment case. */
     633      217174 :       skip = 1;
     634             :     } /* if !(Jebelean) then */
     635             :   }
     636     6339003 :   res = 1;
     637     6339003 :   if (xv1 > vmax)
     638             :   { /* gone past the bound already */
     639       32289 :     *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
     640       32289 :     return res;
     641             :   }
     642     6306714 :   xu = 0UL; xv = xu1 = 1UL;
     643             : 
     644             :   /* Some invariants from here across the first loop:
     645             :    *
     646             :    * At this point, and again after we are finished with the first loop and
     647             :    * subsequent conditional, a division and the attached update of the
     648             :    * recurrence matrix have just been carried out completely.  The matrix
     649             :    * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
     650             :    * columns), and the current remainder == next divisor (dd at the moment)
     651             :    * is nonzero (it might be zero here, but then skip will have been set).
     652             :    *
     653             :    * After the first loop, or when skip is set already, it will also be the
     654             :    * case that there aren't sufficiently many bits to continue without re-
     655             :    * shifting.  If the divisor after reshifting is zero, or indeed if it
     656             :    * doesn't have more than half a word's worth of bits, we will have to
     657             :    * return at that point.  Otherwise, we proceed into the second loop.
     658             :    *
     659             :    * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
     660             :    * already reflect the result of applying the current matrix to the old
     661             :    * ddorig:ddlo and dd1orig:dd1lo.  (For the first iteration above, this
     662             :    * was easy to achieve, and we didn't even need to peek into the (now
     663             :    * no longer existent!) saved words.  After the loop, we'll stop for a
     664             :    * moment to merge in the ddlo,dd1lo contributions.)
     665             :    *
     666             :    * Note that after the 1st division, even an a priori quotient of 1 cannot be
     667             :    * trusted until we've checked Jebelean's condition: it might be too small */
     668     6306714 :   if (!skip)
     669             :   {
     670             :     for(;;)
     671             :     { /* First half of loop divides dd into dd1, and leaves the recurrence
     672             :        * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     673             :        * entries) when successful. */
     674    89660591 :       tmpd = dd1 - dd;
     675    47875888 :       if (tmpd < dd)
     676             :       { /* quotient suspected to be 1 */
     677    20193766 :         tmpu = xu + xu1; /* cannot overflow -- everything bounded by
     678             :                           * the original dd during first loop */
     679    20193766 :         tmpv = xv + xv1;
     680             :       }
     681             :       else
     682             :       { /* division indicated */
     683    27682122 :         hiremainder = 0;
     684    27682122 :         q = 1 + divll(tmpd, dd);
     685    27682122 :         tmpd = hiremainder;
     686    27682122 :         tmpu = xu + q*xu1; /* can't overflow, but may need to be undone */
     687    27682122 :         tmpv = xv + q*xv1;
     688             :       }
     689             : 
     690    47875888 :       tmp0 = addll(tmpv, xv1);
     691    94576105 :       if ((tmpd < tmpu) || overflow ||
     692    46700217 :           (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
     693             :         break; /* skip ahead to reshift stage */
     694             :       else
     695             :       { /* commit dd1, xu, xv */
     696    44933557 :         res++;
     697    44933557 :         dd1 = tmpd; xu = tmpu; xv = tmpv;
     698    44933557 :         if (xv > vmax) { *u = xu1; *u1 = xu; *v = xv1; *v1 = xv; return res; }
     699             :       }
     700             : 
     701             :       /* Second half of loop divides dd1 into dd, and the matrix returns to its
     702             :        * normal arrangement. */
     703    44802344 :       tmpd = dd - dd1;
     704    44802344 :       if (tmpd < dd1)
     705             :       { /* quotient suspected to be 1 */
     706    17975274 :         tmpu = xu1 + xu; /* cannot overflow */
     707    17975274 :         tmpv = xv1 + xv;
     708             :       }
     709             :       else
     710             :       { /* division indicated */
     711    26827070 :         hiremainder = 0;
     712    26827070 :         q = 1 + divll(tmpd, dd1);
     713    26827070 :         tmpd = hiremainder;
     714    26827070 :         tmpu = xu1 + q*xu;
     715    26827070 :         tmpv = xv1 + q*xv;
     716             :       }
     717             : 
     718    44802344 :       tmp0 = addll(tmpu, xu);
     719    87022057 :       if ((tmpd < tmpv) || overflow ||
     720    42219713 :           (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
     721             :         break; /* skip ahead to reshift stage */
     722             :       else
     723             :       { /* commit dd, xu1, xv1 */
     724    41872883 :         res++;
     725    41872883 :         dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     726    41872883 :         if (xv1 > vmax) { *u = xu; *u1 = xu1; *v = xv; *v1 = xv1; return res; }
     727             :       }
     728             :     } /* end of first loop */
     729             : 
     730             :     /* Intermezzo: update dd:ddlo, dd1:dd1lo.  (But not if skip is set.) */
     731     5871792 :     if (res&1)
     732             :     { /* after failed division in 1st half of loop:
     733             :        * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
     734             :        *                       * [ -xu, xu1 ; xv, -xv1 ]
     735             :        * Actually, we only multiply [ddlo,dd1lo] onto the matrix and add the
     736             :        * high-order remainders + overflows onto [dd1,dd] */
     737     2942331 :       tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
     738     2942331 :       tmp1 = subll(mulll(dd1lo,xv), tmp1);
     739     2942331 :       dd1 += subllx(hiremainder, tmp0);
     740     2942331 :       tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
     741     2942331 :       ddlo = subll(tmp2, mulll(dd1lo,xv1));
     742     2942331 :       dd += subllx(tmp0, hiremainder);
     743     2942331 :       dd1lo = tmp1;
     744             :     }
     745             :     else
     746             :     { /* after failed division in 2nd half of loop:
     747             :        * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
     748             :        *                       * [ xu1, -xu ; -xv1, xv ]
     749             :        * Actually, we only multiply [ddlo,dd1lo] onto the matrix and add the
     750             :        * high-order remainders + overflows onto [dd,dd1] */
     751     2929461 :       tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
     752     2929461 :       tmp1 = subll(tmp1, mulll(dd1lo,xv1));
     753     2929461 :       dd += subllx(tmp0, hiremainder);
     754     2929461 :       tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
     755     2929461 :       dd1lo = subll(mulll(dd1lo,xv), tmp2);
     756     2929461 :       dd1 += subllx(hiremainder, tmp0);
     757     2929461 :       ddlo = tmp1;
     758             :     }
     759             :   } /* end of skip-pable section:  get here also, with res==1, when there
     760             :      * was a problem immediately after the very first division. */
     761             : 
     762             :   /* Re-shift.  Note: the shift count _can_ be zero, viz. under the following
     763             :    * precise conditions:  The original dd1 had its topmost bit set, so the 1st
     764             :    * q was 1, and after subtraction, dd had its topmost bit unset.  If now dd=0,
     765             :    * we'd have taken the return exit already, so we couldn't have got here.
     766             :    * If not, then it must have been the second division which has gone amiss
     767             :    * (because dd1 was very close to an exact multiple of the remainder dd value,
     768             :    * so this will be very rare).  At this point, we'd have a fairly slim chance
     769             :    * of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but this is not
     770             :    * guaranteed to work. Instead of trying, we return at once and let caller
     771             :    * see to it that the initial subtraction is re-done usingall the bits of
     772             :    * both operands, which already helps, and the next round will either be a
     773             :    * full division  (if dd occupied a halfword or less), or another llgcdii()
     774             :    * first step.  In the latter case, since we try a little harder during our
     775             :    * first step, we may actually be able to fix the problem, and get here again
     776             :    * with improved low-order bits and with another step under our belt.
     777             :    * Otherwise we'll have given up above and forced a full division.
     778             :    *
     779             :    * If res is even, the shift count _cannot_ be zero.  (The first step forces
     780             :    * a zero into the remainder's MSB, and all subsequent remainders will have
     781             :    * inherited it.)
     782             :    *
     783             :    * The re-shift stage exits if the next divisor has at most half a word's
     784             :    * worth of bits.
     785             :    *
     786             :    * For didactic reasons, the second loop will be arranged in the same way
     787             :    * as the first -- beginning with the division of dd into dd1, as if res
     788             :    * was odd.  To cater for this, if res is actually even, we swap things
     789             :    * around during reshifting.  (During the second loop, the parity of res
     790             :    * does not matter; we know in which half of the loop we are when we decide
     791             :    * to return.) */
     792     6087321 :   if (res&1)
     793             :   { /* after odd number of division(s) */
     794     3157860 :     if (dd1 && (sh = bfffo(dd1)))
     795             :     {
     796     3131549 :       shc = BITS_IN_LONG - sh;
     797     3131549 :       dd = (ddlo >> shc) + (dd << sh);
     798     3131549 :       if (!(HIGHMASK & dd))
     799             :       {
     800      146771 :         *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     801      146771 :         return -res; /* full division asked for */
     802             :       }
     803     2984778 :       dd1 = (dd1lo >> shc) + (dd1 << sh);
     804             :     }
     805             :     else
     806             :     { /* time to return: <= 1 word left, or sh==0 */
     807       26311 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     808       26311 :       return res;
     809             :     }
     810             :   }
     811             :   else
     812             :   { /* after even number of divisions */
     813     2929461 :     if (dd)
     814             :     {
     815     2929461 :       sh = bfffo(dd); /* > 0 */
     816     2929461 :       shc = BITS_IN_LONG - sh;
     817             :       /* dd:ddlo will become the new dd1, and v.v. */
     818     2929461 :       tmpd = (ddlo >> shc) + (dd << sh);
     819     2929461 :       dd = (dd1lo >> shc) + (dd1 << sh);
     820     2929461 :       dd1 = tmpd;
     821             :       /* This completes the swap; now dd is again the current divisor */
     822     2929461 :       if (HIGHMASK & dd)
     823             :       { /* recurrence matrix is the wrong way round; swap it */
     824     2892393 :         tmp0 = xu; xu = xu1; xu1 = tmp0;
     825     2892393 :         tmp0 = xv; xv = xv1; xv1 = tmp0;
     826             :       }
     827             :       else
     828             :       { /* recurrence matrix is the wrong way round; fix this */
     829       37068 :         *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     830       37068 :         return -res;                /* full division asked for */
     831             :       }
     832             :     }
     833             :     else
     834             :     { /* time to return: <= 1 word left */
     835           0 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     836           0 :       return res;
     837             :     }
     838             :   } /* end reshift */
     839             : 
     840             :   /* The Second Loop.  Rip-off of the first, but we now check for overflow
     841             :    * in the recurrences.  Returns instead of breaking when we cannot fix the
     842             :    * quotient any longer. */
     843             :   for(;;)
     844             :   { /* First half of loop divides dd into dd1, and leaves the recurrence
     845             :      * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     846             :      * entries) when successful */
     847    45595835 :     tmpd = dd1 - dd;
     848    25736503 :     if (tmpd < dd)
     849             :     { /* quotient suspected to be 1 */
     850     8815264 :       tmpu = xu + xu1;
     851     8815264 :       tmpv = addll(xv, xv1); /* xv,xv1 will overflow first */
     852     8815264 :       tmp1 = overflow;
     853             :     }
     854             :     else
     855             :     { /* division indicated */
     856    16921239 :       hiremainder = 0;
     857    16921239 :       q = 1 + divll(tmpd, dd);
     858    16921239 :       tmpd = hiremainder;
     859    16921239 :       tmpu = xu + q*xu1;
     860    16921239 :       tmpv = addll(xv, mulll(q,xv1));
     861    16921239 :       tmp1 = overflow | hiremainder;
     862             :     }
     863             : 
     864    25736503 :     tmp0 = addll(tmpv, xv1);
     865    49968522 :     if ((tmpd < tmpu) || overflow || tmp1 ||
     866    24232019 :         (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
     867             :     { /* The recurrence matrix has not yet been warped... */
     868     3078747 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     869     3078747 :       break;
     870             :     }
     871             :     /* commit dd1, xu, xv */
     872    22657756 :     res++;
     873    22657756 :     dd1 = tmpd; xu = tmpu; xv = tmpv;
     874    22657756 :     if (xv > vmax) { *u = xu1; *u1 = xu; *v = xv1; *v1 = xv; return res; }
     875             : 
     876             :     /* Second half of loop divides dd1 into dd, and the matrix returns to its
     877             :      * normal arrangement */
     878    22574737 :     tmpd = dd - dd1;
     879    22574737 :     if (tmpd < dd1)
     880             :     { /* quotient suspected to be 1 */
     881     9599345 :       tmpu = xu1 + xu;
     882     9599345 :       tmpv = addll(xv1, xv);
     883     9599345 :       tmp1 = overflow;
     884             :     }
     885             :     else
     886             :     { /* division indicated */
     887    12975392 :       hiremainder = 0;
     888    12975392 :       q = 1 + divll(tmpd, dd1);
     889    12975392 :       tmpd = hiremainder;
     890    12975392 :       tmpu = xu1 + q*xu;
     891    12975392 :       tmpv = addll(xv1, mulll(q, xv));
     892    12975392 :       tmp1 = overflow | hiremainder;
     893             :     }
     894             : 
     895    22574737 :     tmp0 = addll(tmpu, xu);
     896    42852181 :     if ((tmpd < tmpv) || overflow || tmp1 ||
     897    20277444 :         (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
     898             :     { /* The recurrence matrix has not yet been unwarped, so it is
     899             :        * the wrong way round;  fix this. */
     900     2654279 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     901     2654279 :       break;
     902             :     }
     903             : 
     904    19920458 :     res++; /* commit dd, xu1, xv1 */
     905    19920458 :     dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     906    19920458 :     if (xv1 > vmax) { *u = xu; *u1 = xu1; *v = xv; *v1 = xv1; return res; }
     907             :   } /* end of second loop */
     908             : 
     909     5733026 :   return res;
     910             : }
     911             : 
     912             : /* 1 / Mod(x,p). Assume x < p */
     913             : ulong
     914   202145788 : Fl_invsafe(ulong x, ulong p)
     915             : {
     916             :   long s;
     917   202145788 :   ulong xv, xv1, g = xgcduu(p, x, 1, &xv, &xv1, &s);
     918   203816223 :   if (g != 1UL) return 0UL;
     919   203373768 :   xv = xv1 % p; if (s < 0) xv = p - xv;
     920   203373768 :   return xv;
     921             : }
     922             : 
     923             : /* result known to be representable as an ulong */
     924             : static ulong
     925     2211284 : lcmuu(ulong a, ulong b) { ulong d = ugcd(a,b); return (a/d) * b; }
     926             : /* assume 0 < x < N; return u in (Z/NZ)^* such that u x = gcd(x,N) (mod N);
     927             :  * set *pd = gcd(x,N) */
     928             : ulong
     929     3824830 : Fl_invgen(ulong x, ulong N, ulong *pd)
     930             : {
     931             :   ulong d, d0, e, v, v1;
     932             :   long s;
     933     3824830 :   *pd = d = xgcduu(N, x, 0, &v, &v1, &s);
     934     3824830 :   if (s > 0) v = N - v;
     935     3824830 :   if (d == 1) return v;
     936             :   /* vx = gcd(x,N) (mod N), v coprime to N/d but need not be coprime to N */
     937     2801815 :   e = N / d;
     938     2801815 :   d0 = u_ppo(d, e); /* d = d0 d1, d0 coprime to N/d, rad(d1) | N/d */
     939     2801815 :   if (d0 == 1) return v;
     940     2211284 :   e = lcmuu(e, d / d0);
     941     2211284 :   return u_chinese_coprime(v, 1, e, d0, e*d0);
     942             : }
     943             : 
     944             : /* 1 / Mod(x,p). Assume x < p */
     945             : ulong
     946   185325389 : Fl_inv(ulong x, ulong p)
     947             : {
     948   185325389 :   ulong xv  = Fl_invsafe(x, p);
     949   185987392 :   if (!xv && p!=1UL) pari_err_INV("Fl_inv", mkintmod(utoi(x), utoi(p)));
     950   185979210 :   return xv;
     951             : }

Generated by: LCOV version 1.13