Code coverage tests

This page documents the degree to which the PARI/GP source code is tested by our public test suite, distributed with the source distribution in directory src/test/. This is measured by the gcov utility; we then process gcov output using the lcov frond-end.

We test a few variants depending on Configure flags on the pari.math.u-bordeaux1.fr machine (x86_64 architecture), and agregate them in the final report:

The target is 90% coverage for all mathematical modules (given that branches depending on DEBUGLEVEL or DEBUGMEM are not covered). This script is run to produce the results below.

LCOV - code coverage report
Current view: top level - kernel/none - gcdll.c (source / functions) Hit Total Coverage
Test: PARI/GP v2.8.0 lcov report (development 17110-9967e23) Lines: 356 360 98.9 %
Date: 2014-11-26 Functions: 14 14 100.0 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 337 637 52.9 %

           Branch data     Line data    Source code
       1                 :            : #line 2 "../src/kernel/none/gcdll.c"
       2                 :            : /* Copyright (C) 2000  The PARI group.
       3                 :            : 
       4                 :            : This file is part of the PARI/GP package.
       5                 :            : 
       6                 :            : PARI/GP is free software; you can redistribute it and/or modify it under the
       7                 :            : terms of the GNU General Public License as published by the Free Software
       8                 :            : Foundation. It is distributed in the hope that it will be useful, but WITHOUT
       9                 :            : ANY WARRANTY WHATSOEVER.
      10                 :            : 
      11                 :            : Check the License for details. You should have received a copy of it, along
      12                 :            : with the package; see the file 'COPYING'. If not, write to the Free Software
      13                 :            : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
      14                 :            : 
      15                 :            : /***********************************************************************/
      16                 :            : /**                                                                   **/
      17                 :            : /**                          GCD                                      **/
      18                 :            : /**                                                                   **/
      19                 :            : /***********************************************************************/
      20                 :            : /* Fast ulong gcd.  Called with y odd; x can be arbitrary (but will most of
      21                 :            :  * the time be smaller than y). */
      22                 :            : 
      23                 :            : /* Gotos are Harmful, and Programming is a Science.  E.W.Dijkstra. */
      24                 :            : ulong
      25                 :   35285668 : gcduodd(ulong x, ulong y)         /* assume y&1==1, y > 1 */
      26                 :            : {
      27         [ -  + ]:   35285668 :   if (!x) return y;
      28                 :            :   /* fix up x */
      29         [ +  + ]:   51717609 :   while (!(x&1)) x>>=1;
      30         [ +  + ]:   35285668 :   if (x==1) return 1;
      31         [ +  + ]:   33725095 :   if (x==y) return y;
      32         [ +  + ]:   33407117 :   else if (x>y) goto xislarger;/* will be rare, given how we'll use this */
      33                 :            :   /* loop invariants: x,y odd and distinct. */
      34                 :            :  yislarger:
      35         [ +  + ]:  438459714 :   if ((x^y)&2)                 /* ...01, ...11 or vice versa */
      36                 :  222356358 :     y=(x>>2)+(y>>2)+1;         /* ==(x+y)>>2 except it can't overflow */
      37                 :            :   else                         /* ...01,...01 or ...11,...11 */
      38                 :  216103356 :     y=(y-x)>>2;                /* now y!=0 in either case */
      39         [ +  + ]:  876678031 :   while (!(y&1)) y>>=1;        /* kill any windfall-gained powers of 2 */
      40         [ +  + ]:  438459714 :   if (y==1) return 1;          /* comparand == return value... */
      41         [ +  + ]:  425900336 :   if (x==y) return y;          /* this and the next is just one comparison */
      42         [ +  + ]:  421305250 :   else if (x<y) goto yislarger;/* else fall through to xislarger */
      43                 :            : 
      44                 :            :  xislarger:                    /* same as above, seen through a mirror */
      45         [ +  + ]:  226822738 :   if ((x^y)&2)
      46                 :  115624270 :     x=(x>>2)+(y>>2)+1;
      47                 :            :   else
      48                 :  111198468 :     x=(x-y)>>2;                /* x!=0 */
      49         [ +  + ]:  446347974 :   while (!(x&1)) x>>=1;
      50         [ +  + ]:  226822738 :   if (x==1) return 1;
      51         [ +  + ]:  214454428 :   if (x==y) return y;
      52         [ +  + ]:  210570085 :   else if (x>y) goto xislarger;
      53                 :            : 
      54                 :  177742824 :   goto yislarger;
      55                 :            : }
      56                 :            : /* Gotos are useful, and Programming is an Art.  D.E.Knuth. */
      57                 :            : /* PS: Of course written with Dijkstra's lessons firmly in mind... --GN */
      58                 :            : 
      59                 :            : /* at least one of a or b is odd, return gcd(a,b) */
      60                 :            : INLINE ulong
      61                 :   14642238 : mygcduodd(ulong a, ulong b)
      62                 :            : {
      63                 :            :   ulong c;
      64         [ +  + ]:   14642238 :   if (b&1)
      65                 :            :   {
      66 [ +  + ][ +  + ]:    8847075 :     if (a==1 || b==1)
      67                 :     979868 :       c = 1;
      68                 :            :     else
      69                 :    8847075 :      c = gcduodd(a, b);
      70                 :            :   }
      71                 :            :   else
      72                 :            :   {
      73         [ +  + ]:    5795163 :     if (a==1)
      74                 :    1970966 :       c = 1;
      75                 :            :     else
      76                 :    3824197 :       c = gcduodd(b, a);
      77                 :            :   }
      78                 :   14642238 :   return c;
      79                 :            : }
      80                 :            : 
      81                 :            : /* modified right shift binary algorithm with at most one division */
      82                 :            : long
      83                 :    1320251 : cgcd(long a,long b)
      84                 :            : {
      85                 :            :   long v;
      86         [ +  + ]:    1320251 :   a=labs(a); if (!b) return a;
      87         [ +  + ]:     678675 :   b=labs(b); if (!a) return b;
      88 [ +  + ][ +  + ]:     665078 :   if (a>b) { a %= b; if (!a) return b; }
      89         [ +  + ]:     514513 :   else     { b %= a; if (!b) return a; }
      90                 :      85961 :   v = vals(a|b);
      91                 :    1320251 :   return (long)(mygcduodd((ulong)(a>>v), (ulong)(b>>v)) << v);
      92                 :            : }
      93                 :            : ulong
      94                 :    6629712 : ugcd(ulong a,ulong b)
      95                 :            : {
      96                 :            :   long v;
      97         [ +  + ]:    6629712 :   if (!b) return a;
      98         [ +  + ]:    6627942 :   if (!a) return b;
      99 [ +  + ][ +  + ]:    2130986 :   if (a>b) { a %= b; if (!a) return b; }
     100         [ +  + ]:     540306 :   else     { b %= a; if (!b) return a; }
     101                 :     479822 :   v = vals(a|b);
     102                 :    6629712 :   return mygcduodd(a>>v, b>>v) << v;
     103                 :            : }
     104                 :            : 
     105                 :            : /* For gcdii(): assume a>b>0, return gcd(a,b) as a GEN */
     106                 :            : static GEN
     107                 :   31199947 : igcduu(ulong a, ulong b)
     108                 :            : {
     109                 :            :   long v;
     110         [ +  + ]:   31199947 :   a %= b; if (!a) return utoipos(b);
     111                 :   14076455 :   v = vals(a|b);
     112                 :   31199947 :   return utoipos( mygcduodd(a>>v, b>>v) << v );
     113                 :            : }
     114                 :            : 
     115                 :            : /*Warning: overflows silently if lcm does not fit*/
     116                 :            : long
     117                 :     484895 : clcm(long a,long b)
     118                 :            : {
     119                 :     484895 :   long d = cgcd(a,b);
     120         [ -  + ]:     484895 :   if (!d) return 0;
     121         [ +  + ]:     484895 :   return d == 1? a*b: a*(b/d);
     122                 :            : }
     123                 :            : 
     124                 :            : /********************************************************************/
     125                 :            : /**                                                                **/
     126                 :            : /**               INTEGER EXTENDED GCD  (AND INVMOD)               **/
     127                 :            : /**                                                                **/
     128                 :            : /********************************************************************/
     129                 :            : 
     130                 :            : /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
     131                 :            :  * in the context of trying to improve elliptic curve cryptosystem attacking
     132                 :            :  * algorithms.  2001Jan02 -- added bezout() functionality.
     133                 :            :  *
     134                 :            :  * Two basic ideas - (1) avoid many integer divisions, especially when the
     135                 :            :  * quotient is 1 (which happens more than 40% of the time).  (2) Use Lehmer's
     136                 :            :  * trick as modified by Jebelean of extracting a couple of words' worth of
     137                 :            :  * leading bits from both operands, and compute partial quotients from them
     138                 :            :  * as long as we can be sure of their values.  The Jebelean modifications
     139                 :            :  * consist in reliable inequalities from which we can decide fast whether
     140                 :            :  * to carry on or to return to the outer loop, and in re-shifting after the
     141                 :            :  * first word's worth of bits has been used up.  All of this is described
     142                 :            :  * in R. Lercier's these [pp148-153 & 163f.], except his outer loop isn't
     143                 :            :  * quite right  (the catch-up divisions needed when one partial quotient is
     144                 :            :  * larger than a word are missing).
     145                 :            :  *
     146                 :            :  * The API consists of invmod() and bezout() below;  the single-word routines
     147                 :            :  * xgcduu and xxgcduu may be called directly if desired;  lgcdii() probably
     148                 :            :  * doesn't make much sense out of context.
     149                 :            :  *
     150                 :            :  * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
     151                 :            :  * ptotically about a factor 2.5 .. 3, depending on processor architecture,
     152                 :            :  * than the naive continued-division code.  Unfortunately, thanks to the
     153                 :            :  * unrolled loops and all, the code is a bit lengthy.
     154                 :            :  */
     155                 :            : 
     156                 :            : /*==================================
     157                 :            :  * xgcduu(d,d1,f,v,v1,s)
     158                 :            :  * xxgcduu(d,d1,f,u,u1,v,v1,s)
     159                 :            :  * rgcduu(d,d1,vmax,u,u1,v,v1,s)
     160                 :            :  *==================================*/
     161                 :            : /*
     162                 :            :  * Fast `final' extended gcd algorithm, acting on two ulongs.  Ideally this
     163                 :            :  * should be replaced with assembler versions wherever possible.  The present
     164                 :            :  * code essentially does `subtract, compare, and possibly divide' at each step,
     165                 :            :  * which is reasonable when hardware division (a) exists, (b) is a bit slowish
     166                 :            :  * and (c) does not depend a lot on the operand values (as on i486).  When
     167                 :            :  * wordsize division is in fact an assembler routine based on subtraction,
     168                 :            :  * this strategy may not be the most efficient one.
     169                 :            :  *
     170                 :            :  * xxgcduu() should be called with  d > d1 > 0, returns gcd(d,d1), and assigns
     171                 :            :  * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1]  (i.e.,
     172                 :            :  * the product of all the [0, 1; 1 q_j] where the leftmost factor arises from
     173                 :            :  * the quotient of the first division step),  and the information about the
     174                 :            :  * implied signs to s  (-1 when an odd number of divisions has been done,
     175                 :            :  * 1 otherwise).  xgcduu() is exactly the same except that u,u1 are not com-
     176                 :            :  * puted (and not returned, of course).
     177                 :            :  *
     178                 :            :  * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
     179                 :            :  * (so we can stop the chain division one step early:  as soon as the remainder
     180                 :            :  * equals 1).  Use this when you intend to use only what would be v, or only
     181                 :            :  * what would be u and v, after that final division step, but not u1 and v1.
     182                 :            :  * With the flag in force and thus without that final step, the interesting
     183                 :            :  * quantity/ies will still sit in [u1 and] v1, of course.
     184                 :            :  *
     185                 :            :  * For computing the inverse of a single-word INTMOD known to exist, pass f=1
     186                 :            :  * to xgcduu(), and obtain the result from s and v1.  (The routine does the
     187                 :            :  * right thing when d1==1 already.)  For finishing a multiword modinv known
     188                 :            :  * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix  (with
     189                 :            :  * properly adjusted signs)  onto the values v' and v1' previously obtained
     190                 :            :  * from the multiword division steps.  Actually, just take the scalar product
     191                 :            :  * of [v',v1'] with [u1,-v1], and change the sign if s==-1.  (If the final
     192                 :            :  * step had been carried out, it would be [-u,v], and s would also change.)
     193                 :            :  * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
     194                 :            :  * Finally, f=0 with xxgcduu() is useful for Bezout computations.
     195                 :            :  * [Harrumph.  In the above prescription, the sign turns out to be precisely
     196                 :            :  * wrong.]
     197                 :            :  * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
     198                 :            :  * make a difference when gcd(d,d1)>1.  The speedup is negligible.)
     199                 :            :  *
     200                 :            :  * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
     201                 :            :  * recover the final u,u1 given only v,v1 and s.  However, it probably isn't
     202                 :            :  * worthwhile, as it trades a few multiplications for a division.
     203                 :            :  *
     204                 :            :  * Note that these routines do not know and do not need to know about the
     205                 :            :  * PARI stack.
     206                 :            :  *
     207                 :            :  * Added 2001Jan15:
     208                 :            :  * rgcduu() is a variant of xxgcduu() which does not have f  (the effect is
     209                 :            :  * that of f=0),  but instead has a ulong vmax parameter, for use in rational
     210                 :            :  * reconstruction below.  It returns when v1 exceeds vmax;  v will never
     211                 :            :  * exceed vmax.  (vmax=0 is taken as a synonym of ULONG_MAX i.e. unlimited,
     212                 :            :  * in which case rgcduu behaves exactly like xxgcduu with f=0.)  The return
     213                 :            :  * value of rgcduu() is typically meaningless;  the interesting part is the
     214                 :            :  * matrix.
     215                 :            :  */
     216                 :            : 
     217                 :            : ulong
     218                 :   95989192 : xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
     219                 :            : {
     220                 :            :   ulong xv,xv1, xs, q,res;
     221                 :            :   LOCAL_HIREMAINDER;
     222                 :            : 
     223                 :            :   /* The above blurb contained a lie.  The main loop always stops when d1
     224                 :            :    * has become equal to 1.  If (d1 == 1 && !(f&1)) after the loop, we do
     225                 :            :    * the final `division' of d by 1 `by hand' as it were.
     226                 :            :    *
     227                 :            :    * The loop has already been unrolled once.  Aggressive optimization could
     228                 :            :    * well lead to a totally unrolled assembler version...
     229                 :            :    *
     230                 :            :    * On modern x86 architectures, this loop is a pig anyway.  The division
     231                 :            :    * instruction always puts its result into the same pair of registers, and
     232                 :            :    * we always want to use one of them straight away, so pipeline performance
     233                 :            :    * will suck big time.  An assembler version should probably do a first loop
     234                 :            :    * computing and storing all the quotients -- their number is bounded in
     235                 :            :    * advance -- and then assembling the matrix in a second pass.  On other
     236                 :            :    * architectures where we can cycle through four or so groups of registers
     237                 :            :    * and exploit a fast ALU result-to-operand feedback path, this is much less
     238                 :            :    * of an issue.  (Intel sucks.  See http://www.x86.org/ ...)
     239                 :            :    */
     240                 :   95989192 :   xs = res = 0;
     241                 :   95989192 :   xv = 0UL; xv1 = 1UL;
     242         [ +  + ]:  340826916 :   while (d1 > 1UL)
     243                 :            :   {
     244                 :  294091333 :     d -= d1;                        /* no need to use subll */
     245         [ +  + ]:  294091333 :     if (d >= d1)
     246                 :            :     {
     247 [ +  - ][ #  # ]:  154199848 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     248                 :  154199848 :       xv += q * xv1;
     249                 :            :     }
     250                 :            :     else
     251                 :  139891485 :       xv += xv1;
     252                 :            :                                 /* possible loop exit */
     253         [ +  + ]:  294091333 :     if (d <= 1UL) { xs=1; break; }
     254                 :            :                                 /* repeat with inverted roles */
     255                 :  244837724 :     d1 -= d;
     256         [ +  + ]:  244837724 :     if (d1 >= d)
     257                 :            :     {
     258 [ +  - ][ #  # ]:  135057894 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     259                 :  135057894 :       xv1 += q * xv;
     260                 :            :     }
     261                 :            :     else
     262                 :  109779830 :       xv1 += xv;
     263                 :            :   } /* while */
     264                 :            : 
     265         [ +  + ]:   95989192 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     266                 :            :   {
     267 [ +  + ][ +  + ]:    4018355 :     if (xs && d==1)
     268                 :    2067365 :     { xv1 += d1 * xv; xs = 0; res = 1UL; }
     269 [ +  + ][ +  + ]:    1950990 :     else if (!xs && d1==1)
     270                 :    1620850 :     { xv += d * xv1; xs = 1; res = 1UL; }
     271                 :            :   }
     272                 :            : 
     273         [ +  + ]:   95989192 :   if (xs)
     274                 :            :   {
     275                 :   48807094 :     *s = -1; *v = xv1; *v1 = xv;
     276 [ +  + ][ +  + ]:   48807094 :     return (res ? res : (d==1 ? 1UL : d1));
     277                 :            :   }
     278                 :            :   else
     279                 :            :   {
     280                 :   47182098 :     *s = 1; *v = xv; *v1 = xv1;
     281 [ +  + ][ +  + ]:   95989192 :     return (res ? res : (d1==1 ? 1UL : d));
     282                 :            :   }
     283                 :            : }
     284                 :            : 
     285                 :            : 
     286                 :            : ulong
     287                 :   20054043 : xxgcduu(ulong d, ulong d1, int f,
     288                 :            :         ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     289                 :            : {
     290                 :            :   ulong xu,xu1, xv,xv1, xs, q,res;
     291                 :            :   LOCAL_HIREMAINDER;
     292                 :            : 
     293                 :   20054043 :   xs = res = 0;
     294                 :   20054043 :   xu = xv1 = 1UL;
     295                 :   20054043 :   xu1 = xv = 0UL;
     296         [ +  + ]:   69476762 :   while (d1 > 1UL)
     297                 :            :   {
     298                 :   58254108 :     d -= d1;                        /* no need to use subll */
     299         [ +  + ]:   58254108 :     if (d >= d1)
     300                 :            :     {
     301 [ +  - ][ #  # ]:   36735629 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     302                 :   36735629 :       xv += q * xv1;
     303                 :   36735629 :       xu += q * xu1;
     304                 :            :     }
     305                 :            :     else
     306                 :   21518479 :     { xv += xv1; xu += xu1; }
     307                 :            :                                 /* possible loop exit */
     308         [ +  + ]:   58254108 :     if (d <= 1UL) { xs=1; break; }
     309                 :            :                                 /* repeat with inverted roles */
     310                 :   49422719 :     d1 -= d;
     311         [ +  + ]:   49422719 :     if (d1 >= d)
     312                 :            :     {
     313 [ +  - ][ #  # ]:   27636678 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     314                 :   27636678 :       xv1 += q * xv;
     315                 :   27636678 :       xu1 += q * xu;
     316                 :            :     }
     317                 :            :     else
     318                 :   21786041 :     { xv1 += xv; xu1 += xu; }
     319                 :            :   } /* while */
     320                 :            : 
     321         [ +  + ]:   20054043 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     322                 :            :   {
     323 [ +  + ][ +  + ]:   18388560 :     if (xs && d==1)
     324                 :            :     {
     325                 :    4252133 :       xv1 += d1 * xv;
     326                 :    4252133 :       xu1 += d1 * xu;
     327                 :    4252133 :       xs = 0; res = 1UL;
     328                 :            :     }
     329 [ +  + ][ +  + ]:   14136427 :     else if (!xs && d1==1)
     330                 :            :     {
     331                 :    9221515 :       xv += d * xv1;
     332                 :    9221515 :       xu += d * xu1;
     333                 :    9221515 :       xs = 1; res = 1UL;
     334                 :            :     }
     335                 :            :   }
     336                 :            : 
     337         [ +  + ]:   20054043 :   if (xs)
     338                 :            :   {
     339                 :   13800771 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     340 [ +  + ][ +  + ]:   13800771 :     return (res ? res : (d==1 ? 1UL : d1));
     341                 :            :   }
     342                 :            :   else
     343                 :            :   {
     344                 :    6253272 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     345 [ +  + ][ +  + ]:   20054043 :     return (res ? res : (d1==1 ? 1UL : d));
     346                 :            :   }
     347                 :            : }
     348                 :            : 
     349                 :            : ulong
     350                 :      16349 : rgcduu(ulong d, ulong d1, ulong vmax,
     351                 :            :        ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
     352                 :            : {
     353                 :      16349 :   ulong xu,xu1, xv,xv1, xs, q, res=0;
     354                 :      16349 :   int f = 0;
     355                 :            :   LOCAL_HIREMAINDER;
     356                 :            : 
     357         [ -  + ]:      16349 :   if (vmax == 0) vmax = ULONG_MAX;
     358                 :      16349 :   xs = res = 0;
     359                 :      16349 :   xu = xv1 = 1UL;
     360                 :      16349 :   xu1 = xv = 0UL;
     361         [ +  + ]:      29117 :   while (d1 > 1UL)
     362                 :            :   {
     363                 :      28983 :     d -= d1;                        /* no need to use subll */
     364         [ +  + ]:      28983 :     if (d >= d1)
     365                 :            :     {
     366 [ +  - ][ #  # ]:      12510 :       hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     367                 :      12510 :       xv += q * xv1;
     368                 :      12510 :       xu += q * xu1;
     369                 :            :     }
     370                 :            :     else
     371                 :      16473 :     { xv += xv1; xu += xu1; }
     372                 :            :                                 /* possible loop exit */
     373         [ +  + ]:      28983 :     if (xv > vmax) { f=xs=1; break; }
     374         [ +  + ]:      24331 :     if (d <= 1UL) { xs=1; break; }
     375                 :            :                                 /* repeat with inverted roles */
     376                 :      21684 :     d1 -= d;
     377         [ +  + ]:      21684 :     if (d1 >= d)
     378                 :            :     {
     379 [ +  - ][ #  # ]:      14107 :       hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     380                 :      14107 :       xv1 += q * xv;
     381                 :      14107 :       xu1 += q * xu;
     382                 :            :     }
     383                 :            :     else
     384                 :       7577 :     { xv1 += xv; xu1 += xu; }
     385                 :            :                                 /* possible loop exit */
     386         [ +  + ]:      21684 :     if (xv1 > vmax) { f=1; break; }
     387                 :            :   } /* while */
     388                 :            : 
     389         [ +  + ]:      16349 :   if (!(f&1))                        /* division by 1 postprocessing if needed */
     390                 :            :   {
     391 [ +  + ][ +  - ]:       2781 :     if (xs && d==1)
     392                 :            :     {
     393                 :       2647 :       xv1 += d1 * xv;
     394                 :       2647 :       xu1 += d1 * xu;
     395                 :       2647 :       xs = 0; res = 1UL;
     396                 :            :     }
     397 [ +  - ][ +  - ]:        134 :     else if (!xs && d1==1)
     398                 :            :     {
     399                 :        134 :       xv += d * xv1;
     400                 :        134 :       xu += d * xu1;
     401                 :        134 :       xs = 1; res = 1UL;
     402                 :            :     }
     403                 :            :   }
     404                 :            : 
     405         [ +  + ]:      16349 :   if (xs)
     406                 :            :   {
     407                 :       4786 :     *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     408 [ +  + ][ +  + ]:       4786 :     return (res ? res : (d==1 ? 1UL : d1));
     409                 :            :   }
     410                 :            :   else
     411                 :            :   {
     412                 :      11563 :     *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     413 [ +  + ][ +  + ]:      16349 :     return (res ? res : (d1==1 ? 1UL : d));
     414                 :            :   }
     415                 :            : }
     416                 :            : 
     417                 :            : /*==================================
     418                 :            :  * cbezout(a,b,uu,vv)
     419                 :            :  *==================================
     420                 :            :  * Same as bezout() but for C longs.
     421                 :            :  *    Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
     422                 :            :  *    such that g = u*a + v*b.
     423                 :            :  * Special cases:
     424                 :            :  *    a == b == 0 ==> pick u=1, v=0 (and return 1, surprisingly)
     425                 :            :  *    a != 0 == b ==> keep v=0
     426                 :            :  *    a == 0 != b ==> keep u=0
     427                 :            :  *    |a| == |b| != 0 ==> keep u=0, set v=+-1
     428                 :            :  * Assignments through uu,vv happen unconditionally;  non-NULL pointers
     429                 :            :  * _must_ be used.
     430                 :            :  */
     431                 :            : long
     432                 :      11205 : cbezout(long a,long b,long *uu,long *vv)
     433                 :            : {
     434                 :            :   long s,*t;
     435                 :      11205 :   ulong d = labs(a), d1 = labs(b);
     436                 :            :   ulong r,u,u1,v,v1;
     437                 :            : 
     438                 :            : #ifdef DEBUG_CBEZOUT
     439                 :            :   err_printf("> cbezout(%ld,%ld,%p,%p)\n", a, b, (void *)uu, (void *)vv);
     440                 :            : #endif
     441         [ +  + ]:      11205 :   if (!b)
     442                 :            :   {
     443                 :         15 :     *vv=0L;
     444         [ -  + ]:         15 :     if (!a)
     445                 :            :     {
     446                 :          0 :       *uu=1L;
     447                 :            : #ifdef DEBUG_CBEZOUT
     448                 :            :       err_printf("< %ld (%ld, %ld)\n", 1L, *uu, *vv);
     449                 :            : #endif
     450                 :          0 :       return 0L;
     451                 :            :     }
     452         [ -  + ]:         15 :     *uu = a < 0 ? -1L : 1L;
     453                 :            : #ifdef DEBUG_CBEZOUT
     454                 :            :     err_printf("< %ld (%ld, %ld)\n", (long)d, *uu, *vv);
     455                 :            : #endif
     456                 :         15 :     return (long)d;
     457                 :            :   }
     458 [ +  + ][ +  + ]:      11190 :   else if (!a || (d == d1))
     459                 :            :   {
     460         [ +  + ]:         40 :     *uu = 0L; *vv = b < 0 ? -1L : 1L;
     461                 :            : #ifdef DEBUG_CBEZOUT
     462                 :            :     err_printf("< %ld (%ld, %ld)\n", (long)d1, *uu, *vv);
     463                 :            : #endif
     464                 :         40 :     return (long)d1;
     465                 :            :   }
     466         [ +  + ]:      11150 :   else if (d == 1)                /* frequently used by nfinit */
     467                 :            :   {
     468                 :       8790 :     *uu = a; *vv = 0L;
     469                 :            : #ifdef DEBUG_CBEZOUT
     470                 :            :     err_printf("< %ld (%ld, %ld)\n", 1L, *uu, *vv);
     471                 :            : #endif
     472                 :       8790 :     return 1L;
     473                 :            :   }
     474         [ +  + ]:       2360 :   else if (d < d1)
     475                 :            :   {
     476                 :            : /* bug in gcc-2.95.3:
     477                 :            :  * s = a; a = b; b = s; produces wrong result a = b. This is OK:  */
     478                 :       1720 :     { long _x = a; a = b; b = _x; }        /* in order to keep the right signs */
     479                 :       1720 :     r = d; d = d1; d1 = r;
     480                 :       1720 :     t = uu; uu = vv; vv = t;
     481                 :            : #ifdef DEBUG_CBEZOUT
     482                 :            :     err_printf("  swapping\n");
     483                 :            : #endif
     484                 :            :   }
     485                 :            :   /* d > d1 > 0 */
     486                 :       2360 :   r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
     487         [ +  + ]:       2360 :   if (s < 0)
     488                 :            :   {
     489         [ +  + ]:        295 :     *uu = a < 0 ? (long)u : -(long)u;
     490         [ +  + ]:        295 :     *vv = b < 0 ? -(long)v : (long)v;
     491                 :            :   }
     492                 :            :   else
     493                 :            :   {
     494         [ +  + ]:       2065 :     *uu = a < 0 ? -(long)u : (long)u;
     495         [ +  + ]:       2065 :     *vv = b < 0 ? (long)v : -(long)v;
     496                 :            :   }
     497                 :            : #ifdef DEBUG_CBEZOUT
     498                 :            :   err_printf("< %ld (%ld, %ld)\n", (long)r, *uu, *vv);
     499                 :            : #endif
     500                 :      11205 :   return (long)r;
     501                 :            : }
     502                 :            : 
     503                 :            : /*==================================
     504                 :            :  * lgcdii(d,d1,u,u1,v,v1,vmax)
     505                 :            :  *==================================*/
     506                 :            : /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
     507                 :            :  *
     508                 :            :  * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
     509                 :            :  * and a quantity of bits from d1 obtained by a shift of the same displacement,
     510                 :            :  * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
     511                 :            :  * the product of all the [0, 1; 1, q_j] thus obtained, where the leftmost
     512                 :            :  * factor arises from the quotient of the first division step.
     513                 :            :  *
     514                 :            :  * For use in rational reconstruction, input param vmax can be given a
     515                 :            :  * nonzero value.  In this case, we will return early as soon as v1 > vmax
     516                 :            :  * (i.e. it is guaranteed that v <= vmax). --2001Jan15
     517                 :            :  *
     518                 :            :  * MUST be called with  d > d1 > 0, and with  d  occupying more than one
     519                 :            :  * significant word  (if it doesn't, the caller has no business with us;
     520                 :            :  * he/she/it should use xgcduu() instead).  Returns the number of reduction/
     521                 :            :  * swap steps carried out, possibly zero, or under certain conditions minus
     522                 :            :  * that number.  When the return value is nonzero, the caller should use the
     523                 :            :  * returned recurrence matrix to update its own copies of d,d1.  When the
     524                 :            :  * return value is non-positive, and the latest remainder after updating
     525                 :            :  * turns out to be nonzero, the caller should at once attempt a full division,
     526                 :            :  * rather than first trying lgcdii() again -- this typically happens when we
     527                 :            :  * are about to encounter a quotient larger than half a word.  (This is not
     528                 :            :  * detected infallibly -- after a positive return value, it is perfectly
     529                 :            :  * possible that the next stage will end up needing a full division.  After
     530                 :            :  * a negative return value, however, this is certain, and should be acted
     531                 :            :  * upon.)
     532                 :            :  *
     533                 :            :  * (The sign information, for which xgcduu() has its return argument s, is now
     534                 :            :  * implicit in the LSB of our return value, and the caller may take advantage
     535                 :            :  * of the fact that a return value of +-1 implies u==0,u1==v==1  [only v1 pro-
     536                 :            :  * vides interesting information in this case].  One might also use the fact
     537                 :            :  * that if the return value is +-2, then u==1, but this is rather marginal.)
     538                 :            :  *
     539                 :            :  * If it was not possible to determine even the first quotient, either because
     540                 :            :  * we're too close to an integer quotient or because the quotient would be
     541                 :            :  * larger than one word  (if the `leading digit' of d1 after shifting is all
     542                 :            :  * zeros),  we return 0 and do not bother to assign anything to the last four
     543                 :            :  * args.
     544                 :            :  *
     545                 :            :  * The division chain might (sometimes) even run to completion.  It will be
     546                 :            :  * up to the caller to detect this case.
     547                 :            :  *
     548                 :            :  * This routine does _not_ change d or d1;  this will also be up to the caller.
     549                 :            :  *
     550                 :            :  * Note that this routine does not know and does not need to know about the
     551                 :            :  * PARI stack.
     552                 :            :  */
     553                 :            : /*#define DEBUG_LEHMER 1 */
     554                 :            : int
     555                 :    3329558 : lgcdii(ulong* d, ulong* d1,
     556                 :            :        ulong* u, ulong* u1, ulong* v, ulong* v1,
     557                 :            :        ulong vmax)
     558                 :            : {
     559                 :            :   /* Strategy:  (1) Extract/shift most significant bits.  We assume that d
     560                 :            :    * has at least two significant words, but we can cope with a one-word d1.
     561                 :            :    * Let dd,dd1 be the most significant dividend word and matching part of the
     562                 :            :    * divisor.
     563                 :            :    * (2) Check for overflow on the first division.  For our purposes, this
     564                 :            :    * happens when the upper half of dd1 is zero.  (Actually this is detected
     565                 :            :    * during extraction.)
     566                 :            :    * (3) Get a fix on the first quotient.  We compute q = floor(dd/dd1), which
     567                 :            :    * is an upper bound for floor(d/d1), and which gives the true value of the
     568                 :            :    * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
     569                 :            :    * (If it isn't, we give up.  This is annoying because the subsequent full
     570                 :            :    * division will repeat some work already done, but it happens very infre-
     571                 :            :    * quently.  Doing the extra-bit-fetch in this case would be awkward.)
     572                 :            :    * (4) Finish initializations.
     573                 :            :    *
     574                 :            :    * The remainder of the action is comparatively boring... The main loop has
     575                 :            :    * been unrolled once  (so we don't swap things and we can apply Jebelean's
     576                 :            :    * termination conditions which alternatingly take two different forms during
     577                 :            :    * successive iterations).  When we first run out of sufficient bits to form
     578                 :            :    * a quotient, and have an extra word of each operand, we pull out two whole
     579                 :            :    * word's worth of dividend bits, and divisor bits of matching significance;
     580                 :            :    * to these we apply our partial matrix (disregarding overflow because the
     581                 :            :    * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
     582                 :            :    * re-extract one word's worth of the current dividend and a matching amount
     583                 :            :    * of divisor bits.  The affair will normally terminate with matrix entries
     584                 :            :    * just short of a whole word.  (We terminate the inner loop before these can
     585                 :            :    * possibly overflow.)
     586                 :            :    */
     587                 :            :   ulong dd,dd1,ddlo,dd1lo, sh,shc;        /* `digits', shift count */
     588                 :            :   ulong xu,xu1, xv,xv1, q,res;        /* recurrences, partial quotient, count */
     589                 :            :   ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
     590                 :            :   ulong dm1,dm2,d1m1,d1m2;
     591                 :            :   long ld, ld1, lz;                /* t_INT effective lengths */
     592                 :    3329558 :   int skip = 0;                        /* a boolean flag */
     593                 :            :   LOCAL_OVERFLOW;
     594                 :            :   LOCAL_HIREMAINDER;
     595                 :            : 
     596                 :            : #ifdef DEBUG_LEHMER
     597                 :            :   voir(d, -1); voir(d1, -1);
     598                 :            : #endif
     599                 :            :   /* following is just for convenience: vmax==0 means no bound */
     600         [ -  + ]:    3329558 :   if (vmax == 0) vmax = ULONG_MAX;
     601                 :    3329558 :   ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
     602         [ +  + ]:    3329558 :   if (lz > 1) return 0;                /* rare, quick and desperate exit */
     603                 :            : 
     604                 :    3304514 :   d = int_MSW(d); d1 = int_MSW(d1);                /* point at the leading `digits' */
     605                 :    3304514 :   dm1 = *int_precW(d); d1m1 = *int_precW(d1);
     606                 :    3304514 :   dm2 = *int_precW(int_precW(d)); d1m2 = *int_precW(int_precW(d1));
     607                 :    3304514 :   dd1lo = 0;                        /* unless we find something better */
     608 [ +  + ][ +  + ]:    3304514 :   sh = bfffo(*d);                /* obtain dividend left shift count */
         [ +  + ][ +  + ]
     609                 :            : 
     610 [ +  + ][ +  + ]:    3304514 :   if (sh)
     611                 :            :   {                                /* do the shifting */
     612                 :    3031893 :     shc = BITS_IN_LONG - sh;
     613         [ +  + ]:    3031893 :     if (lz)
     614                 :            :     {                                /* dividend longer than divisor */
     615                 :     859340 :       dd1 = (*d1 >> shc);
     616         [ +  + ]:     859340 :       if (!(HIGHMASK & dd1)) return 0;  /* overflow detected */
     617         [ +  + ]:     645616 :       if (ld1 > 3)
     618                 :      15154 :         dd1lo = (*d1 << sh) + (d1m1 >> shc);
     619                 :            :       else
     620                 :     630462 :         dd1lo = (*d1 << sh);
     621                 :            :     }
     622                 :            :     else
     623                 :            :     {                                /* dividend and divisor have the same length */
     624                 :    2172553 :       dd1 = (*d1 << sh);
     625         [ +  + ]:    2172553 :       if (!(HIGHMASK & dd1)) return 0;
     626         [ +  - ]:    2170504 :       if (ld1 > 3)
     627                 :            :       {
     628                 :    2170504 :         dd1 += (d1m1 >> shc);
     629         [ +  + ]:    2170504 :         if (ld1 > 4)
     630                 :    1027694 :           dd1lo = (d1m1 << sh) + (d1m2 >> shc);
     631                 :            :         else
     632                 :    1142810 :           dd1lo = (d1m1 << sh);
     633                 :            :       }
     634                 :            :     }
     635                 :            :     /* following lines assume d to have 2 or more significant words */
     636                 :    2816120 :     dd = (*d << sh) + (dm1 >> shc);
     637         [ +  + ]:    2816120 :     if (ld > 4)
     638                 :    1042848 :       ddlo = (dm1 << sh) + (dm2 >> shc);
     639                 :            :     else
     640                 :    1773272 :       ddlo = (dm1 << sh);
     641                 :            :   }
     642                 :            :   else
     643                 :            :   {                                /* no shift needed */
     644         [ +  + ]:     272621 :     if (lz) return 0;                /* div'd longer than div'r: o'flow automatic */
     645                 :     268640 :     dd1 = *d1;
     646         [ +  + ]:     268640 :     if (!(HIGHMASK & dd1)) return 0;
     647         [ +  - ]:     268323 :     if(ld1 > 3) dd1lo = d1m1;
     648                 :            :     /* assume again that d has another significant word */
     649                 :     268323 :     dd = *d; ddlo = dm1;
     650                 :            :   }
     651                 :            : #ifdef DEBUG_LEHMER
     652                 :            :   err_printf("  %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
     653                 :            : #endif
     654                 :            : 
     655                 :            :   /* First subtraction/division stage.  (If a subtraction initially suffices,
     656                 :            :    * we don't divide at all.)  If a Jebelean condition is violated, and we
     657                 :            :    * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
     658                 :            :    * give up and ask for a full division.  Otherwise we commit the result,
     659                 :            :    * possibly deciding to re-shift immediately afterwards.
     660                 :            :    */
     661                 :    3084443 :   dd -= dd1;
     662         [ +  + ]:    3084443 :   if (dd < dd1)
     663                 :            :   {                                /* first quotient known to be == 1 */
     664                 :    1001195 :     xv1 = 1UL;
     665         [ +  + ]:    1001195 :     if (!dd)                        /* !(Jebelean condition), extraspecial case */
     666                 :            :     {                                /* note this can actually happen...  Now
     667                 :            :                                      * q==1 is known, but we underflow already.
     668                 :            :                                  * OTOH we've just shortened d by a whole word.
     669                 :            :                                  * Thus we feel pleased with ourselves and
     670                 :            :                                  * return.  (The re-shift code below would
     671                 :            :                                  * do so anyway.) */
     672                 :      33407 :       *u = 0; *v = *u1 = *v1 = 1UL;
     673                 :      33407 :       return -1;                /* Next step will be a full division. */
     674                 :            :     } /* if !(Jebelean) then */
     675                 :            :   }
     676                 :            :   else
     677                 :            :   {                                /* division indicated */
     678                 :    2083248 :     hiremainder = 0;
     679 [ +  - ][ #  # ]:    2083248 :     xv1 = 1 + divll(dd, dd1);        /* xv1: alternative spelling of `q', here ;) */
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     680                 :    2083248 :     dd = hiremainder;
     681 [ +  + ][ +  + ]:    2083248 :     if (dd < xv1)                /* !(Jebelean cond'), non-extra special case */
     682                 :            :     {                                /* Attempt to complete the division using the
     683                 :            :                                  * less significant bits, before skipping right
     684                 :            :                                  * past the 1st loop to the reshift stage. */
     685                 :      23836 :       ddlo = subll(ddlo, mulll(xv1, dd1lo));
     686                 :      23836 :       dd = subllx(dd, hiremainder);
     687                 :            : 
     688                 :            :       /* If we now have an overflow, q was _certainly_ too large.  Thanks to
     689                 :            :        * our decision not to get here unless the original dd1 had bits set in
     690                 :            :        * the upper half of the word, however, we now do know that the correct
     691                 :            :        * quotient is in fact q-1.  Adjust our data accordingly. */
     692 [ +  + ][ +  + ]:      23836 :       if (overflow)
     693                 :            :       {
     694                 :       3679 :         xv1--;
     695                 :       3679 :         ddlo = addll(ddlo,dd1lo);
     696                 :       3679 :         dd = addllx(dd,dd1);        /* overflows again which cancels the borrow */
     697                 :            :         /* ...and fall through to skip=1 below */
     698                 :            :       }
     699                 :            :       else
     700                 :            :       /* Test Jebelean condition anew, at this point using _all_ the extracted
     701                 :            :        * bits we have.  This is clutching at straws; we have a more or less
     702                 :            :        * even chance of succeeding this time.  Note that if we fail, we really
     703                 :            :        * do not know whether the correct quotient would have been q or some
     704                 :            :        * smaller value. */
     705 [ +  + ][ +  + ]:      20157 :         if (!dd && ddlo < xv1) return 0;
     706                 :            : 
     707                 :            :       /* Otherwise, we now know that q is correct, but we cannot go into the
     708                 :            :        * 1st loop.  Raise a flag so we'll remember to skip past the loop.
     709                 :            :        * Get here also after the q-1 adjustment case. */
     710                 :       8421 :       skip = 1;
     711                 :            :     } /* if !(Jebelean) then */
     712                 :            :   }
     713                 :    3035621 :   res = 1;
     714                 :            : #ifdef DEBUG_LEHMER
     715                 :            :   err_printf("  q = %ld, %lx, %lx\n", xv1, dd1, dd);
     716                 :            : #endif
     717         [ +  + ]:    3035621 :   if (xv1 > vmax)
     718                 :            :   {                                /* gone past the bound already */
     719                 :        898 :     *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
     720                 :        898 :     return res;
     721                 :            :   }
     722                 :    3034723 :   xu = 0UL; xv = xu1 = 1UL;
     723                 :            : 
     724                 :            :   /* Some invariants from here across the first loop:
     725                 :            :    *
     726                 :            :    * At this point, and again after we are finished with the first loop and
     727                 :            :    * subsequent conditional, a division and the associated update of the
     728                 :            :    * recurrence matrix have just been carried out completely.  The matrix
     729                 :            :    * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
     730                 :            :    * columns), and the current remainder == next divisor (dd at the moment)
     731                 :            :    * is nonzero (it might be zero here, but then skip will have been set).
     732                 :            :    *
     733                 :            :    * After the first loop, or when skip is set already, it will also be the
     734                 :            :    * case that there aren't sufficiently many bits to continue without re-
     735                 :            :    * shifting.  If the divisor after reshifting is zero, or indeed if it
     736                 :            :    * doesn't have more than half a word's worth of bits, we will have to
     737                 :            :    * return at that point.  Otherwise, we proceed into the second loop.
     738                 :            :    *
     739                 :            :    * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
     740                 :            :    * already reflect the result of applying the current matrix to the old
     741                 :            :    * ddorig:ddlo and dd1orig:dd1lo.  (For the first iteration above, this
     742                 :            :    * was easy to achieve, and we didn't even need to peek into the (now
     743                 :            :    * no longer existent!) saved words.  After the loop, we'll stop for a
     744                 :            :    * moment to merge in the ddlo,dd1lo contributions.)
     745                 :            :    *
     746                 :            :    * Note that after the first division, even an a priori quotient of 1 cannot
     747                 :            :    * be trusted until we've checked Jebelean's condition -- it cannot be too
     748                 :            :    * large, of course, but it might be too small.
     749                 :            :    */
     750                 :            : 
     751         [ +  + ]:    3034723 :   if (!skip)
     752                 :            :   {
     753                 :            :     for(;;)
     754                 :            :     {
     755                 :            :       /* First half of loop divides dd into dd1, and leaves the recurrence
     756                 :            :        * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     757                 :            :        * entries) when successful. */
     758                 :   25843891 :       tmpd = dd1 - dd;
     759         [ +  + ]:   25843891 :       if (tmpd < dd)
     760                 :            :       {                                /* quotient suspected to be 1 */
     761                 :            : #ifdef DEBUG_LEHMER
     762                 :            :         q = 1;
     763                 :            : #endif
     764                 :   10877201 :         tmpu = xu + xu1;        /* cannot overflow -- everything bounded by
     765                 :            :                                  * the original dd during first loop */
     766                 :   10877201 :         tmpv = xv + xv1;
     767                 :            :       }
     768                 :            :       else
     769                 :            :       {                                /* division indicated */
     770                 :   14966690 :         hiremainder = 0;
     771 [ +  - ][ #  # ]:   14966690 :         q = 1 + divll(tmpd, dd);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     772                 :   14966690 :         tmpd = hiremainder;
     773                 :   14966690 :         tmpu = xu + q*xu1;        /* can't overflow, but may need to be undone */
     774                 :   14966690 :         tmpv = xv + q*xv1;
     775                 :            :       }
     776                 :            : 
     777                 :   25843891 :       tmp0 = addll(tmpv, xv1);
     778 [ +  + ][ +  - ]:   25843891 :       if ((tmpd < tmpu) || overflow ||
         [ +  + ][ +  + ]
         [ +  - ][ +  + ]
     779                 :   25214313 :           (dd - tmpd < tmp0))        /* !(Jebelean cond.) */
     780                 :            :         break;                        /* skip ahead to reshift stage */
     781                 :            :       else
     782                 :            :       {                                /* commit dd1, xu, xv */
     783                 :   24330151 :         res++;
     784                 :   24330151 :         dd1 = tmpd; xu = tmpu; xv = tmpv;
     785                 :            : #ifdef DEBUG_LEHMER
     786                 :            :         err_printf("  q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
     787                 :            :                    q, dd, dd1, xu1, xu, xv1, xv);
     788                 :            : #endif
     789         [ +  + ]:   24330151 :         if (xv > vmax)
     790                 :            :         {                        /* time to return */
     791                 :       1989 :           *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     792                 :       1989 :           return res;
     793                 :            :         }
     794                 :            :       }
     795                 :            : 
     796                 :            :       /* Second half of loop divides dd1 into dd, and the matrix returns to its
     797                 :            :        * normal arrangement. */
     798                 :   24328162 :       tmpd = dd - dd1;
     799         [ +  + ]:   24328162 :       if (tmpd < dd1)
     800                 :            :       {                                /* quotient suspected to be 1 */
     801                 :            : #ifdef DEBUG_LEHMER
     802                 :            :         q = 1;
     803                 :            : #endif
     804                 :    9859187 :         tmpu = xu1 + xu;        /* cannot overflow */
     805                 :    9859187 :         tmpv = xv1 + xv;
     806                 :            :       }
     807                 :            :       else
     808                 :            :       {                                /* division indicated */
     809                 :   14468975 :         hiremainder = 0;
     810 [ +  - ][ #  # ]:   14468975 :         q = 1 + divll(tmpd, dd1);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     811                 :   14468975 :         tmpd = hiremainder;
     812                 :   14468975 :         tmpu = xu1 + q*xu;
     813                 :   14468975 :         tmpv = xv1 + q*xv;
     814                 :            :       }
     815                 :            : 
     816                 :   24328162 :       tmp0 = addll(tmpu, xu);
     817 [ +  + ][ +  - ]:   24328162 :       if ((tmpd < tmpv) || overflow ||
         [ +  + ][ +  + ]
         [ +  - ][ +  + ]
     818                 :   23008547 :           (dd1 - tmpd < tmp0))        /* !(Jebelean cond.) */
     819                 :            :         break;                        /* skip ahead to reshift stage */
     820                 :            :       else
     821                 :            :       {                                /* commit dd, xu1, xv1 */
     822                 :   22818472 :         res++;
     823                 :   22818472 :         dd = tmpd; xu1 = tmpu; xv1 = tmpv;
     824                 :            : #ifdef DEBUG_LEHMER
     825                 :            :         err_printf("  q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
     826                 :            :                 q, dd1, dd, xu, xu1, xv, xv1);
     827                 :            : #endif
     828         [ +  + ]:   22818472 :         if (xv1 > vmax)
     829                 :            :         {                        /* time to return */
     830                 :        885 :           *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     831                 :        885 :           return res;
     832                 :            :         }
     833                 :            :       }
     834                 :            : 
     835                 :   22817587 :     } /* end of first loop */
     836                 :            : 
     837                 :            :     /* Intermezzo: update dd:ddlo, dd1:dd1lo.  (But not if skip is set.) */
     838                 :            : 
     839         [ +  + ]:    3023430 :     if (res&1)
     840                 :            :     {
     841                 :            :       /* after failed division in 1st half of loop:
     842                 :            :        * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
     843                 :            :        *                       * [ -xu, xu1 ; xv, -xv1 ]
     844                 :            :        * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
     845                 :            :        * add the high-order remainders + overflows onto [dd1,dd].)
     846                 :            :        */
     847                 :    1513740 :       tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
     848                 :    1513740 :       tmp1 = subll(mulll(dd1lo,xv), tmp1);
     849                 :    1513740 :       dd1 += subllx(hiremainder, tmp0);
     850                 :    1513740 :       tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
     851                 :    1513740 :       ddlo = subll(tmp2, mulll(dd1lo,xv1));
     852                 :    1513740 :       dd += subllx(tmp0, hiremainder);
     853                 :    1513740 :       dd1lo = tmp1;
     854                 :            :     }
     855                 :            :     else
     856                 :            :     {
     857                 :            :       /* after failed division in 2nd half of loop:
     858                 :            :        * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
     859                 :            :        *                       * [ xu1, -xu ; -xv1, xv ]
     860                 :            :        * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
     861                 :            :        * add the high-order remainders + overflows onto [dd,dd1].)
     862                 :            :        */
     863                 :    1509690 :       tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
     864                 :    1509690 :       tmp1 = subll(tmp1, mulll(dd1lo,xv1));
     865                 :    1509690 :       dd += subllx(tmp0, hiremainder);
     866                 :    1509690 :       tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
     867                 :    1509690 :       dd1lo = subll(mulll(dd1lo,xv), tmp2);
     868                 :    1509690 :       dd1 += subllx(hiremainder, tmp0);
     869                 :    1509690 :       ddlo = tmp1;
     870                 :            :     }
     871                 :            : #ifdef DEBUG_LEHMER
     872                 :            :     err_printf("  %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
     873                 :            : #endif
     874                 :            :   } /* end of skip-pable section:  get here also, with res==1, when there
     875                 :            :      * was a problem immediately after the very first division. */
     876                 :            : 
     877                 :            :   /* Re-shift.  Note:  the shift count _can_ be zero, viz. under the following
     878                 :            :    * precise conditions:  The original dd1 had its topmost bit set, so the 1st
     879                 :            :    * q was 1, and after subtraction, dd had its topmost bit unset.  If now
     880                 :            :    * dd==0, we'd have taken the return exit already, so we couldn't have got
     881                 :            :    * here.  If not, then it must have been the second division which has gone
     882                 :            :    * amiss  (because dd1 was very close to an exact multiple of the remainder
     883                 :            :    * dd value, so this will be very rare).  At this point, we'd have a fairly
     884                 :            :    * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
     885                 :            :    * this is not guaranteed to work.  Instead of trying, we return at once.
     886                 :            :    * The caller will see to it that the initial subtraction is re-done using
     887                 :            :    * _all_ the bits of both operands, which already helps, and the next round
     888                 :            :    * will either be a full division  (if dd occupied a halfword or less),  or
     889                 :            :    * another llgcdii() first step.  In the latter case, since we try a little
     890                 :            :    * harder during our first step, we may actually be able to fix the problem,
     891                 :            :    * and get here again with improved low-order bits and with another step
     892                 :            :    * under our belt.  Otherwise we'll have given up above and forced a full-
     893                 :            :    * blown division.
     894                 :            :    *
     895                 :            :    * If res is even, the shift count _cannot_ be zero.  (The first step forces
     896                 :            :    * a zero into the remainder's MSB, and all subsequent remainders will have
     897                 :            :    * inherited it.)
     898                 :            :    *
     899                 :            :    * The re-shift stage exits if the next divisor has at most half a word's
     900                 :            :    * worth of bits.
     901                 :            :    *
     902                 :            :    * For didactic reasons, the second loop will be arranged in the same way
     903                 :            :    * as the first -- beginning with the division of dd into dd1, as if res
     904                 :            :    * was odd.  To cater for this, if res is actually even, we swap things
     905                 :            :    * around during reshifting.  (During the second loop, the parity of res
     906                 :            :    * does not matter;  we know in which half of the loop we are when we decide
     907                 :            :    * to return.)
     908                 :            :    */
     909                 :            : #ifdef DEBUG_LEHMER
     910                 :            :   err_printf("(sh)");
     911                 :            : #endif
     912                 :            : 
     913         [ +  + ]:    3031849 :   if (res&1)
     914                 :            :   {                                /* after odd number of division(s) */
     915 [ +  - ][ +  + ]:    1522159 :     if (dd1 && (sh = bfffo(dd1)))
         [ +  + ][ +  + ]
         [ +  + ][ +  + ]
                    [ + ]
     916                 :            :     {
     917                 :    1501159 :       shc = BITS_IN_LONG - sh;
     918                 :    1501159 :       dd = (ddlo >> shc) + (dd << sh);
     919         [ +  + ]:    1501159 :       if (!(HIGHMASK & dd))
     920                 :            :       {
     921                 :       9161 :         *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     922                 :       9161 :         return -res;                /* full division asked for */
     923                 :            :       }
     924                 :    1491998 :       dd1 = (dd1lo >> shc) + (dd1 << sh);
     925                 :            :     }
     926                 :            :     else
     927                 :            :     {                                /* time to return: <= 1 word left, or sh==0 */
     928                 :      21000 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
     929                 :      21000 :       return res;
     930                 :            :     }
     931                 :            :   }
     932                 :            :   else
     933                 :            :   {                                /* after even number of divisions */
     934         [ +  - ]:    1509690 :     if (dd)
     935                 :            :     {
     936 [ +  + ][ +  + ]:    1509690 :       sh = bfffo(dd);                /* Known to be positive. */
         [ +  + ][ +  + ]
     937                 :    1509690 :       shc = BITS_IN_LONG - sh;
     938                 :            :                                 /* dd:ddlo will become the new dd1, and v.v. */
     939                 :    1509690 :       tmpd = (ddlo >> shc) + (dd << sh);
     940                 :    1509690 :       dd = (dd1lo >> shc) + (dd1 << sh);
     941                 :    1509690 :       dd1 = tmpd;
     942                 :            :       /* This has completed the swap;  now dd is again the current divisor.
     943                 :            :        * The following test originally inspected dd1 -- a most subtle and
     944                 :            :        * most annoying bug. The Management. */
     945 [ +  + ][ +  + ]:    1509690 :       if (HIGHMASK & dd)
     946                 :            :       {
     947                 :            :         /* recurrence matrix is the wrong way round;  swap it. */
     948                 :    1505410 :         tmp0 = xu; xu = xu1; xu1 = tmp0;
     949                 :    1505410 :         tmp0 = xv; xv = xv1; xv1 = tmp0;
     950                 :            :       }
     951                 :            :       else
     952                 :            :       {
     953                 :            :         /* recurrence matrix is the wrong way round;  fix this. */
     954                 :       4280 :         *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     955                 :       4280 :         return -res;                /* full division asked for */
     956                 :            :       }
     957                 :            :     }
     958                 :            :     else
     959                 :            :     {                                /* time to return: <= 1 word left */
     960                 :          0 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
     961                 :          0 :       return res;
     962                 :            :     }
     963                 :            :   } /* end reshift */
     964                 :            : 
     965                 :            : #ifdef DEBUG_LEHMER
     966                 :            :   err_printf("  %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
     967                 :            : #endif
     968                 :            : 
     969                 :            :   /* The Second Loop.  Rip-off of the first, but we now check for overflow
     970                 :            :    * in the recurrences.  Returns instead of breaking when we cannot fix the
     971                 :            :    * quotient any longer.
     972                 :            :    */
     973                 :            : 
     974                 :            :   for(;;)
     975                 :            :   {
     976                 :            :     /* First half of loop divides dd into dd1, and leaves the recurrence
     977                 :            :      * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
     978                 :            :      * entries) when successful. */
     979                 :   14450915 :     tmpd = dd1 - dd;
     980         [ +  + ]:   14450915 :     if (tmpd < dd)
     981                 :            :     {                                /* quotient suspected to be 1 */
     982                 :            : #ifdef DEBUG_LEHMER
     983                 :            :       q = 1;
     984                 :            : #endif
     985                 :    5028653 :       tmpu = xu + xu1;
     986                 :    5028653 :       tmpv = addll(xv, xv1);        /* xv,xv1 will overflow first */
     987                 :    5028653 :       tmp1 = overflow;
     988                 :            :     }
     989                 :            :     else
     990                 :            :     {                                /* division indicated */
     991                 :    9422262 :       hiremainder = 0;
     992 [ +  - ][ #  # ]:    9422262 :       q = 1 + divll(tmpd, dd);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     993                 :    9422262 :       tmpd = hiremainder;
     994                 :    9422262 :       tmpu = xu + q*xu1;
     995                 :    9422262 :       tmpv = addll(xv, mulll(q,xv1));
     996                 :    9422262 :       tmp1 = overflow | hiremainder;
     997                 :            :     }
     998                 :            : 
     999                 :   14450915 :     tmp0 = addll(tmpv, xv1);
    1000 [ +  + ][ +  - ]:   14450915 :     if ((tmpd < tmpu) || overflow || tmp1 ||
         [ +  - ][ +  + ]
         [ +  + ][ +  - ]
         [ +  - ][ +  + ]
    1001                 :   13823212 :         (dd - tmpd < tmp0))        /* !(Jebelean cond.) */
    1002                 :            :     {
    1003                 :            :       /* The recurrence matrix has not yet been warped... */
    1004                 :    1491002 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
    1005                 :    1491002 :       break;
    1006                 :            :     }
    1007                 :            :     /* commit dd1, xu, xv */
    1008                 :   12959913 :     res++;
    1009                 :   12959913 :     dd1 = tmpd; xu = tmpu; xv = tmpv;
    1010                 :            : #ifdef DEBUG_LEHMER
    1011                 :            :     err_printf("  q = %ld, %lx, %lx\n", q, dd, dd1);
    1012                 :            : #endif
    1013         [ +  + ]:   12959913 :     if (xv > vmax)
    1014                 :            :     {                                /* time to return */
    1015                 :      19987 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
    1016                 :      19987 :       return res;
    1017                 :            :     }
    1018                 :            : 
    1019                 :            :     /* Second half of loop divides dd1 into dd, and the matrix returns to its
    1020                 :            :      * normal arrangement. */
    1021                 :   12939926 :     tmpd = dd - dd1;
    1022         [ +  + ]:   12939926 :     if (tmpd < dd1)
    1023                 :            :     {                                /* quotient suspected to be 1 */
    1024                 :            : #ifdef DEBUG_LEHMER
    1025                 :            :       q = 1;
    1026                 :            : #endif
    1027                 :    5497305 :       tmpu = xu1 + xu;
    1028                 :    5497305 :       tmpv = addll(xv1, xv);
    1029                 :    5497305 :       tmp1 = overflow;
    1030                 :            :     }
    1031                 :            :     else
    1032                 :            :     {                                /* division indicated */
    1033                 :    7442621 :       hiremainder = 0;
    1034 [ +  - ][ #  # ]:    7442621 :       q = 1 + divll(tmpd, dd1);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
    1035                 :    7442621 :       tmpd = hiremainder;
    1036                 :    7442621 :       tmpu = xu1 + q*xu;
    1037                 :    7442621 :       tmpv = addll(xv1, mulll(q, xv));
    1038                 :    7442621 :       tmp1 = overflow | hiremainder;
    1039                 :            :     }
    1040                 :            : 
    1041                 :   12939926 :     tmp0 = addll(tmpu, xu);
    1042 [ +  + ][ +  - ]:   12939926 :     if ((tmpd < tmpv) || overflow || tmp1 ||
         [ +  - ][ +  + ]
         [ +  + ][ +  - ]
         [ +  - ][ +  + ]
    1043                 :   11654622 :         (dd1 - tmpd < tmp0))        /* !(Jebelean cond.) */
    1044                 :            :     {
    1045                 :            :       /* The recurrence matrix has not yet been unwarped, so it is
    1046                 :            :        * the wrong way round;  fix this. */
    1047                 :    1468499 :       *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
    1048                 :    1468499 :       break;
    1049                 :            :     }
    1050                 :            : 
    1051                 :   11471427 :     res++; /* commit dd, xu1, xv1 */
    1052                 :   11471427 :     dd = tmpd; xu1 = tmpu; xv1 = tmpv;
    1053                 :            : #ifdef DEBUG_LEHMER
    1054                 :            :     err_printf("  q = %ld, %lx, %lx\n", q, dd1, dd);
    1055                 :            : #endif
    1056         [ +  + ]:   11471427 :     if (xv1 > vmax)
    1057                 :            :     {                                /* time to return */
    1058                 :      17920 :       *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
    1059                 :      17920 :       return res;
    1060                 :            :     }
    1061                 :   11453507 :   } /* end of second loop */
    1062                 :            : 
    1063                 :    3329558 :   return res;
    1064                 :            : }
    1065                 :            : 
    1066                 :            : /* 1 / Mod(x,p). Assume x < p */
    1067                 :            : ulong
    1068                 :   90234562 : Fl_invsafe(ulong x, ulong p)
    1069                 :            : {
    1070                 :            :   long s;
    1071                 :   90234562 :   ulong xv, xv1, g = xgcduu(p, x, 1, &xv, &xv1, &s);
    1072         [ +  + ]:   90234562 :   if (g != 1UL) return 0UL;
    1073         [ +  + ]:   86171338 :   xv = xv1 % p; if (s < 0) xv = p - xv;
    1074                 :   90234562 :   return xv;
    1075                 :            : }
    1076                 :            : 
    1077                 :            : /* assume 0 < x < p; return u such that u x = gcd(x,p) (mod p);
    1078                 :            :  * set *pg = gcd(x,p) */
    1079                 :            : ulong
    1080                 :    4018355 : Fl_invgen(ulong x, ulong p, ulong *pg)
    1081                 :            : {
    1082                 :            :   ulong v, v1;
    1083                 :            :   long s;
    1084                 :    4018355 :   *pg = xgcduu(p, x, 0, &v, &v1, &s);
    1085         [ +  + ]:    4018355 :   if (s > 0) v = p - v;
    1086                 :    4018355 :   return v;
    1087                 :            : }
    1088                 :            : 
    1089                 :            : /* 1 / Mod(x,p). Assume x < p */
    1090                 :            : ulong
    1091                 :   66485167 : Fl_inv(ulong x, ulong p)
    1092                 :            : {
    1093                 :   66485167 :   ulong xv  = Fl_invsafe(x, p);
    1094 [ +  + ][ +  + ]:   66485167 :   if (!xv && p!=1UL) pari_err_INV("Fl_inv", mkintmod(utoi(x), utoi(p)));
    1095                 :   66485023 :   return xv;
    1096                 :            : }

Generated by: LCOV version 1.9