Ilya Zakharevich on Sat, 25 Apr 1998 22:58:17 +0200

 Calculate primes up to 436M in a glance

```This implementation of initprimes is as quick as the old one for
sizes below 5e6, becomes slightly (50%) quickier when you go to 50e6
(on my machine), and becomes infinitely quickier when primelimit is
twice the simultaneously avalaible core - or more.

Timing (P5/133, 32M memory):

Old			New

0.5e6	  60ms			    60ms
5e6	  500ms			   470ms
50e6	6 280ms			 4 690ms
60e6	infinity		 5 650ms
120e6	---			11 650ms
240e6	---			24 220ms
430e6	---			46 970ms

The reason why the old algorithm is so slow for big limits is a horrible
memory locality.  If you can put 40M in memory, and want to calculate
primes up to n>80e6, you will move approx. 1/3*n^(3/2) bytes to the swapfile
and back.

The new algorithm requires swap if 1.09*n/log(n) + 0.5M > available memory,
and runs over it only once.  So, with 25M of available memory, it will run
up to the current limit 436e6 without going to swap at all, and if you
do not have that much memory, you need to move this amount to swap only once.

Enjoy,
Ilya

--- ./src/basemath/arith2.c~.ini	Sat Feb  7 11:11:30 1998
+++ ./src/basemath/arith2.c	Sat Apr 25 16:21:54 1998
@@ -52,14 +52,12 @@ primes(long n)
return y;
}

-byteptr
-initprimes(long maxnum)
+static byteptr
+initprimes1(long maxnum, long *lenp, long *lastp)
{
-  long k,size = (maxnum+257) >> 1;
+  long k,size = (maxnum+1) >> 1;
byteptr q,r,s,fin, p = (byteptr) gpmalloc(size+1);

-  if (size > 218136500)
-    err(talker,"impossible to have prestored primes > 436273009");
memset(p, 0, size + 1); fin = p + size;
for (r=q=p,k=1; r<fin; )
{
@@ -74,7 +72,118 @@ initprimes(long maxnum)
*r++ = (q-s) << 1;
}
*r++=0;
-  return (byteptr) gprealloc(p,r-p,size);
+  *lenp = r - p;
+  *lastp = ((s - p) << 1) + 1;
+  return (byteptr) gprealloc(p,r-p,size + 1);
+}
+
+#define PRIME_ARENA (512 * 1024)
+
+byteptr
+initprimes0(long maxnum, long *lenp, long *lastp)
+{
+  long k, size, alloced, asize, psize, rootnum, curlow, last, remains, need;
+  byteptr q,r,s,fin, p, p1, fin1, plast, curdiff;
+
+  if (maxnum > 436273009)
+      err(talker,"impossible to have prestored primes > 436273009");
+  if (maxnum <= 64*1024)		/* Arbitrary. */
+      return initprimes1(maxnum, lenp, lastp);
+
+  if (!(maxnum % 2))
+      maxnum--;				/* Make it odd. */
+  /* Checked to be enough up to 40e6, achieved at 155893. */
+  size = 1.09 * maxnum/log(maxnum) + 145;
+  p1 = (byteptr) gpmalloc(size);
+  {
+      byteptr p2;
+
+      p2 = initprimes0(rootnum = sqrt(maxnum), &psize, &last);
+      memcpy(p1, p2, psize);
+      free(p2);
+  }
+  fin1 = p1 + psize - 1;
+  if (!(rootnum % 2))
+      rootnum--;			/* Make it odd - checked up to. */
+  remains = (maxnum - rootnum) >> 1;	/* Odd numbers to check */
+  need = 100 * rootnum;			/* Make % overhead negligeable. */
+  if (need < PRIME_ARENA)
+      need = PRIME_ARENA;
+  if (avma - bot < need/2) {
+      asize = need;
+      alloced = 1;
+  } else {
+      p = (byteptr) bot;
+      asize = avma - bot;
+  }
+  if (asize > remains)
+      asize = remains + 1;		/* Guard at end. */
+  if (alloced)
+      p = (byteptr) gpmalloc(asize);
+  fin = p + asize - 1;			/* Leave 0-guard at fin. */
+  curlow = rootnum + 2;			/* Know below this odd number. */
+  curdiff = fin1;
+
+  /* During each iteration p..fin-1 represents a range of odd
+     numbers.  plast is a pointer which represent last prime seen,
+     it may point before p..fin-1. */
+  plast = p - ((rootnum - last) >> 1) - 1;
+  while (remains) {
+      if (asize > remains) {
+	  asize = remains + 1;
+	  fin = p + asize - 1;
+      }
+      memset(p, 0, asize);
+      /* p corresponds to curlow.  q runs over primediffs.  */
+      for (q = p1 + 2, k = 3; q <= fin1; ) {
+	  /* First odd number >= curlow divisible by p is (if curlow > p)
+	     last odd number <= curlow + 2p - 1 divisible by p is
+	     p plus last even number <= curlow + p - 1 divisible by p is
+	     p plus last even number <= curlow + p - 2 divisible by p is
+	     curlow + p - 2 - (curlow + p - 2)) % 2p + p.
+	   */
+	  long k2 = k * k - curlow;
+
+	  if (k2 > 0) {
+	      r = p + (k2 = k2 >> 1);
+	      if (k2 > remains)		/* Guard against an address wrap.  */
+		  r = fin;
+	  } else
+	      r = p - ( ( (curlow + k - 2) % (2*k) ) >> 1 ) + k - 1;
+	  for (s = r; s < fin; s += k)
+	      *s = 1;
+	  k += *q++;
+      }
+      /* q runs over addresses corresponding to primes */
+      for (q = p; ; plast = q++) {
+	  while (*q)
+	      q++;			/* Use 0-guard at end */
+	  if (q >= fin)
+	      break;
+	  *curdiff++ = (q - plast) << 1;
+      }
+      plast -= asize - 1;
+      remains -= asize - 1;
+      curlow += ((asize - 1)<<1);
+  }
+  last = curlow - ( (p - plast) << 1 );
+  *curdiff++ = 0;
+  *lenp = curdiff - p1;
+  *lastp = last;
+  if (alloced)
+      free(p);
+  return (byteptr) gprealloc(p1, *lenp, size);
+}
+
+byteptr
+initprimes(long maxnum)	/* Backward compatibility */
+{
+    long len, last;
+    if (maxnum + 256 > 436273009)
+	err(talker,"impossible to have prestored primes > 436273009");
+    /*	The storage algorithm needs to know next prime after maxnum
+	to be sure that no primes up to maxnum are skipped.  */
+    return initprimes0(maxnum + 256, &len, &last);
}

static void
```

• Follow-Ups:
• alpha/linux
• From: "John Cremona (Maths)" <cremona@noether.ex.ac.uk>