/*
5 Prime counting functions implementations based on Linnik's Identity, including the
star here, an implementation that runs in O(n^2/3 ln n) time and O(n^1/3 ln n) space
(search for the function countprimes5, below).
Nathan McKenzie, 8-9-2011
nathan _AT_ icecreambreakfast.com
See http://www.icecreambreakfast.com/primecount/primecounting.html for descriptions
of the theory behind all of this stuff.
See http://www.icecreambreakfast.com/primecount/primecounting.html#1_1 for a basic
description of Linnik's identity,and
http://www.icecreambreakfast.com/primecount/primecounting.html#ch2 for a bit more
elaboration how this identity can be used to compute the prime counting function.
As a general note, 4 of the 5 of these algorithms could be sped up considerably by
making use of a suitably sized wheel - the fourth algorithm (countprimes4) already
makes use of such a wheel.
*/
/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
NOTE! For easy exploration, change the following line
*/
#define primeCountingFunction countprimes5
/* to any of the following values : countprimes1, countprimes2, countprimes3,
countprimes4, countprimes5 to try out the different prime counting algorithms.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
*/
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "conio.h"
#include "time.h"
typedef long long BigInt;
const int g_wheelLargestPrime = 19;
int scaleNum = 10;
/* ________________________________________________________________________________________
This is the main timing loop, running the prime counting function for increasingly
large input.
Change the #define "primeCountingFunction" to the various "countprime" functions to
run and performance time them.
________________________________________________________________________________________
*/
BigInt countprimes1( BigInt n );
BigInt countprimes2( BigInt n );
BigInt countprimes3( BigInt n );
BigInt countprimes4( BigInt n );
BigInt countprimes5( BigInt n );
void MakeWheel( int );
int main(int argc, char* argv[]){
if( primeCountingFunction == countprimes4 ) MakeWheel( g_wheelLargestPrime );
int oldClock = (int)clock();
int lastDif = 0;
printf(" ");
printf( "Time\n");
printf(" ");
printf( "Increase\n");
printf(" ");
printf( "for x%d\n", scaleNum);
printf(" ");
printf( "__ Input Number __ __ Output Number __ _ MSec _ _ Sec _ Input\n");
printf( " \n");
for( BigInt i = scaleNum; i <= 1000000000000000000; i *= scaleNum ){
printf( "%17I64d(10^%4.1f): ", i, log( (double)i )/log(10.0) );
BigInt total = (BigInt)(primeCountingFunction( i )+.00001);
int newClock = (int)clock();
printf( " %20I64d %8d : %4d: x%f\n",
total, newClock - oldClock, ( newClock - oldClock ) / CLK_TCK,
( lastDif ) ? (double)( newClock - oldClock ) / (double)lastDif : 0.0 );
lastDif = newClock - oldClock;
oldClock = newClock;
}
getch();
return 0;
}
static BigInt mu[] = { 0, 1, -1, -1, 0, -1, 1, -1, 0, 0, 1, -1, 0, -1, 1, 1, 0, -1, 0,
-1, 0, 1, 1, -1, 0, 0, 1, 0, 0, -1, -1, -1, 0, 1, 1, 1, 0, -1, 1, 1, 0, -1, -1, -1,
0, 0, 1, -1, 0, 0, 0, 1, 0, -1, 0, 1, 0, 1, 1, -1, 0, -1, 1, 0, 0, 1, -1, -1, 0, 1,
-1, -1, 0, -1, 1, 0, 0, 1, -1, -1, 0, 0 };
/* ________________________________________________________________________________________
ALGORITHM 1 : countprimes1
This is the first implementation of the prime counting function. It is not at all
fast, but it is at least rather simple. It runs in something like O(n^1.7) time and
O(episolon) space - which makes it worse, in fact, that trial division.
See http://www.icecreambreakfast.com/primecount/primecounting.html#3_1 for the source
identity of countdivisors.
See http://www.icecreambreakfast.com/primecount/primecounting.html#2_4 for the body of
countprimes1
________________________________________________________________________________________
*/
double countdivisors(BigInt n, BigInt k){
if (k == 0) return 1;
if (k == 1) return n - 1;
BigInt t = 0;
for (BigInt i = 2; i <= n; i++) t += countdivisors( n / i, k - 1 );
return t;
}
BigInt countprimes1( BigInt n){
double t = 0.0;
for (BigInt j = 1; j < log((double)n) / log(2.0); j++)
for (BigInt k = 1; k < log( pow( n, 1.0 / j ) ) / log(2.0); k++)
t += pow( -1.0, (double)k+1 ) * countdivisors(pow( n, 1.0 / j ), k)
/ k / j * mu[j];
return (t+.1);
}
/* ________________________________________________________________________________________
ALGORITHM 2 : countprimes2
This is a recursive implementation of countprimes1. It's ultimate quite similar,
including its lousy performance.
See http://www.icecreambreakfast.com/primecount/primecounting.html#3_2 for the
source identity of recurseprimes.
See http://www.icecreambreakfast.com/primecount/primecounting.html#2_3 for the body
of countprimes2
________________________________________________________________________________________
*/
double recurseprimes( BigInt n, BigInt k ){
double t= 0;
for( BigInt j = 2; j <= n; j++ )t += 1.0 / k - recurseprimes( n/j, k+1 );
return t;
}
BigInt countprimes2( BigInt n ){
double t = 0;
for (BigInt j = 1; j < log((double)n) / log(2.0); j++)
t += recurseprimes(pow( n, 1.0 / j ), 1) / j * mu[j];
return t+.1;
}
/* ________________________________________________________________________________________
ALGORITHM 3 : countprimes3
This is another, substantially faster, implementation of the prime counting function
via Linnik's Identity. It relies on internal symmetries of the number of divisors
summatory function to be sped up. For an algorithm that uses O(epsilon) memory, it
runs pretty quickly, appearing to clock in just a bit below the O(n) mark.
This algorithm can be sped up massively, in constant time terms, by using a suitably
large wheel.
See http://www.icecreambreakfast.com/primecount/primecounting.html#3_3 for the
source identity of countdivisorsfast
See http://www.icecreambreakfast.com/primecount/primecounting.html#2_4 for the
body of countprimes3
________________________________________________________________________________________
*/
int inversepow( int n, int k) {
return pow(n, 1.0 / k) + .00000001;
}
double factorial(BigInt val){
double total = 1.0;
for (int i = 1; i <= val; i++) total *= i;
return total;
}
double binomial_(BigInt val, BigInt div) {
return factorial(val) / (factorial(div) * factorial(val - div));
}
double countdivisorsfast( BigInt n, BigInt k, BigInt a ){
if( k == 0 )return 1;
if( k == 1 )return n - a + 1;
double t = 0;
for( BigInt m = a; m <= inversepow(n,k); m++ )
for( BigInt j = 0; j < k; j++ )
t += countdivisorsfast( n / pow( (double)m, (double)k - j ), j, m+1 )
* binomial_( k, j );
return t;
}
BigInt countprimes3(BigInt n){
double t = 0.0;
for (BigInt j = 1; j < log((double)n) / log(2.0); j++)
for (BigInt k = 1; k < log( pow( n, 1.0 / j ) ) / log(2.0); k++)
t += pow( -1.0, (double)k + 1 ) * countdivisorsfast(inversepow( n, j ), k, 2 )
/ k / j * mu[j];
return t + .1;
}
/* ________________________________________________________________________________________
ALGORITHM 4 : countprimes4
Although it might be difficult to tell, the following code implements the same
algorithm, essentially, as ALGORITHM 3. So, its core identities are found in
http://www.icecreambreakfast.com/primecount/primecounting.html#3_3
and http://www.icecreambreakfast.com/primecount/primecounting.html#2_4
The main difference here is that a lot of integration has happened for performance
reasons (so, for example, partially computed binomials are kept around as computation
happens), and, especially, a large wheel has been implemented, which speeds up
computation drastically.
This is a pretty fast prime counting algorithm considering it uses O(episilon) memory.
I've tried, from a number of different angles, to come up with ways of trading off
memory usage to increase its runtime performance, but I haven't found any such approaches.
I'm not entirely sure what the Big O runtime performance of this algorithm is,
ultimately... Up to around 10^15, it seems to be running in something like O(n^4/5),
but it also seems to be taking slightly longer each power of 10.
________________________________________________________________________________________
*/
const double EPSILON = .00000000001;
typedef long long BigInt;
/* A wheel including 19 has 9.6 million entries. */
/* Used for the construction of the wheel - include more primes as needed,
but this is already enough primes to consume over 75 gigs of RAM */
const int g_primes[] ={ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 };
int g_wheelCycleEntries;
int g_wheelCyclePeriod;
int g_wheelFirstPrime;
int g_wheelBasePrimes;
int* g_wheelTranslation = 0;
int* g_wheelOffsets = 0;
BigInt g_latticepoints;
BigInt g_minVarValue;
BigInt g_boundary;
BigInt g_scale;
BigInt g_divisor;
BigInt g_lastMax;
int g_variablesLeft;
BigInt g_lastScaleDivisor;
BigInt g_scaleVal;
int g_mu[] = {
0, 1, -1, -1, 0, -1, 1, -1, 0, 0,
1, -1, 0, -1, 1, 1, 0, -1, 0, -1,
0, 1, 1, -1, 0, 0, 1, 0, 0, -1,
-1, -1, 0, 1, 1, 1, 0, -1, 1, 1,
0, -1, -1, -1, 0, 0, 1, -1, 0, 0,
0, 1, 0, -1, 0, 1, 0, 1, 1, -1,
0, -1, 1, 0, 0, 1, -1, -1, 0, 1,
-1, -1, 0, -1, 1, 0, 0, 1, -1, -1,
0, 0, 1, -1, 0, 1, 1, 1, 0, -1,
0, 1, 0, 1, 1, 1, 0, -1, 0, 0,
0, -1, -1, -1, 0, -1, 1, -1, 0, -1,
-1, 1, 0, -1, -1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, -1, 0, 1,
-1, -1, 0, 1, 1, 0, 0, -1, -1, -1,
0, 1, 1, 1, 0, 1, 1, 0, 0, -1,
0, -1, 0, 0, -1, 1, 0, -1, 1, 1,
0, 1, 0, -1, 0, -1, 1, -1, 0, 0,
-1, 0, 0, -1, -1, 0, 0, 1, 1, -1,
0, -1, -1, 1, 0, 1, -1, 1, 0, 0,
-1, -1, 0, -1, 1, -1, 0, -1, 0, -1,
0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
1, -1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, -1, -1, 0, 0, 1, -1, 0, -1,
-1, -1, 0, -1, 0, 1, 0, 1, -1, -1,
0, -1, 0, 0, 0, 0, -1, 1, 0, 1,
0, -1, 0, 1, 1, -1, 0,
};
/* Note that with 64 bit ints, we can't go above factorial( 20 ) anyway. */
BigInt g_factorial[] = {
0, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800, 479001600,
6227020800, 87178291200, 1307674368000, 20922789888000, 355687428096000,
6402373705728000, 121645100408832000, 2432902008176640000
};
inline BigInt InversePower( BigInt x, BigInt y ){
return ( (BigInt)( pow( (double)x + EPSILON, ( 1.0 / (double)y ) ) + EPSILON ) );
}
inline int GetwheelCyclePeriod( int cap ){
int val = 1;
int i = 0;
while( g_primes[ i ] <= cap ){
val *= g_primes[ i ];
i++;
}
return val;
}
inline int GetFirstIncludedPrime( int cap ){
int i = 0;
while( g_primes[ i ] <= cap ){
i++;
}
return g_primes[ i ];
}
inline int GetBasePrimes( int cap ){
int i = 0;
while( g_primes[ i ] <= cap ){
i++;
}
return i;
}
inline void IncrementWheel( int &offset ){
offset++;
if( offset >= g_wheelCycleEntries ){
offset = 0;
}
}
void MakeWheel( int cap ){
g_wheelBasePrimes = GetBasePrimes( cap );
g_wheelCyclePeriod = GetwheelCyclePeriod( cap );
g_wheelCycleEntries = 0;
int cur = 0;
int offset = -1;
int* wheelBase = 0;
wheelBase = ( int* )malloc( g_wheelCyclePeriod * sizeof( int ) );
g_wheelTranslation = ( int* )malloc( ( g_wheelCyclePeriod + 1 ) * sizeof( int ) );
g_wheelOffsets = ( int * )malloc( g_wheelCyclePeriod * sizeof( int ) );
g_wheelFirstPrime = GetFirstIncludedPrime( cap );
for( int i = 0; i < g_wheelCyclePeriod; i++ ){
wheelBase[ i ] = 1;
for( int j = 2; j <= cap; j++ ){
if( !( ( i + 1 ) % j ) ){
wheelBase[ i ] = 0;
break;
}
}
}
while( cur < g_wheelCyclePeriod ){
if( wheelBase[ cur ] && cur != 0 ){
g_wheelOffsets[ g_wheelCycleEntries ] = offset + 1;
offset = 0;
g_wheelCycleEntries++;
}else{
offset++;
}
cur++;
}
g_wheelOffsets[ g_wheelCycleEntries ] = 2;
g_wheelCycleEntries++;
int total = 0;
g_wheelTranslation[ 0 ] = 0;
for( BigInt i = 0; i < g_wheelCyclePeriod; i++ ){
if( i && wheelBase[ i - 1 ] ){
total++;
}
g_wheelTranslation [ i + 1 ] = total;
}
free( wheelBase );
}
/* This function calculates how many entries the wheel leaves
in the range from (rangeStart, rangeStop].*/
inline BigInt CountWheelEntries( BigInt rangeStart, BigInt rangeEnd ){
rangeEnd++;
int a = rangeStart % g_wheelCyclePeriod;
int b = rangeEnd % g_wheelCyclePeriod;
return ( rangeEnd - b - rangeStart + a ) / g_wheelCyclePeriod *
g_wheelCycleEntries + g_wheelTranslation[ b ] - g_wheelTranslation[ a ] ;
}
void CountHyperbolaLattice_2_Variables( void ){
BigInt finalBoundary = g_boundary / g_minVarValue;
BigInt boundaryRoot = (BigInt)( sqrt( (double)finalBoundary ) + EPSILON );
/* For if the final two digits happen to be the same. */
g_latticepoints += g_scale / ( g_divisor * ( g_divisor + 1 ) );
/* Leading digit is same, final digit is not. */
g_latticepoints +=
( CountWheelEntries( g_minVarValue, finalBoundary / g_minVarValue ) - 1 )
* ( g_scale / g_divisor );
/* For if the final two digits happen to be the same,
but both differ from the previous. */
g_latticepoints +=
( CountWheelEntries( g_minVarValue, boundaryRoot ) - 1 ) * ( g_scale / 2 );
/* Both digits differ from all other digits - This is the hellish evil
loop of portentious doom. */
int curWheelOffset = g_wheelTranslation[ g_minVarValue % g_wheelCyclePeriod ];
BigInt curLeadingVar = g_minVarValue + g_wheelOffsets[ curWheelOffset ];
BigInt subTotal = 0;
IncrementWheel( curWheelOffset );
while( curLeadingVar <= boundaryRoot ){
subTotal += CountWheelEntries( curLeadingVar, finalBoundary / curLeadingVar ) - 1;
curLeadingVar += g_wheelOffsets[ curWheelOffset ];
IncrementWheel( curWheelOffset );
}
g_latticepoints += subTotal * g_scale;
}
void CountHyperbolaLattice_3_Variables( BigInt hyperbolaBoundary, BigInt minVarValue ){
BigInt maxVarValue = InversePower( hyperbolaBoundary, 3 );
int curWheelOffset = g_wheelTranslation[ minVarValue % g_wheelCyclePeriod ];
g_boundary = hyperbolaBoundary;
g_minVarValue = minVarValue;
g_scale = g_scaleVal / g_lastScaleDivisor;
g_divisor = g_lastScaleDivisor + 1;
CountHyperbolaLattice_2_Variables();
g_minVarValue += g_wheelOffsets[ curWheelOffset ];
IncrementWheel( curWheelOffset );
g_scale = g_scaleVal;
g_divisor = 2;
while( g_minVarValue <= maxVarValue ){
CountHyperbolaLattice_2_Variables();
g_minVarValue += g_wheelOffsets[ curWheelOffset ];
IncrementWheel( curWheelOffset );
}
}
void CountHyperbolaLattice_X_Variables( BigInt hyperbolaBoundary, BigInt minVarValue ){
BigInt maxVarValue = InversePower( hyperbolaBoundary, g_variablesLeft );
/* Save global variables that will be restored at end of function */
BigInt scaleVal = g_scaleVal;
BigInt lastScaleDivisor = g_lastScaleDivisor;
int curWheelOffset = g_wheelTranslation[ minVarValue % g_wheelCyclePeriod ];
g_variablesLeft--;
g_lastScaleDivisor = lastScaleDivisor + 1;
g_scaleVal = scaleVal / lastScaleDivisor;
if( g_variablesLeft == 3 ){
CountHyperbolaLattice_3_Variables( hyperbolaBoundary / minVarValue, minVarValue );
}else{
CountHyperbolaLattice_X_Variables( hyperbolaBoundary / minVarValue, minVarValue );
}
g_lastScaleDivisor = 2;
g_scaleVal = scaleVal;
minVarValue += g_wheelOffsets[ curWheelOffset ];
IncrementWheel( curWheelOffset );
while( minVarValue <= maxVarValue ){
if( g_variablesLeft == 3 ){
CountHyperbolaLattice_3_Variables( hyperbolaBoundary / minVarValue, minVarValue );
}else{
CountHyperbolaLattice_X_Variables( hyperbolaBoundary / minVarValue, minVarValue );
}
minVarValue += g_wheelOffsets[ curWheelOffset ];
IncrementWheel( curWheelOffset );
}
/* Restore global variables */
g_lastScaleDivisor = lastScaleDivisor;
g_variablesLeft++;
}
BigInt CountHyperbolaLattice( BigInt hyperbolaBoundary, int hyperbolaVariables ){
g_latticepoints = 0;
g_variablesLeft = hyperbolaVariables;
g_lastScaleDivisor = 1;
g_scaleVal = g_factorial[ hyperbolaVariables ];
if( hyperbolaBoundary <
(BigInt)pow( (double)g_wheelFirstPrime, (double)hyperbolaVariables ) ){
return 0;
}
switch( hyperbolaVariables ){
case 1:
g_latticepoints = CountWheelEntries( g_wheelFirstPrime, hyperbolaBoundary );
break;
case 2:
/* CountHyperbolaLattice_2_Variables expects a number of global variables
to be initialized when it is called, which generally happens in
CountHyperbolaLattice_3_Variables. We have to do it manually here. */
g_minVarValue = g_wheelFirstPrime;
g_boundary = g_wheelFirstPrime * hyperbolaBoundary;
g_scale = 2;
g_divisor = 1;
CountHyperbolaLattice_2_Variables();
break;
case 3:
CountHyperbolaLattice_3_Variables( hyperbolaBoundary, g_wheelFirstPrime );
break;
default:
CountHyperbolaLattice_X_Variables( hyperbolaBoundary, g_wheelFirstPrime );
break;
}
return g_latticepoints;
}
BigInt countprimes4( BigInt n ){
int maxPower = ( int )( log( ( double )n + EPSILON )
/ log ( ( double )g_wheelFirstPrime + EPSILON ) + EPSILON ) + 1;
double total = 0.0;
int oldClock = (int)clock();
int totalTime = 0;
for( int curPower = 1; curPower < maxPower; curPower++ ){
if( !g_mu[ curPower ] ){
continue;
}
BigInt curMax = InversePower( n, curPower );
double subTotal = 0.0;
BigInt hyperbolaEntries = 1;
double sign = 1;
while( 1 ){
double temp = sign / hyperbolaEntries;
sign *= -1;
double v2 = (double)CountHyperbolaLattice( curMax, hyperbolaEntries );
temp *= v2;
if( temp == 0.0 ){
break;
}
subTotal += temp;
hyperbolaEntries++;
int newClock = (int)clock();
totalTime += newClock - oldClock;
oldClock = newClock;
}
subTotal /= curPower * g_mu[ curPower ];
total += subTotal;
}
total += g_wheelBasePrimes;
/* the .5 is to prevent truncation errors - but it's clearly sloppy */
return (BigInt)( total + 0.5 );
}
/* ________________________________________________________________________________________
ALGORITHM 5
This is the fastest, in terms of O() runtime, and by far most complex, implementation
of a prime counting algorithm based on Linnik's identity that I have found.
Its performance is none too shabby - it runs in roughly O( n^2/3 ln n ) time and
O( n^1/3 ln n ) space, which puts it in spitting distance of some of the faster methods
for counting primes.
See http://www.icecreambreakfast.com/primecount/primecounting.html#ch4 ,
http://www.icecreambreakfast.com/primecount/primecounting.html#ch5 , and
http://www.icecreambreakfast.com/primecount/primecounting.html#ch6
for general descriptions of the algorithm implemented here.
As currently implemented here, the code stops working for relatively low input values
(say, 10^11 ish) - this is a consequence of precision and overflow issues, not anything
to do with the underlying math. Obviously those concerns would have to be addressed to
use it for higher numbers.
As repeatedly mentioned above, this algorithm should be sped up considerably if a wheel
is implemeneted, as was done in ALGORITHM 4 / countprimes4. To date, I haven't written
such an implementation. A suitable wheel will come with the side bit of making precision
issues somewhat more tractable.
I haven't gone out of my way to do much in the way of constant time performance
optimization here - it's already unreadable enough without fiddling with the code
thusly, and integrating a wheel ought to come first anyway. But there are plenty
of opportunities.
I'm really curious how fast this approach can be sped up.
The fuzzy transitions between double and BigInt are a cause of imprecision and
consternation.
________________________________________________________________________________________
*/
static BigInt* binomials; /* This is used as a doubly subscripted array, 128x128.
Indexing is done manually.*/
static BigInt nToTheThird;
static BigInt logn;
static BigInt numPrimes;
static BigInt* primes;
static BigInt* factorsMultiplied;
static BigInt* totalFactors;
static BigInt* factors; /* This is used as a doubly subscripted array, n^1/3 x ln n.
Indexing is done manually.*/
static BigInt* numPrimeBases;
static BigInt* DPrime; /*This is used as a doubly subscripted array, n^1/3 x ln n.
Indexing is done manually.*/
static BigInt curBlockBase;
static double t;
static BigInt nToTheHalf;
static BigInt numDPowers;
static double* dPrime;
static BigInt S1Val;
static BigInt S1Mode;
static BigInt* S3Vals;
static BigInt* S3Modes;
static bool ended;
static BigInt maxSieveValue;
static BigInt ceilval;
static BigInt n;
BigInt binomial( double n, int k ){
double t = 1;
for( int i = 1; i <= k; i++ ){
t *= ( n - ( k - i ) ) / i;
}
return BigInt( t + .1 );
}
static BigInt invpow(double n, double k) {
return (BigInt)(pow(n, 1.0 / k) + .00000001);
}
/* See http://www.icecreambreakfast.com/primecount/primecounting.html#ch5 for a description
of calculating d_k'(n) from a complete factorization of a number n. */
static BigInt d1(BigInt* a, BigInt o, BigInt k, BigInt l){
BigInt t = 1;
for (BigInt j = 0; j < l; j++) t *= binomials[(a[o*logn+ j] - 1 + k)*128 + a[o*logn+ j]];
return t;
}
/* See http://www.icecreambreakfast.com/primecount/primecounting.html#ch5 for a description
of calculating d_k'(n) from a complete factorization of a number n.*/
static BigInt d2(BigInt* a, BigInt o, BigInt k, BigInt l, BigInt numfacts ){
if (numfacts < k) return 0;
BigInt t = 0;
for (BigInt j = 1; j <= k; j++) t += ( ( k - j ) % 2 == 1 ? -1:1 )
* binomials[k * 128 + j] * d1(a, o, j, l);
return (BigInt)t;
}
static void allocPools( BigInt n ){
nToTheThird = (BigInt)pow(n, 1.0 / 3);
logn = (BigInt)(log(pow(n, 2.00001 / 3)) / log(2.0)) + 1;
factorsMultiplied = new BigInt[nToTheThird];
totalFactors = new BigInt[nToTheThird];
factors = new BigInt[nToTheThird * logn];
numPrimeBases = new BigInt[nToTheThird];
DPrime = new BigInt[(nToTheThird + 1) * logn];
binomials = new BigInt[128*128+ 128];
for (BigInt j = 0; j < 128; j++)
for (BigInt k = 0; k <= j; k++)
binomials[j * 128 + k]= binomial(j, k);
for (BigInt j = 0; j < logn; j++) DPrime[j] = 0;
curBlockBase = 0;
t = n - 1;
nToTheHalf = (BigInt)pow(n, 1.0 / 2);
numDPowers = (BigInt)(log(pow(n, 2.00001 / 3)) / log(2.0)) + 1;
dPrime = new double[(nToTheThird + 1) * (numDPowers + 1)];
S1Val = 1;
S1Mode = 0;
S3Vals = new BigInt[nToTheThird + 1];
S3Modes = new BigInt[nToTheThird + 1];
ended = false;
maxSieveValue = (BigInt)(pow(n, 2.00001 / 3));
for (BigInt j = 2; j < nToTheThird + 1; j++){
S3Modes[j] = 0;
S3Vals[j] = 1;
}
}
static void deallocPools(){
delete factorsMultiplied;
delete totalFactors;
delete factors;
delete numPrimeBases;
delete DPrime;
delete binomials;
delete dPrime;
delete S3Vals;
delete S3Modes;
delete primes;
}
/* This finds all the primes less than n^1/3, which will be used for sieving and
generating complete factorizations of numbers up to n^2/3*/
static void fillPrimes(){
BigInt* primesieve = new BigInt[nToTheThird + 1];
primes = new BigInt[nToTheThird + 1];
numPrimes = 0;
for (BigInt j = 0; j <= nToTheThird; j++) primesieve[j] = 1;
for (BigInt k = 2; k <= nToTheThird; k++){
BigInt cur = k;
if (primesieve[k] == 1){
primes[numPrimes] = k;
numPrimes++;
while (cur <= nToTheThird){
primesieve[cur] = 0;
cur += k;
}
}
}
delete primesieve;
}
/*This resets some state used for the sieving and factoring process.*/
static void clearPools(){
for (BigInt j = 0; j < nToTheThird; j++){
numPrimeBases[j] = -1;
factorsMultiplied[j] = 1;
totalFactors[j] = 0;
}
}
/* We can use sieving on our current n^1/3 sized block of numbers to
get their complete prime factorization signatures, with which we can then
quickly compute d_k' values.*/
static void factorRange(){
for (BigInt j = 0; j < numPrimes; j++){
/*mark everything divided by each prime, adding a new entry.*/
BigInt curPrime = primes[j];
if (curPrime * curPrime > curBlockBase + nToTheThird) break;
BigInt curEntry = ( curBlockBase % curPrime == 0 ) ? 0:curPrime
- (curBlockBase % curPrime);
while (curEntry < nToTheThird){
if( curEntry+curBlockBase != 0 ){
factorsMultiplied[curEntry] *= curPrime;
totalFactors[curEntry]++;
numPrimeBases[curEntry]++;
factors[curEntry*logn+ numPrimeBases[curEntry]] = 1;
}
curEntry += curPrime;
}
/*mark everything divided by each prime power*/
BigInt cap = (BigInt)( log((double)(nToTheThird+curBlockBase))
/ log((double)curPrime) + 1 );
BigInt curbase = curPrime;
for (BigInt k = 2; k < cap; k++){
curPrime *= curbase;
curEntry = (curBlockBase % curPrime == 0) ? 0 : curPrime
- (curBlockBase % curPrime);
while (curEntry < nToTheThird){
factorsMultiplied[curEntry] *= curbase;
totalFactors[curEntry]++;
if (curEntry + curBlockBase != 0)
factors[curEntry*logn+ numPrimeBases[curEntry]] = k;
curEntry += curPrime;
}
}
}
/*account for prime factors > n^1/3*/
for (BigInt j = 0; j < nToTheThird; j++){
if (factorsMultiplied[j] < j+curBlockBase){
numPrimeBases[j]++;
totalFactors[j]++;
factors[j*logn+ numPrimeBases[j]] = 1;
}
}
}
/* By this point, we have already factored, through sieving, all the numbers in the
current n^1/3 sized block we are looking at. With a complete factorization, we
can calculate d_k'(n) for a number. Then, D_k'(n) = d_k'(n) + D_k'(n-1).*/
static void buildDivisorSums(){
for (BigInt j = 1; j < nToTheThird+1; j++){
if (j + curBlockBase == 1 || j + curBlockBase == 2) continue;
for (BigInt k = 0; k < logn; k++){
DPrime[j * logn + k] = DPrime[(j - 1) * logn + k] +
d2(factors, j - 1, k, numPrimeBases[j - 1] + 1, totalFactors[j - 1]);
}
}
for (BigInt j = 0; j < logn; j++) DPrime[j] = DPrime[nToTheThird*logn+ j];
}
/* This general algorithm relies on values of D_k' <= n^2/3 and d_k' <= n^1/3.
This function calculates those values of d_k'.*/
static void find_dVals(){
curBlockBase = 1;
clearPools();
factorRange();
buildDivisorSums();
for (BigInt j = 2; j <= nToTheThird; j++){
for (BigInt m = 1; m < numDPowers; m++){
double s = 0;
for (BigInt r = 1; r < numDPowers; r++)
s += pow(-1.0, (double)( r + m )) * (1.0 / (r + m + 1)) *
(DPrime[j * logn + r] - DPrime[(j - 1) * logn + r]);
dPrime[j*(numDPowers + 1)+ m] = s;
}
}
}
static void resetDPrimeVals(){
curBlockBase = 0;
for (BigInt k = 0; k < nToTheThird + 1; k++)
for (BigInt j = 0; j < logn; j++)
DPrime[k * logn + j] = 0;
}
/* This function is calculating the first two sums of
http://www.icecreambreakfast.com/primecount/primecounting.html#4_4
It is written to rely on values of D_k' from smallest to greatest,
to use the segmented sieve.*/
static void calcS1(){
if (S1Mode == 0){
while (S1Val <= ceilval){
BigInt cnt = (n / S1Val - n / (S1Val + 1));
for (BigInt m = 1; m < numDPowers; m++)
t += cnt * (m % 2 == 1 ? -1 : 1) * (1.0 / (m + 1)) *
DPrime[(S1Val - curBlockBase + 1) * logn + m];
S1Val++;
if (S1Val >= n / nToTheHalf){
S1Mode = 1;
S1Val = nToTheHalf;
break;
}
}
}
if (S1Mode == 1){
while (n / S1Val <= ceilval){
for (BigInt m = 1; m < numDPowers; m++)
t += (m % 2 == 1 ? -1 : 1) * (1.0 / (m + 1)) *
DPrime[(n / S1Val - curBlockBase + 1) * logn + m];
S1Val--;
if (S1Val < nToTheThird + 1){
S1Mode = 2;
break;
}
}
}
}
/* This loop is calculating the 3rd term that runs from 2 to n^1/3 in
http://www.icecreambreakfast.com/primecount/primecounting.html#4_4 */
static void calcS2(){
for (BigInt j = 2; j <= nToTheThird; j++)
for (BigInt k = 1; k < numDPowers; k++)
t += (n / j - 1) * pow(-1.0, (double)k) * (1.0 / (k + 1)) *
(DPrime[j * logn + k] - DPrime[(j - 1) * logn + k]);
}
/* This loop is calculating the two double sums in
http://www.icecreambreakfast.com/primecount/primecounting.html#4_4
It is written to rely on values of D_k' from smallest to greatest,
to use the segmented sieve.*/
static void calcS3(){
for (BigInt j = 2; j <= nToTheThird; j++){
if (S3Modes[j] == 0){
BigInt endsq = (BigInt)(pow(n / j, .5));
BigInt endVal = (n / j) / endsq;
while (S3Vals[j] <= ceilval){
BigInt cnt = (n / (j * S3Vals[j]) - n / (j * (S3Vals[j] + 1)));
for (BigInt m = 1; m < numDPowers; m++)
t += cnt * DPrime[(S3Vals[j] - curBlockBase + 1) * logn + m] *
dPrime[j*(numDPowers + 1)+ m];
S3Vals[j]++;
if (S3Vals[j] >= endVal){
S3Modes[j] = 1;
S3Vals[j] = endsq;
break;
}
}
}
if (S3Modes[j] == 1){
while (n / (j * S3Vals[j]) <= ceilval){
for (BigInt m = 1; m < numDPowers; m++)
t += DPrime[(n / (j * S3Vals[j]) - curBlockBase + 1) * logn + m] *
dPrime[j * (numDPowers + 1) + m];
S3Vals[j]--;
if (S3Vals[j] < nToTheThird / j + 1){
S3Modes[j] = 2;
break;
}
}
}
}
}
/* This is the most important function here. How it works:
first we allocate our n^1/3 ln n sized pools and other variables.
Then we go ahead and sieve to get our primes up to n^1/3
We also calculate, through one pass of sieving, values of d_k'(n) up to n^1/3
Then we go ahead and calculate the loop S2 (check the description of the algorithm),
which only requires values of d_k'(n) up to n^1/3, which we already have.
Now we're ready for the main loop.
We do the following roughly n^1/3 times.
First we clear our sieving variables.
Then we factor, entirely, all of the numbers in the current block sized n^1/3 that
we're looking at.
Using our factorization information, we calculate the values for d_k'(n) for the
entire range we're looking, and then sum those together to have a rolling set
of D_k'(n) values
Now we have values for D_k'(n) for this block sized n^1/3
First we see if any of the values of S1 that we need to compute are in this block. We
can do this by (see the paper) walking through the two S1 loops backwards, which
will use the D_k'(n) values in order from smallest to greatest
We then do the same thing will all of the S3 values
Once we have completed this loop, we will have calculated the prime power function for n.
This loop is essentially calculating
http://www.icecreambreakfast.com/primecount/primecounting.html#4_4
*/
static double calcPrimePowerCount(BigInt nVal){
n = nVal;
allocPools(n);
fillPrimes();
find_dVals();
calcS2();
resetDPrimeVals();
for (curBlockBase = 0; curBlockBase <= maxSieveValue; curBlockBase += nToTheThird ){
clearPools();
factorRange();
buildDivisorSums();
ceilval = curBlockBase + nToTheThird - 1;
if (ceilval > maxSieveValue) {
ceilval = maxSieveValue;
ended = true;
}
calcS1();
calcS3();
if (ended) break;
}
deallocPools();
return t;
}
/*Make sure to read the header comments labeled "ALGORITHM 5", above*/
static BigInt countprimes5(BigInt num) {
double total = 0.0;
for (BigInt i = 1; i < log((double)num) / log(2.0); i++) {
double val = calcPrimePowerCount( invpow(num, i)) / (double)i * mu[i];
total += val;
}
return total+.1;
}