Coding Challenge I

Pages: 1 2 3 4 5 6 7 8 9

Solution from NB

“Using OS X, gcc, and a nifty algorithm for generating magic numbers, I’ve eliminated all divides from the main run loop. Execution sped up 21% on my 800Mhz G4. I might download cygwin and see if i can figure out the correct x86 instruction (there’s one asm op in the program, using gcc inline assembly) and get you a windows binary. I’ve attatched the source, so that you can see that there’s no special hand waving go on here :) You could compile it if you had access to a Mac OS X machine.

Followup with MSVC compatible source: I don’t have Visual Studio, so i’m not 100% sure this will work, but looking at other samples of Visual Studio inline assembly, I have hope that it will

#include <stdlib.h>
#include <stdio.h>

struct ms {
    int m;	/* magic number */ 
    int s;	/* shift amount */ 
};

/* must have 2 <= d <= 2^31-1 or -2^31 <= d <= -2 */ 
struct ms magic(int d)	
{ 
	int p; 
	unsigned int ad, anc, delta, q1, r1, q2, r2, t; 
	unsigned int two31 = 1 << 31;	/* 2^31 */ 
	struct ms mag; 
	
	ad = abs(d); 
	t = two31 + ((unsigned int)d >> 31); 
	anc = t – 1 – t%ad;	/* absolute value of nc */ 
	p = 31;	/* initialize p */ 
	q1 = two31/anc;	/* initialize q1 = 2p/abs(nc) */ 
	r1 = two31 – q1*anc;	/* initialize r1 = rem(2p,abs(nc)) */ 
	q2 = two31/ad;	/* initialize q2 = 2p/abs(d) */ 
	r2 = two31 – q2*ad;	/* initialize r2 = rem(2p,abs(d)) */ 
    do { 
	p = p + 1; 
	q1 = 2*q1;	/* update q1 = 2p/abs(nc) */ 
	r1 = 2*r1;	/* update r1 = rem(2p/abs(nc)) */ 
	if (r1 >= anc) {	/* must be unsigned comparison */ 
	q1 = q1 + 1;	
	r1 = r1 – anc;	
	} 
	q2 = 2*q2;	/* update q2 = 2p/abs(d) */ 
	r2 = 2*r2;	/* update r2 = rem(2p/abs(d)) */ 
	if (r2 >= ad) {	/* must be unsigned comparison */ 
	q2 = q2 + 1;	
	r2 = r2 – ad;	
	}	
	delta = ad – r2;	
    } while (q1 < delta || (q1 == delta && r1 == 0)); 
			
    mag.m = q2 + 1;	
    if (d < 0)
        mag.m = -mag.m;	/* resulting magic number */ 
    mag.s = p – 32;	/* resulting shift */ 
    return mag; 
} 
        
void ComputeMs(int alength, struct ms *b)
{
    int i;
    int j;
    
    for (i = 1, j = 3; i < alength; i++, j += 2)
    {
        b[i] = magic(j);
    }
}

void ComputeTorte(int numdigits, int alength, char* pi, int *a, struct ms *b)
{
    int piLength = 0;
    int nines = 0;
    int predigit = 0;
    int i, j;
    for(i = 0; i < alength; ++i)
        a[i] = 20;

    for (j = 0; j < numdigits; ++j)
    {
        unsigned int q = 0;
        unsigned int p = 2 * alength – 1;

        for (i = alength – 1; i >= 1; i–)
        {
            int t;
            int n = a[i] + q *(i + 1);
            int m = b[i].m;
            int s = b[i].s;
            
            __asm{
            mov eax, n
            imul m
            mov q, edx
            }

            t = (m >> 31) & n;
            q += t;
            q >>= s;

            a[i] = (n – (q * p)) * 10;
            p -= 2;
        }

        q = a[0] + q;
        a[0] = (q % 10) * 10;
        q /= 10;
            
        if (q == 9)
            ++nines;
        else if (q == 10)
        {
            int k;
            pi[piLength] = (char) (predigit + 1 + ‘0’);
            for (k = 1; k <= nines; ++k)
                pi[piLength+k] = ‘0’;
            piLength += nines + 1;
            predigit = 0;
            nines = 0;
        }
        else
        {
            int k;
            pi[piLength] = (char)(predigit + ‘0’);
            predigit = q;
            for (k = 1; k <= nines; ++k)
                pi[piLength + k] = ‘9’;
            piLength += nines + 1;
            nines = 0;
        }
    }
    pi[piLength] = (char)(predigit + ‘0’);
    pi[piLength+1] = ‘\0’;
}

int main(int argc, char** argv)
{
    int alength;
    int* a;
    struct ms *c;
    int numdigits;
    char* pi;

    if (argc <= 1)
    {
        fprintf(stderr, “usage: pi #DIGITS [FILE]”);
        return 1;
    }

    numdigits = atoi(argv[1]);
    pi = (char*) malloc(numdigits+1);
    
    alength = 10 * numdigits / 3;
    a = (int*) malloc(alength * sizeof(int));
    c = (struct ms*) malloc(alength * sizeof(struct ms));
    ComputeMs(alength, c);
    ComputeTorte(numdigits, alength, pi, a, c);
    free(a);
    free(c);

    if (argc > 2)
    {
        FILE* fp = fopen(argv[2], “w”);
        if (fp == NULL)
        {
           fprintf(stderr, “Cannot open %s\n”, argv[2]);
           return 2;
        }
        fputs(pi, fp);
        fputc(‘\n’, fp);
        fclose(fp);
    }
    else
        puts(pi);

    free(pi);

    return 0;
}

Pages: « Prev   1 2 3 4 5 6 7 8 9   Next »

Discuss (16 comments)