/*
 * Copyright 1996 Thierry Bousch
 * Licensed under the Gnu Public License, Version 2
 *
 * $Id: multi.c,v 1.2 1996/09/14 09:41:36 bousch Exp $
 *
 * Low-level multiprecision operations, used by Integer.c and Float.c.
 * Big numbers are represented as little-endian arrays of 32-bit words,
 * and the sized is passed as an additional argument.
 */

#include <stdlib.h>
#include <string.h>
#include "mp-arch.h"
#include "multi.h"
#define TEST

/*
 * Addition: d1 and d2 have m1 and m2 limbs, with m1>=m2, and d points to
 * an array with m1 entries, where the sum will be put. The carry (0 or 1)
 * is returned.
 */

__u32 mp_add (__u32 *d1, int m1, __u32 *d2, int m2, __u32 *d)
{
	__u32 th, tl, carry=0;

	/* First, add the common limbs */
	m1 = m1-m2;
	while (m2) {
		add_ssaaaa(th, tl, 0, *d1, 0, *d2);
		add_ssaaaa(carry, *d, th, tl, 0, carry);
		d1++; d2++; d++; m2--;
	}
	/* Then the limbs appearing only in d1 */
	while (m1) {
		add_ssaaaa(carry, *d, 0, *d1, 0, carry);
		d1++; d++; m1--;
	}
	return carry;
}
		
/*
 * Substraction: d1 has m1 limbs, d2 has m2 limbs, with m1>=m2
 * and d1>=d2. The result is put in d, an array to m1 limbs.
 * The carry (0 or 1) is returned.
 */

__u32 mp_substract (__u32 *d1, int m1, __u32 *d2, int m2, __u32 *d)
{
	__u32 th, tl, cc, carry=0;

	m1 = m1-m2;
	while (m2) {
		sub_ddmmss(th, tl, 1, *d1, 0, *d2);
		sub_ddmmss(cc, *d, th, tl, 0, carry);
		carry = 1 - cc;
		d1++; d2++; d++; m2--;
	}
	while (m1) {
		sub_ddmmss(cc, *d, 1, *d1, 0, carry);
		carry = 1 - cc;
		d1++; d++; m1--;
	}
	return carry;
}

/*
 * Comparison: d1 and d2 have the same number of limbs, m.
 */

int mp_compare (__u32 *d1, __u32 *d2, int m)
{
	d1 = &d1[m-1];
	d2 = &d2[m-1];
	while (m) {
		if (*d1 == *d2) {
			d1--; d2--; m--;
			continue;
		}
		return (*d1 > *d2) ? +1 : -1;
	}
	return 0;
}

/*
 * Multiplication: d1 has m1 limbs, d2 has m2 limbs, and d points to an
 * array of m1+m2 limbs, where the product will be put.
 * For efficiency, it is better to have m1 <= m2.
 */

void mp_mul1 (__u32 *d1, int m1, __u32 *d2, int m2, __u32 *d)
{
	int i, j;
	__u32 th, tl, f, carry, *p2, *p;

	memset(d, 0, (m1+m2) * sizeof(__u32));
	for (i = 0; i < m1; i++) {
		f = d1[i];
		p2 = &d2[0]; p = &d[i];
		carry = 0;
		for (j = m2; j; j--) {
			umul_ppmm(th, tl, f, *p2);
			add_ssaaaa(th, tl, th, tl, 0, *p);
			add_ssaaaa(carry, *p, th, tl, 0, carry);
			p2++; p++;
		}
		*p = carry;
	}
}

/* Multiplication a la Karatsuba */
#define KARATSUBA_THRESHOLD 24

void mp_karatsuba (__u32 *d1, __u32 *d2, int m, __u32 *d)
{
	int m0, m1;
	__u32 *s1, *s2, *s12;

	if (m < KARATSUBA_THRESHOLD) {
		mp_mul1(d1, m, d2, m, d);
		return;
	}
	m0 = (m+1)/2; m1 = m-m0;
	mp_karatsuba(d1, d2, m0, d);
	mp_karatsuba(d1+m0, d2+m0, m1, d+2*m0);
	s1 = alloca((m0+1) * sizeof(__u32));
	s2 = alloca((m0+1) * sizeof(__u32));
	s12 = alloca(2*(m0+1) * sizeof(__u32));
	s1[m0] = mp_add(d1, m0, d1+m0, m1, s1);
	s2[m0] = mp_add(d2, m0, d2+m0, m1, s2);
	mp_karatsuba(s1, s2, m0+1, s12);
	mp_substract(s12, 2*(m0+1), d, 2*m0, s12);
	mp_substract(s12, 2*(m0+1), d+2*m0, 2*m1, s12);
	mp_add(d+m0, m0+2*m1, s12, 2*(m0+1), d+m0);
}

#ifdef TEST
#include <assert.h>
#include <time.h>

int main (int argc, char **argv)
{
	int i, limbs=100, loops=100;
	__u32 *d1, *d2, *d, d1m, d2m, dm, tmp;

	if (argc > 1)
		limbs = atoi(argv[1]);
	if (argc > 2)
		loops = atoi(argv[2]);

	d1 = malloc(limbs * sizeof(__u32));
	d2 = malloc(limbs * sizeof(__u32));
	d = malloc(2 * limbs * sizeof(__u32));

	srand(time(0));
	while (loops--) {
		d1m = d2m = dm = 0;
		for (i = 0; i < limbs; i++) {
			tmp = rand();
			d1[i] = tmp;
			d1m += tmp % 65537;
			tmp = rand();
			d2[i] = tmp;
			d2m += tmp % 65537;
		}
		d1m %= 65537;
		d2m %= 65537;
		mp_karatsuba(d1, d2, limbs, d);
		for (i = 0; i < 2*limbs; i++)
			dm += d[i] % 65537;
		dm %= 65537;
		d1m = ((__u64)d1m * d2m) % 65537;
		assert(d1m == dm);
	}
	return 0;
}

#endif
