//
// Copyright (c) 2003 by Stephen C. Dewhurst
// Permission to use, copy, modify, distribute and sell this software for any purpose is
// hereby granted without fee, provided that the above copyright notice appears in all copies
// and that both that copyright notice and this permission notice appear in supporting documentation.
// The author makes no representations about the suitability of this software for any purpose.
// It is provided "as is" without express or implied warranty.
//
// Unoptimized and optimized typeint multiplication implementations
//
// Described in CUJ online experts column, April 2003
// 
#ifndef TYPEINT_MUL_H
#define TYPEINT_MUL_H

#include "utils.h"
#include <iostream>

//
// Number of bits
//
template <typename T>
struct Len
	{ enum { r = Len<typename Deref<T>::R>::r + 1 }; };

template <>
struct Len<char>
	{ enum { r = 0 }; };

//
//	Addition
//
// Technique:  use a compile time switch
template <typename A, typename B>
struct Add;

template <int n, typename A, typename B>
struct AddImpl;

template <typename A, typename B>
struct AddImpl<3,A,B> { // neither A nor B is 0
	// Approach:  find the sum of more significant bits
	// then modify to take into account least significant bit pos
    typedef typename Deref<A>::R DA;
    typedef typename Deref<B>::R DB;
    typedef typename Add<DA,DB>::R Temp;
    typedef
    	typename Select<
    		IsConst<A>::r&&IsConst<B>::r, // if both bits are 1
    		typename Add<Temp,char *const>::R *, // add one to Temp, lower bit is 0
    		typename Select<
    			IsConst<A>::r||IsConst<B>::r, // else if one bit is 1
    			Temp *const, // lower bit is one
    			Temp * // else it's 0
    		>::R
    	>::R R;
};

template <typename A, typename B>
struct AddImpl<2,A,B> {
    typedef A R; // A+0=A
};

template <typename A, typename B>
struct AddImpl<1,A,B> {
    typedef B R; // 0+B=B
};

template <typename A, typename B>
struct AddImpl<0,A,B> {
    typedef char R; // 0+0=0
};

template <typename A, typename B>
struct Add {
	enum { switchval = IsPtr<A>::r*2 + IsPtr<B>::r };
	typedef typename AddImpl<switchval,A,B>::R R;
};

//
//	Left shift
//

template <typename T, int n>
struct LShift
	{ typedef typename LShift<T,n-1>::R *R; };

template <typename T>
struct LShift<T,0>
	{ typedef T R; };

// need to avoid shifting zeros
template <int n>
struct LShift<char,n>
	{ typedef char R; };

template <>
struct LShift<char,0>
	{ typedef char R; };

//
//	Right shift
//

template <typename T, int n>
struct RShift
	// Note: Deref has no effect if not a ref, so OK if n>#refs
	{ typedef typename Deref<typename RShift<T,n-1>::R>::R R; };

template <typename T>
struct RShift<T,0>
	{ typedef T R; };

//
//	Translating type-integers that represent small integers to integers
//
typedef unsigned long Int;
const Int IntLen = std::numeric_limits<Int>::digits;

template <typename T>
struct Value
	{ enum { r = 0 }; };

template <typename T>
struct Value<T *>
	{ enum { r = Value<T>::r*2 }; };

template <typename T>
struct Value<T * const>
	{ enum { r = Value<T>::r*2 + 1 }; };

//
//	Translating small integers to type-integers
//
template <Int n>
struct TypeInt
	{ typedef typename Select<n&1,typename TypeInt<n>>1>::R *const, typename TypeInt<n>>1>::R *>::R R; };

template <>
struct TypeInt<0>
	{ typedef char R; };

//
//	Printing big typeints
//
// Just a handy utility for debugging, and is not more generally useful.
// Prints a binary representation, but always prints a leading zero.
template <typename T>
struct PrintTypeint;

template <>
struct PrintTypeint<char> {
	static void print() {
		std::cout << '0';
	}
};

template <typename T>
struct PrintTypeint<T *> {
	static void print() {
		PrintTypeint<T>::print();
		std::cout << '0';
	}
};

template <typename T>
struct PrintTypeint<T *const> {
	static void print() {
		PrintTypeint<T>::print();
		std::cout << '1';
	}
};

//
//	Simple and Slow Multiplication
//
// As noted in the article, this multiplication algorithm is simple, but
// has quadratic complexity.
template <typename A, typename B>
struct Mul; // no definition of primary

template <typename A>
struct Mul<A,char> // A * 0 == 0
{ typedef char R; };

template <typename A, typename B>
struct Mul<A,B *> // 0 digit, just shift: A*B0 == (A*B)<<1
{ typedef typename Mul<A,B>::R *R; };

template <typename A, typename B>
struct Mul<A,B *const> { // 1 digit, shift and add: A*B1 == ((A*B)<<1)+A
	typedef typename Mul<A,B>::R *Tmp;
	typedef typename Add<Tmp,A>::R R;
};

template <typename A>
struct Mul<A,char *const> // for high-order bit, to avoid a char * typeint
{ typedef A R; };

//
// Subtract
//
// Note:  This is unsigned subtraction with the assumption
// that A >= B!!!
template <typename A, typename B>
struct Sub;

template <typename A, typename B>
struct Sub<A *const,B *const> {
	typedef typename Sub<A,B>::R *R; // A1-B1 == (A-B)0
};

template <typename A, typename B>
struct Sub<A *const,B *> {
	typedef typename Sub<A,B>::R *const R; // A1-B0 == (A-B)1
};

template <typename A, typename B>
struct Sub<A *,B *> {
	typedef typename Sub<A,B>::R *R; // A0-B0 == (A-B)0
};

template <typename A>
struct Sub<A,char> {
	typedef A R; // A-0 == A
};

template <>
struct Sub<char,char> {
	typedef char R; // 0-0 == 0
};

// Borrow scans back to find a 1 bit, zeros it,
// and sets all the preceding 0 bits to 1.
template <typename T>
struct Borrow;

template <typename T>
struct Borrow<T *const> {
	// make sure there's no leading 0
	typedef typename Select<IsPtr<T>::r,T *,T>::R R;
};

template <typename T>
struct Borrow<T *> {
	typedef typename Borrow<T>::R *const R;
};

template <typename A, typename B>
struct Sub<A *,B *const> {
	// Need to borrow.
	typedef typename Borrow<A>::R T1;
	typedef typename Sub<T1,B>::R *const R;
};

//
//	Scouts that are used to look ahead for runs
//	of like bits.
//
template <typename T>
struct Count0s
	{ enum { r = 0 }; };

template <typename T>
struct Count0s<T *>
	{ enum { r = Count0s<T>::r + 1 }; };

template <typename T>
struct Count1s
	{ enum { r = 0 }; };

template <typename T>
struct Count1s<T *const>
	{ enum { r = Count1s<T>::r + 1 }; };

//
//	An optimized multiplication algorithm.
//	The complexity is still quadratic, but the actual
//	compile time is typically much better than that of
//	the unoptimized algorithm.
//
template <typename A, typename B>
struct Mulopt;

template <int zeros, int ones, typename A, typename B>
struct M4;

template <int n, typename A, typename B>
struct M4<0,n,A,B> { // B has run of 1s
	typedef typename RShift<B,n>::R T1;
	typedef typename Mulopt<A,T1>::R T2;
	typedef typename LShift<T2,n>::R T3;
	typedef typename LShift<A,n>::R Ax2n;
	typedef typename Sub<Ax2n,A>::R Ax2n_1;
	typedef typename Add<T3,Ax2n_1>::R R;
};

template <int n, typename A, typename B>
struct M4<n,0,A,B> { // B has run of 0s
	typedef typename RShift<B,n>::R T1;
	typedef typename Mulopt<A,T1>::R T2;
	typedef typename LShift<T2,n>::R R;
};

template <typename A, typename B>
struct M4<0,0,A,B> { // nothing left of B
	// specialization of Mulopt assures us no zero args
	// so we can return 1 here
	typedef char * const R;
};

// M3 counts runs of 0s and 1s and goes to the appropriate place.
template <typename A, typename B>
struct M3 {
	enum {
		ones = Count1s<B>::r,
		zeros = Count0s<B>::r
	};
	typedef typename M4<zeros,ones,A,B>::R R;
};

// M2 switches the args, if necessary, so the longer arg is second arg.
template <bool, typename A, typename B>
struct M2 {
	typedef typename M3<A,B>::R R;
};

template <typename A, typename B>
struct M2<false,A,B> {
	typedef typename M3<B,A>::R R;
};

template <typename A, typename B>
struct Mulopt {
	// the second arg should be the larger of the two
	enum { alen = Len<A>::r, blen = Len<B>::r };
	typedef typename M2<(alen<=blen),A,B>::R R;
};

// three specializations for zero args
template <typename A>
struct Mulopt<A,char> {
	typedef char R;
};

template <typename B>
struct Mulopt<char,B> {
	typedef char R;
};

template <>
struct Mulopt<char,char> {
	typedef char R;
};

#endif
