//	Roast+ License

//	SIMD

#ifndef __SFJP_OPENMGL_roast_simd_lambda_HPP__
#define __SFJP_OPENMGL_roast_simd_lambda_HPP__

#include "roast/simd_lambda/simd_core.hpp"
#include "roast/std/parallelable.hpp"
#include <string>
#include <memory.h>

namespace roast{

	/**
	 * Exception type carrying a human-readable message.
	 *
	 * The (misspelled) class name is part of the public interface and is kept
	 * as-is so existing throw/catch sites continue to compile.
	 */
	class simd_lambda_exeption
	{
	protected:
		std::string m_msg;	/* Owned copy of the message text. */
	public:
		/**
		 * @param msg  NUL-terminated message text. A null pointer is stored as
		 *             an empty message, because constructing std::string from
		 *             a null pointer is undefined behaviour.
		 */
		simd_lambda_exeption(const char* msg) : m_msg(msg ? msg : "") {}

		/**
		 * @return The stored message as a C string. The pointer stays valid
		 *         for as long as this exception object is alive.
		 *
		 * Declared const so it can be called through a const reference, e.g.
		 * inside `catch (const simd_lambda_exeption& e)`.
		 */
		const char* get() const { return m_msg.c_str(); }
	};

	//////////////////////////////////////////////////

	template <int _SIMD_Ty=ROAST_PARALLELABLE_TYPE_SIMD_SSE4>	/*	_SIMD_Ty = ROAST_PARALLELABLE_TYPE_` (by roast/std/parallelable.hpp)	*/
	class simd_lambda
	{
	protected:
		enum { _XMM_MAX = 16 };	/*	XMM̍ő吔B8ɂ悤ǂ悩ȁ[	*/

		/*	XMM̏ԃtOz	*/
		int m_xmm_state[_XMM_MAX];
		enum {
			_XMM_STATE_EMPTY = 0,		//	
			_XMM_STATE_INT = 1,			//	lpXMMƂĊ蓖
			_XMM_STATE_FLOAT = 2,		//	_pXMMƂĊ蓖
			_XMM_STATE_DOUBLE = 3		//	doublepXMMƂĊ蓖
		};

		/*
		int select_empty_xmm(){
			//if ( m_xmm_state[0] == 
			for(int i=0; i<_XMM_MAX; i++)
			{
				if ( m_xmm_state[i] == _XMM_STATE_EMPTY )
					return i+1;
			}
			throw simd_lambda_exeption("select_empty_xmm: Not enough XMM.");
		};
		*/

		int select_empty_xmm(int length)
		{
			//if ( m_xmm_state[0] == 
			int xmm_index = -1;
			int count = 0;
			for(int i=0; i<_XMM_MAX; i++)
			{
				if ( m_xmm_state[i] == _XMM_STATE_EMPTY ){
					count++;
					if ( count == length )
						return xmm_index;
				}
				else {
					count = 0;
					xmm_index = -1;
				}
			}

			//if ( xmm_index == -1 || length != 0 )
			throw simd_lambda_exeption("select_empty_xmm: Not enough XMM.");
		};


		simd_lambda* m_p_simd_lamda_left;	/*	̂ނ	*/
		int m_low_index;

		/////////////////////////////////////////////////////////

	public:
		enum {
			XMM_START_TAIL = 0xfffffff0
		};

		/////////////////////////////////////////////////////////

	public:
		//	Constructor/Destructor
		simd_lambda(){
			::memset(m_xmm_state,0x00,sizeof(m_xmm_state));
			m_p_simd_lamda_left = NULL;
			m_low_index = 0;
		}
		simd_lambda(float *f_array_4, unsigned int float_count, int xmm_start=XMM_START_TAIL)
		{
			simd_lambda();
			load_floats( f_array_4, float_count, xmm_start );
		}
		virtual ~simd_lambda(){}

		/////////////////////////////////////////////////////////

		//	float^[h܂B
		//	- float_count4̔{łKv܂
		//	- xmm_startXMM_START_TAILw肷邩ȗƁAŋ󂢂ĂxmmTA蓖Ă܂B
		//	  ̏ꍇxቺ邽߁AŔԍw肷̂D܂łBȂ1`8̊ԂŎw肵܂B
		//	- XMMfloat_countfloati[邽߂̘A󂫗̈悪ȂƁAsimd_lambda_exeption O𓊂܂B
		inline simd_lambda& load_floats(float *f_array_4, unsigned int float_count, int xmm_start=XMM_START_TAIL)
		{
			//	SSE1ȍ~Ŏgp\
			if ( _SIMD_Ty >= ROAST_PARALLELABLE_TYPE_SIMD_SSE &&
				 _SIMD_Ty <= ROAST_PARALLELABLE_TYPE_SIMD_SSE_END )
			{
				/*	xmm_start  XMM_START_TAIL ȂXMMT	*/
				if ( xmm_start == XMM_START_TAIL )
					//xmm_start = select_empty_xmm();
					xmm_start = select_empty_xmm(float_count/4);

				if ( xmm_start+(float_count/4)-1 > _XMM_MAX )
					throw simd_lambda_exeption("float_loads: Not enough XMM.");

				///////////////////////////////////////////////////////

				if ( float_count >= 4 )
					simd::sse::movups(xmm_start+0, f_array_4);
				if ( float_count >= 8 )
					simd::sse::movups(xmm_start+1, f_array_4+4);
				if ( float_count >= 12 )
					simd::sse::movups(xmm_start+2, f_array_4+8);
				if ( float_count >= 16 )
					simd::sse::movups(xmm_start+3, f_array_4+12);
				/*
				for(unsigned int i=0; i<float_count/4; i++)
					sse::movaps(i+1, f_array_4);
				*/
			}

			return *this;
		}

		/////////////////////////////////////////////////////////////////////////////

		class simd_lambda_ary
		{

		};

		/*simd_lambda& operator ,(const simd_lambda& left/*, const simd_lambda& right*)
		{
			m_low_index = left.m_low_index + 1;
			return *this;
		}*/
		simd_lambda& operator ,(const simd_lambda& left/*, const simd_lambda& right*/)
		{
			m_low_index = left.m_low_index + 1;
			return *this;
		}
	};

	// Convenience aliases: simd_lambda instantiated with each of the
	// ROAST_PARALLELABLE_TYPE_SIMD_SSE* constants as its _SIMD_Ty argument.
	typedef simd_lambda<ROAST_PARALLELABLE_TYPE_SIMD_SSE> simd_lambda_sse;
	typedef simd_lambda<ROAST_PARALLELABLE_TYPE_SIMD_SSE2> simd_lambda_sse2;
	typedef simd_lambda<ROAST_PARALLELABLE_TYPE_SIMD_SSE3> simd_lambda_sse3;
	typedef simd_lambda<ROAST_PARALLELABLE_TYPE_SIMD_SSE4> simd_lambda_sse4;
}

#endif//__SFJP_OPENMGL_roast_simd_lambda_HPP__
