//======================================================================
//-----------------------------------------------------------------------
/**
 * @file		FndUTF8.cpp
 * @brief		UTF8 code file
 *
 * @author		t.sirayanagi
 * @version		1.0
 *
 * @par			copyright
 * Copyright (C) 2011 Takazumi Shirayanagi\n
 * The new BSD License is applied to this software.
 * see iris_LICENSE.txt
*/
//-----------------------------------------------------------------------
//======================================================================
#define INCG_IRIS_FndUTF8_CPP_

//======================================================================
// include
#include "FndUTF8.h"
#include "FndASCII.h"
#include "../../iris_debug.h"

namespace iris {
namespace fnd
{

//======================================================================
// class
/**********************************************************************//**
 *
 * Tests whether a byte is the first (lead) byte of a UTF8 multi-byte sequence.
 *
 * @note	0xC0-0xDF	: 2 bytes
 *			0xE0-0xEF	: 3 bytes
 *			0xF0-0xF7	: 4 bytes
 *
 ----------------------------------------------------------------------
 * @param [in]	code	= byte to classify
 * @retval	0 = not a lead byte (below 0xC0 or above 0xF7)
 * @return	number of bytes in the sequence introduced by the byte
*//***********************************************************************/
int CUTF8::IsLeadByte(BYTE code)
{
	int length = 0;	// stays 0 when the byte falls outside every lead-byte range
	if( code >= 0xC0u && code <= 0xDFu )
	{
		length = 2;
	}
	else if( code >= 0xE0u && code <= 0xEFu )
	{
		length = 3;
	}
	else if( code >= 0xF0u && code <= 0xF7u )
	{
		length = 4;
	}
	return length;
}

/**********************************************************************//**
 *
 * Tests whether a byte is a second-or-later (trail) byte of a UTF8
 * multi-byte sequence.
 *
 * @note	[0x80,0xBF]
 *
 ----------------------------------------------------------------------
 * @param [in]	code	= byte to classify
 * @return	true when the top two bits of the byte are "10"
*//***********************************************************************/
bool CUTF8::IsTrailByte(BYTE code)
{
	// Keep only the two most significant bits and compare with the 10xxxxxx marker.
	const unsigned int marker = code & 0xC0u;
	return marker == 0x80u;
}

/**********************************************************************//**
 *
 * Tests whether the bytes following a lead byte form a valid tail for a
 * UTF8 multi-byte sequence of the given length.
 *
 * @note	The function reads buf[0] .. buf[len-1] without any bounds check;
 *			the caller must make sure that many bytes are readable.
 *			For len == 2 only the trail-byte marker of buf[1] is checked.
 *			For len == 3 and len == 4 the value is decoded as well, so that
 *			overlong encodings (values that fit in a shorter sequence) are
 *			rejected.
 *
 ----------------------------------------------------------------------
 * @param [in]	buf	= buffer positioned on the lead byte
 * @param [in]	len	= sequence length in bytes (2, 3 or 4)
 * @return	true when the sequence is accepted, false otherwise
 *			(including any len other than 2, 3 or 4)
*//***********************************************************************/
bool CUTF8::IsTrailByte(const BYTE* buf, size_t len)
{
	switch( len )
	{
	case 2:
		return IsTrailByte(buf[1]);
	case 3:
		{
			// Assemble the 16-bit value: 4 bits from the lead byte, 6 from each trail byte.
			WORD code = (buf[0] & 0x0f) << 12;
			code |= (buf[1] & 0x3f) << 6;
			code |= (buf[2] & 0x3f);
			// Everything below U+0800 fits in one or two bytes, so a three-byte
			// sequence decoding to a smaller value is an overlong encoding.
			if( IsTrailByte(buf[1])
				&& IsTrailByte(buf[2])
				&& ( code >= 0x0800 )
				)
				return true;
		}
		break;
	case 4:
		{
			// The lead byte is masked with 0x0f (not 0x07) on purpose: if bit 3 of
			// the lead byte is set, the value exceeds 0x1FFFFF and is rejected below.
			DWORD code = (buf[0] & 0x0f) << 18;
			code |= (buf[1] & 0x3f) << 12;
			code |= (buf[2] & 0x3f) << 6;
			code |= (buf[3] & 0x3f);
			// Lower bound rejects overlong encodings of values that fit in three bytes.
			// NOTE(review): the upper bound is 0x1FFFFF, in line with IsLeadByte
			// accepting 0xF0-0xF7; it is not limited to U+10FFFF.
			if( IsTrailByte(buf[1])
				&& IsTrailByte(buf[2])
				&& IsTrailByte(buf[3])
				&& ( code >= 0x10000 && code <= 0x1FFFFF )
				)
				return true;
		}
		break;
	}
	return false;
}

/**********************************************************************//**
 *
 * Tests whether a single byte value can be part of UTF8 text as
 * recognised by this class.
 *
 ----------------------------------------------------------------------
 * @param [in]	code	= byte to classify
 * @return	false for bytes above 0xF7, true for every other value
*//***********************************************************************/
bool CUTF8::IsMatch(BYTE code)
{
	// 0xF7 is the largest byte value accepted here.
	return code <= 0xF7u;
}

/**********************************************************************//**
 *
 * Tests whether a buffer can be interpreted as UTF8 and computes a score.
 *
 * @note	Scoring:
 *			- a byte accepted by CASCII::IsMatch adds 1 to weight
 *			- a well-formed multi-byte sequence adds its length to weight,
 *			  plus 1 extra when it is longer than 2 bytes
 *			- a non-lead byte rejected by CASCII::IsMatch, or a lead byte
 *			  followed by an invalid tail, makes the result false
 *			- a lead byte whose sequence would run past the end of the buffer
 *			  is skipped without scoring; the bytes after it are then
 *			  examined one by one
 *
 ----------------------------------------------------------------------
 * @param [in]	lpBuffer	= buffer to examine
 * @param [in]	size		= buffer size in bytes
 * @param [out]	weight		= score (used to compare the degree of match with other encodings)
 * @return	true when no byte or sequence was rejected
*//***********************************************************************/
bool CUTF8::IsMatch(const void* lpBuffer, size_t size, int& weight)
{
	const BYTE*	buf = static_cast<const BYTE*>(lpBuffer);
	weight = 0;

	bool ret = true;
	for( size_t i=0; i < size; ++i, ++buf )
	{
		BYTE ch1 = *buf;
		int len = IsLeadByte(ch1);
		if( len == 0 )
		{
			if( CASCII::IsMatch(ch1) )
			{
				++weight;
			}
			else
			{
				ret = false;
			}
		}
		else if( i+len <= size )	// the whole sequence lies inside the buffer
		{
			if( IsTrailByte(buf, len) )
			{
				weight += len;
				if( len > 2 ) ++weight;
			}
			else
			{
				// A lead byte without a valid tail is not UTF8.
				ret = false;
			}
			// Step to the last byte of the sequence only; the loop header
			// performs the final advance onto the next character.
			buf += len-1;
			i += len-1;
		}
	}
	return ret;
}

/**********************************************************************//**
 *
 * Tests whether a buffer can be interpreted as UTF8 and computes both a
 * positive and a negative score.
 *
 * @note	Scoring:
 *			- a byte accepted by CASCII::IsMatch adds nothing, except that a
 *			  '\0' which is not the last byte of the buffer adds 1 to negative
 *			- a non-lead byte rejected by CASCII::IsMatch adds 2 to negative
 *			- a well-formed multi-byte sequence adds its length to weight,
 *			  plus 1 extra when it is longer than 2 bytes
 *			- a lead byte followed by an invalid tail adds twice the sequence
 *			  length to negative
 *			- a sequence cut off by the end of the buffer (rest bytes left):
 *			  when every remaining byte is a trail byte, rest is added to both
 *			  weight and negative; otherwise twice rest is added to negative
 *
 ----------------------------------------------------------------------
 * @param [in]	lpBuffer	= buffer to examine
 * @param [in]	size		= buffer size in bytes
 * @param [out]	weight		= score (used to compare the degree of match with other encodings)
 * @param [out]	negative	= mismatch score (used to compare the degree of match with other encodings)
 * @return	false when negative is positive and not smaller than weight, true otherwise
*//***********************************************************************/
bool CUTF8::IsMatch(const void* lpBuffer, size_t size, int& weight, int& negative)
{
	const BYTE*	buf = static_cast<const BYTE*>(lpBuffer);
	weight = 0;
	negative = 0;

	for( size_t i=0; i < size; ++buf, ++i )
	{
		BYTE ch1 = *buf;
		int len = IsLeadByte(ch1);
		if( len == 0 )
		{
			if( CASCII::IsMatch(ch1) )
			{
				if( ch1 == 0 && i < size-1 )
					++negative;	// a '\0' in the middle of the buffer is unnatural
			}
			else
			{
				negative += 2;
			}
		}
		else if( i+len <= size )	// the whole sequence lies inside the buffer
		{
			if( IsTrailByte(buf, len) )
			{
				weight += len;
				if( len > 2 ) ++weight;
			}
			else
			{
				negative += len*2;
			}
			// Step to the last byte of the sequence only; the loop header
			// performs the final advance onto the next character.
			buf += len-1;
			i += len-1;
		}
		else
		{
			// The buffer ends inside this sequence.
			const int rest = static_cast<int>(size-i);	// bytes left, lead byte included
			int w = 1;									// the lead byte itself
			for( int k=1; k < rest; ++k, ++w )
			{
				// Inspect the bytes that follow the lead byte, not the lead byte itself.
				if( !IsTrailByte(buf[k]) )
				{
					negative += rest;
					w = 0;
					break;
				}
			}
			weight += w;
			negative += rest;
			// The whole tail has been scored; leave the cursor on the last byte
			// so that the loop header ends the scan.
			buf += rest-1;
			i += rest-1;
		}
	}
	if( negative > 0 && (negative >= weight) ) return false;
	return true;
}

}	// end of namespace fnd
}	// end of namespace iris
