//======================================================================
//-----------------------------------------------------------------------
/**
 * @file		FndEncoding.cpp
 * @brief		Character encoding detection source file
 *
 * @author		t.sirayanagi
 * @version		1.0
 *
 * @par			copyright
 * Copyright (C) 2011 Takazumi Shirayanagi\n
 * The new BSD License is applied to this software.
 * see iris_LICENSE.txt
*/
//-----------------------------------------------------------------------
//======================================================================
#define INCG_IRIS_FndEncoding_CPP_

//======================================================================
// include
#include "FndEncoding.h"
#include "FndASCII.h"
#include "FndUTF8.h"
#include "FndUTF16.h"
#include "FndEUC.h"
#include "FndSJIS.h"
#include "../../iris_debug.h"

#include <cstring>

namespace iris {
namespace fnd
{

//======================================================================
// class
/**********************************************************************//**
 *
 * Encoding ǂ
 *
 ----------------------------------------------------------------------
 * @param [in]	lpBuffer	= ʂobt@
 * @param [in]	size		= obt@TCY
 * @return	^Ul
*//***********************************************************************/
CODEPAGE CEncoding::GetEncoding(const void* lpBuffer, size_t size)
{
	IRIS_ASSERT( lpBuffer != nullptr );
	IRIS_ASSERT( size > 0 );

	const BYTE	cEscape = 0x1B;	// ESC
	const BYTE	cAt		= 0x40;	// @
	const BYTE	cDoller	= 0x24;	// $
	const BYTE	cAnd	= 0x26;	// &
	const BYTE	cOpen	= 0x28;	// (
	const BYTE	cB		= 0x42;	// B
	const BYTE	cD		= 0x44;	// D
	const BYTE	cJ		= 0x4A;	// J
	const BYTE	cI		= 0x49;	// I

	bool binary = false;
	bool ascii = true;
	int utf16le=0, utf16be=0;

	{
		const u16* buf = static_cast<const u16*>(lpBuffer);
		if( *buf == CUTF16::BOM ) return ENC_CP_UTF16LE;
		if( *buf == CUTF16BE::BOM ) return ENC_CP_UTF16BE;
	}

	// binary / ASCII / Unicode / JIS
	{
		const BYTE*	buf = static_cast<const BYTE*>(lpBuffer);
		for( size_t i=0; i < size; ++i, ++buf )
		{
			BYTE b1 = *buf;
			if( b1 <= 0x6u || b1 == 0x7Fu || b1 == 0xFFu )
			{
				binary = true;
				if( b1 == 0x00 && i < size -1 )
				{
					if( (i&1) == 0 )
					{
						if( b1 == 0x00 && ((*(buf+1)) <= 0x7F) )	// Unicode ASCII
							utf16be += 2;
					}
					else
					{
						if( b1 == 0x00 && (i > 0 && (*(buf-1)) <= 0x7F) )		// Unicode ASCII
							utf16le += 2;
					}
					ascii = false;
				}
				continue;
			}

			if( b1 == cEscape )
			{
				if( i < size - 2 )
				{
					BYTE b2 = *(buf+1);
					BYTE b3 = *(buf+2);
					if( b2 == cDoller )
					{
						if( b3 == cAt		// JIS X 0208-1978
							|| b3 == cB		// JIS X 0208-1983
							)
						{
							return ENC_CP_JIS;
						}
						if( i < size - 3 )
						{
							BYTE b4 = *(buf+3);
							if( b3 == cOpen && b4 == cD )	// JIS X 0212-1990iJIS⏕j
							{
								return ENC_CP_JIS;
							}
						}
					}
					else if( b2 == cOpen )
					{
						if( b3 == cB	// ASCII
							|| b3 == cJ	// JIS X 0201-1976 Roman Set
							|| b3 == cI	// JIS X 0201-1976 Љ
							)
						{
							return ENC_CP_JIS;
						}
					}
					else if( b2 == cAnd && b3 == cAt )
					{
						if( i < size - 5 )
						{
							BYTE b4 = *(buf+3);
							BYTE b5 = *(buf+4);
							BYTE b6 = *(buf+5);
							if( b4 == cEscape && b5 == cDoller && b6 == cB )	// JIS X 0208-1990
							{
								return ENC_CP_JIS;
							}
						}
					}
				}
			}
			else if( b1 >= 0x80 )
			{
				ascii = false;
			}
		}
	}

	if( ascii ) return ENC_CP_ASCII;	// US ASCII
	int utf16=utf16le+utf16be;
	if( binary && utf16 == 0 ) return ENC_CP_UNKOWN;	// oCi͕sԂ

	// SJIS / UTF8 / EUC
	if( size & 1 )
	{
		swap(utf16le, utf16be);
	}
	int sjis=0, utf8=0, euc=0;
	int neg_sjis=0;
	int neg_utf8=0;
	int neg_euc=0;

	// SJIS
	if( !CSJIS::IsMatch(lpBuffer, size, sjis, neg_sjis) )
	{
		sjis = 0;
	}
	// UTF8
	if( !CUTF8::IsMatch(lpBuffer, size, utf8, neg_utf8) )
	{
		utf8 = 0;
	}
	// EUC
	if( !CEUC::IsMatch(lpBuffer, size, euc, neg_euc) )
	{
		euc = 0;
	}

	if( utf16 > 0
		&& utf16 >= sjis
		&& utf16 >= utf8
		&& utf16 >= euc
		)
	{
		if( utf16le >= utf16be )
			return ENC_CP_UTF16LE;
		else
			return ENC_CP_UTF16BE;
	}
	else
	{
		sjis -= neg_sjis;
		utf8 -= neg_utf8;
		euc -= neg_euc;
		if( utf8 > 0
			&& utf8 >= utf16
			&& utf8 >= sjis
			&& utf8 >= euc
			)
		{
			return ENC_CP_UTF8;
		}
		else if( sjis > 0
			&& sjis >= utf16
			&& sjis >= utf8
			&& sjis >= euc
			)
		{
			return ENC_CP_W31J;
		}
		else if( euc > 0
			&& euc >= utf16
			&& euc >= sjis
			&& euc >= utf8
			)
		{
			return ENC_CP_EUC;
		}
	}

	return ENC_CP_UNKOWN;
}

}	// end of namespace fnd
}	// end of namespace iris
