// Encoder.h
// (c) 2004 exeal

#ifndef _ENCODER_H_
#define _ENCODER_H_

#include "..\AscensionCommon.h"
#include <cassert>
#include <set>


namespace Manah {
namespace Text {

///	Identifier of a character encoding: a Windows code page number or one of the CPEX_* values below.
typedef unsigned int CodePage;

//	Encodings that are not available as Windows code pages, or that this library
//	implements by itself, together with pseudo code pages that request auto-detection.
const CodePage CPEX_UNICODE_UTF16LE = 1200;			///< UTF-16 (little endian)
const CodePage CPEX_UNICODE_UTF16BE = 1201;			///< UTF-16 big endian
const CodePage CPEX_UNICODE_UTF32LE = 12000;		///< UTF-32 (little endian)
const CodePage CPEX_UNICODE_UTF32BE = 12001;		///< UTF-32 big endian
const CodePage CPEX_AUTODETECT = 50001;				///< Auto-detect
const CodePage CPEX_JAPANESE_AUTODETECT = 50932;	///< Japanese (auto-detect)
const CodePage CPEX_KOREAN_AUTODETECT = 50949;		///< Korean (auto-detect)
const CodePage CPEX_AUTODETECT_SYSTEMLANG = 70000;	///< Auto-detect, guided by the system language
const CodePage CPEX_AUTODETECT_USERLANG = 70001;	///< Auto-detect, guided by the user language
const CodePage CPEX_UNICODE_AUTODETECT = 70010;		///< Unicode (auto-detect)
const CodePage CPEX_UNICODE_UTF5 = 70011;			///< UTF-5
const CodePage CPEX_ARMENIAN_AUTODETECT = 70020;	///< Armenian (auto-detect)
const CodePage CPEX_ARMENIAN_ARMSCII7 = 70021;		///< Armenian (ARMSCII-7)
const CodePage CPEX_ARMENIAN_ARMSCII8 = 70022;		///< Armenian (ARMSCII-8)
const CodePage CPEX_ARMENIAN_ARMSCII8A = 70023;		///< Armenian (ARMSCII-8A)
const CodePage CPEX_VIETNAMESE_AUTODETECT = 70030;	///< Vietnamese (auto-detect)
const CodePage CPEX_VIETNAMESE_TCVN = 70031;		///< Vietnamese (TCVN)
const CodePage CPEX_VIETNAMESE_VISCII = 70032;		///< Vietnamese (VISCII)
const CodePage CPEX_VIETNAMESE_VPN = 70033;			///< Vietnamese (VPN)
const CodePage CPEX_JAPANESE_ISO2022JP = 70040;		///< Japanese (ISO-2022-JP)
const CodePage CPEX_JAPANESE_SHIFTJIS = 70041;		///< Japanese (Shift JIS)
const CodePage CPEX_JAPANESE_ISO2022JP1 = 70042;	///< Japanese (ISO-2022-JP-1)
const CodePage CPEX_JAPANESE_ISO2022JP2 = 70043;	///< Japanese (ISO-2022-JP-2)
const CodePage CPEX_JAPANESE_EUC = 70044;			///< Japanese (EUC)
const CodePage CPEX_JAPANESE_ISO2022JP2004 = 70045;	///< Japanese (ISO-2022-JP-2004)
const CodePage CPEX_JAPANESE_SHIFTJIS2004 = 70046;	///< Japanese (Shift_JIS-2004)
const CodePage CPEX_JAPANESE_EUCJIS2004 = 70047;	///< Japanese (EUC-JIS-2004)

/**
 *	Byte order mark of an encoding.
 *	Encoders that have no byte order mark return an instance with cBytes == 0 and an empty pszBOM.
 */
struct ByteOrderMark {
	std::size_t cBytes;		///< number of bytes in the mark
	const char* pszBOM;		///< the byte sequence of the mark
};


// CEncoder class definition
/////////////////////////////////////////////////////////////////////////////

// Default substitute for a character that cannot be converted
// (it might be better to obtain this from the user's language settings).
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation in C++; the name is kept because it may be used by other headers.
#define __DEFAULT_CHAR	'?'

/*
 * Parameter lists shared by the out-of-class definitions of the conversion methods.
 * They repeat the parameters of CEncoder::ConvertFromUnicode (CFU_ARGLIST) and
 * CEncoder::ConvertToUnicode (CTU_ARGLIST); the default values given in the class
 * declaration are shown as comments only.
 */
#define CFU_ARGLIST							\
	char* pszDest,							\
	std::size_t cchDest,					\
	const wchar_t* pwszSrc,					\
	std::size_t cchSrc /* = -1 */,			\
	bool(*pCallBack)(void) /* = 0 */

#define CTU_ARGLIST							\
	wchar_t* pwszDest,						\
	std::size_t cchDest,					\
	const char* pszSrc,						\
	std::size_t cchSrc /* = -1 */,			\
	bool(*pCallBack)(void) /* = 0 */

/*
 * Argument checks for bodies whose parameters come from CFU_ARGLIST / CTU_ARGLIST:
 * assert that both buffers are non-null and, when cchSrc is the "-1" sentinel,
 * replace it with the length of the null-terminated source string.
 *
 * Each macro expands to ONE compound statement, so it stays intact when it is the
 * body of an unbraced if/else, and it can be written with or without a trailing
 * semicolon. The sentinel is compared as std::size_t to avoid a signed/unsigned
 * comparison.
 */
#define CFU_CHECKARGS()										\
	{														\
		assert(pszDest != 0 && pwszSrc != 0);				\
		if(cchSrc == static_cast<std::size_t>(-1))			\
			cchSrc = wcslen(pwszSrc);						\
	}

#define CTU_CHECKARGS()										\
	{														\
		assert(pwszDest != 0 && pszSrc != 0);				\
		if(cchSrc == static_cast<std::size_t>(-1))			\
			cchSrc = strlen(pszSrc);						\
	}

// Set that CEncoder::_EnumCodePages inserts into. CEncoder::EnumCodePages points this at
// its argument right before calling ::EnumSystemCodePages, because that callback only
// receives the code page string and no user data.
// NOTE(review): declared static in a header, so every translation unit gets its own copy,
// and the pointer stays set after EnumCodePages returns.
static std::set<CodePage>*	_g_pWorking;

///	GR[_
class CEncoder {
	// RXgN^
protected:
	CEncoder() {}
public:
	virtual ~CEncoder() {}
private:
	CEncoder(const CEncoder& rhs);
	operator =(const CEncoder& rhs);

	// \bh
public:
	/**
	 *	UTF-16 ϊ
	 *	@param pszDest		[out] ϊ
	 *	@param cchDest		ϊ̒
	 *	@param pwszSrc		ϊ
	 *	@param cchSrc		ϊ̕
	 *	@param pCallBack	ϊłȂƂɌĂяoR[obN֐B
	 *						Ăяô͏̂݁Bnull Ɗ̕ɒuB
	 *						R[obN֐ true ԂƕϊłȂ̕ɁA
	 *						false ԂƂ̃\bh͂ɏԂ (0Ԃ)
	 *	@return				ϊ̕
	 */
	virtual std::size_t ConvertFromUnicode(
							char* pszDest, std::size_t cchDest,
							const wchar_t* pwszSrc, std::size_t cchSrc = -1,
							bool(*pCallBack)(void) = 0) = 0;
	/**
	 *	UTF-16 ɕϊ
	 *	@param pwszDest		[out] ϊ
	 *	@param cchDest		ϊ̕
	 *	@param pszSrc		ϊ
	 *	@param cchSrc		ϊ̕
	 *	@param pCallBack	ϊłȂƂɌĂяoR[obN֐B
	 *						CEncoder::ConvertFromUnicode \bhƓ
	 *	@return				ϊ̕
	 */
	virtual std::size_t ConvertToUnicode(
							wchar_t* pwszDest, std::size_t cchDest,
							const char* pszSrc, std::size_t cchSrc = -1,
							bool(*pCallBack)(void) = 0) = 0;
	/**
	 *	GR[_쐬BR[hy[WȂƂ null Ԃ
	 *	@param cp	R[hy[W
	 */
	static CEncoder* Create(CodePage cp);
	/**
	 *	
	 *	@param psz		
	 *	@param cch		ׂoCg
	 *	@param language	ʂɎgp錾
	 */
	static CodePage DetectCodePage(const char* psz, std::size_t cch, CodePage cp);
	///	p\ȃR[hy[W
	static void EnumCodePages(std::set<CodePage>& setCodePages);
	///	oCgI[_}[NԂ
	virtual const ByteOrderMark* GetByteOrderMark() const = 0;
	///	1̍őoCgԂ
	virtual uchar GetMaxCharacterLength() const = 0;
	///	ʂ̂߂̃R[hy[W
	static bool IsCodePageForAutoDetection(CodePage cp);
	///	LȃR[hy[W
	static bool IsValidCodePage(CodePage cp);
private:
	static BOOL CALLBACK _EnumCodePages(LPTSTR cp);
protected:
	CodePage	m_nCodePage;
};


/**
 *	Encoder for an encoding that this library implements by itself.
 *	Only the constructor and GetByteOrderMark have generic definitions in this header;
 *	the remaining members are declared here and must be provided per code page.
 *	@param cp	code page handled by the instantiation
 */
template<CodePage cp>
class CExternalEncoder : public CEncoder {
	// CEncoder::Create is the only intended way to construct an instance
	friend class CEncoder;

public:
	std::size_t ConvertFromUnicode(CFU_ARGLIST);
	std::size_t ConvertToUnicode(CTU_ARGLIST);
	const ByteOrderMark* GetByteOrderMark() const;
	uchar GetMaxCharacterLength() const;

private:
	CExternalEncoder();
};


/**
 *	Encoder that uses the Windows conversion tables as they are
 *	(::WideCharToMultiByte / ::MultiByteToWideChar).
 */
class CWindowsEncoder : public CEncoder {
	// CEncoder::Create is the only intended way to construct an instance
	friend class CEncoder;

public:
	std::size_t ConvertFromUnicode(CFU_ARGLIST);
	std::size_t ConvertToUnicode(CTU_ARGLIST);
	const ByteOrderMark* GetByteOrderMark() const;
	uchar GetMaxCharacterLength() const;

private:
	CWindowsEncoder(CodePage cp);
};


// Per-language auto-detection. Only declared here; CEncoder::DetectCodePage calls the
// instantiations for CPEX_JAPANESE_AUTODETECT and CPEX_UNICODE_AUTODETECT.
// (presumably specialized in the Encodings/*.h headers included below - TODO confirm)
//	psz, cch		bytes to examine and their count
//	cpResult		[out] receives the detected code page
//	cchConvertable	[out] DetectCodePage treats 0 as "nothing detected"; presumably the
//					amount of input that could be converted - TODO confirm
template<CodePage cp>
void DetectCodePageImpl(const char* psz, std::size_t cch, CodePage& cpResult, std::size_t& cchConvertable);

} // namespace Text
} // namespace Manah

#include "Encodings/Unicode.h"
//#include "Encodings/Armenian.h"
//#include "Encodings/Iscii.h"
#include "Encodings/Japanese.h"


namespace Manah {
namespace Text {

// CEncoder class partial implementation
/////////////////////////////////////////////////////////////////////////////

/**
 *	Creates the encoder for @a cp: one of the built-in CExternalEncoder instantiations
 *	when the library implements the encoding itself, a CWindowsEncoder otherwise.
 *	Any exception raised during construction is caught and reported as a null result.
 *	@param cp	code page
 *	@return		encoder allocated with new, or null on failure
 */
inline CEncoder* CEncoder::Create(CodePage cp) {
	CEncoder*	pCreated = 0;
	try {
		switch(cp) {
		case CPEX_UNICODE_UTF5:
			pCreated = new CExternalEncoder<CPEX_UNICODE_UTF5>;
			break;
		case CP_UTF8:
			pCreated = new CExternalEncoder<CP_UTF8>;
			break;
		case CPEX_UNICODE_UTF16LE:
			pCreated = new CExternalEncoder<CPEX_UNICODE_UTF16LE>;
			break;
		case CPEX_UNICODE_UTF16BE:
			pCreated = new CExternalEncoder<CPEX_UNICODE_UTF16BE>;
			break;
		case CPEX_UNICODE_UTF32LE:
			pCreated = new CExternalEncoder<CPEX_UNICODE_UTF32LE>;
			break;
		case CPEX_UNICODE_UTF32BE:
			pCreated = new CExternalEncoder<CPEX_UNICODE_UTF32BE>;
			break;
		// disabled (Encodings/Armenian.h is not included):
//		case CPEX_ARMENIAN_ARMSCII7:	return new CExternalEncoder<CPEX_ARMENIAN_ARMSCII7>;
//		case CPEX_ARMENIAN_ARMSCII8:	return new CExternalEncoder<CPEX_ARMENIAN_ARMSCII8>;
//		case CPEX_ARMENIAN_ARMSCII8A:	return new CExternalEncoder<CPEX_ARMENIAN_ARMSCII8A>;
		case 51932:
			pCreated = new CExternalEncoder<51932>;
			break;
		case CPEX_JAPANESE_ISO2022JP:
			pCreated = new CExternalEncoder<CPEX_JAPANESE_ISO2022JP>;
			break;
		case CPEX_JAPANESE_SHIFTJIS:
			pCreated = new CExternalEncoder<CPEX_JAPANESE_SHIFTJIS>;
			break;
		case CPEX_JAPANESE_ISO2022JP1:
			pCreated = new CExternalEncoder<CPEX_JAPANESE_ISO2022JP1>;
			break;
		case CPEX_JAPANESE_EUC:
			pCreated = new CExternalEncoder<CPEX_JAPANESE_EUC>;
			break;
		default:
			// everything else is delegated to Windows; the constructor throws for unsupported pages
			pCreated = new CWindowsEncoder(cp);
			break;
		}
	} catch(...) {
		pCreated = 0;
	}
	return pCreated;
}

/**
 *	Detects the encoding of a byte sequence.
 *	@param psz	bytes to examine (must not be null)
 *	@param cch	number of bytes to examine
 *	@param cp	auto-detection code page that selects the detection to run
 *	@return		@a cp itself when it is not an auto-detection code page; otherwise the
 *				detected code page, or the system ANSI code page (::GetACP) when nothing
 *				was detected or no detector exists for @a cp
 */
inline CodePage CEncoder::DetectCodePage(const char* psz, std::size_t cch, CodePage cp) {
	assert(psz != 0);

	if(!IsCodePageForAutoDetection(cp))
		return cp;

	// Map the "by language" requests to a concrete detector.
	if(cp == CPEX_AUTODETECT_SYSTEMLANG || cp == CPEX_AUTODETECT_USERLANG) {
		const LANGID	langId = (cp == CPEX_AUTODETECT_SYSTEMLANG) ? ::GetSystemDefaultLangID() : ::GetUserDefaultLangID();
		switch(PRIMARYLANGID(langId)) {
		case LANG_JAPANESE:	cp = CPEX_JAPANESE_AUTODETECT;	break;
//		case LANG_KOREAN:	cp = CPEX_KOREAN_AUTODETECT;	break;
		default:			cp = 20127;
		}
	}

	// Both results start in the "nothing detected" state. Several values of cp reach
	// the switch below without matching any case (CPEX_AUTODETECT, the Korean, Armenian
	// and Vietnamese requests, and the 20127 fallback above); without initialization
	// the return statement would read indeterminate values.
	const CodePage	cpFallback = ::GetACP();
	CodePage		cpDetected = cpFallback;
	std::size_t		nScore = 0;

	switch(cp) {
//	case CPEX_ARMENIAN_AUTODETECT:	DetectCodePageImpl<CPEX_ARMENIAN_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
//	case CPEX_CHINESE_AUTODETECT:	DetectCodePageImpl<CPEX_CHINESE_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
//	case CPEX_EASTASIA_AUTODETECT:	DetectCodePageImpl<CPEX_EASTASIA_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
	case CPEX_JAPANESE_AUTODETECT:	DetectCodePageImpl<CPEX_JAPANESE_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
//	case CPEX_KOREAN_AUTODETECT:	DetectCodePageImpl<CPEX_KOREAN_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
//	case CPEX_RUSSIAN_AUTODETECT:	DetectCodePageImpl<CPEX_RUSSIAN_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
	case CPEX_UNICODE_AUTODETECT:	DetectCodePageImpl<CPEX_UNICODE_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
	default:						break;	// no detector for this request
	}
	return (nScore != 0) ? cpDetected : cpFallback;
}

inline void CEncoder::EnumCodePages(std::set<CodePage>& setCodePages) {
	static CodePage	arrCPs[] = {
		CPEX_UNICODE_UTF16LE, CPEX_UNICODE_UTF16BE, CPEX_UNICODE_UTF32LE, CPEX_UNICODE_UTF32BE,
//		CPEX_AUTODETECT,
		CPEX_JAPANESE_AUTODETECT, 51932,
//		CPEX_KOREAN_AUTODETECT,
		CPEX_AUTODETECT_SYSTEMLANG, CPEX_AUTODETECT_USERLANG, CPEX_UNICODE_AUTODETECT, CPEX_UNICODE_UTF5,
//		CPEX_ARMENIAN_AUTODETECT, CPEX_ARMENIAN_ARMSCII7, CPEX_ARMENIAN_ARMSCII8, CPEX_ARMENIAN_ARMSCII8A,
//		CPEX_VIETNAMESE_AUTODETECT, CPEX_VIETNAMESE_TCVN, CPEX_VIETNAMESE_VISCII, CPEX_VIETNAMESE_VPN,
		CPEX_JAPANESE_ISO2022JP, CPEX_JAPANESE_SHIFTJIS,
		CPEX_JAPANESE_ISO2022JP1, /*CPEX_JAPANESE_ISO2022JP2,*/ CPEX_JAPANESE_EUC,
//		CPEX_JAPANESE_ISO2022JP2004, CPEX_JAPANESE_SHIFTJIS2004, CPEX_JAPANESE_EUCJIS2004
	};

	setCodePages.clear();
	for(std::size_t i = 0; i < sizeof(arrCPs) / sizeof(CodePage); ++i)
		setCodePages.insert(arrCPs[i]);

	_g_pWorking = &setCodePages;
	::EnumSystemCodePages(_EnumCodePages, CP_INSTALLED);
}

/**
 *	Callback for ::EnumSystemCodePages: parses the decimal code page number and adds it
 *	to the set published in _g_pWorking when Windows reports the code page as valid.
 *	@param lpstrCp	code page number as a string
 *	@return			always TRUE
 */
// NOTE(review): wcstoul is applied to an LPTSTR, which presumably only compiles in
// UNICODE builds - confirm the project never builds as ANSI.
inline BOOL CEncoder::_EnumCodePages(LPTSTR lpstrCp) {
	const CodePage	nParsed = wcstoul(lpstrCp, 0, 10);
	if(!::IsValidCodePage(nParsed))
		return TRUE;
	_g_pWorking->insert(nParsed);
	return TRUE;
}

/**
 *	Returns whether @a cp is one of the pseudo code pages that request auto-detection
 *	rather than naming a concrete encoding.
 */
inline bool CEncoder::IsCodePageForAutoDetection(CodePage cp) {
	switch(cp) {
	case CPEX_AUTODETECT:
	case CPEX_JAPANESE_AUTODETECT:
	case CPEX_KOREAN_AUTODETECT:
	case CPEX_AUTODETECT_SYSTEMLANG:
	case CPEX_AUTODETECT_USERLANG:
	case CPEX_UNICODE_AUTODETECT:
	case CPEX_ARMENIAN_AUTODETECT:
	case CPEX_VIETNAMESE_AUTODETECT:
		return true;
	default:
		return false;
	}
}

/**
 *	Returns whether @a cp can be used with this library: a code page Windows reports as
 *	valid, an auto-detection pseudo code page, or an encoding implemented by the library.
 */
inline bool CEncoder::IsValidCodePage(CodePage cp) {
	// ask the system first, then the auto-detection pseudo code pages
	if(::IsValidCodePage(cp) || IsCodePageForAutoDetection(cp))
		return true;

	// encodings implemented by this library
	switch(cp) {
	case CPEX_UNICODE_UTF16LE:
	case CPEX_UNICODE_UTF16BE:
	case CPEX_UNICODE_UTF32LE:
	case CPEX_UNICODE_UTF32BE:
//	case CPEX_ARMENIAN_ARMSCII7:
//	case CPEX_ARMENIAN_ARMSCII8:
//	case CPEX_ARMENIAN_ARMSCII8A:
	case 51932:
	case CPEX_UNICODE_UTF5:
//	case CPEX_VIETNAMESE_TCVN:
//	case CPEX_VIETNAMESE_VISCII:
//	case CPEX_VIETNAMESE_VPN:
	case CPEX_JAPANESE_ISO2022JP:
	case CPEX_JAPANESE_SHIFTJIS:
	case CPEX_JAPANESE_ISO2022JP1:
	case CPEX_JAPANESE_EUC:
		return true;
	default:
		return false;
	}
}

///	Records the template argument as the code page handled by this encoder.
template<CodePage cp>
inline CExternalEncoder<cp>::CExternalEncoder() {
	this->m_nCodePage = cp;
}

///	Generic definition: returns an empty byte order mark (zero bytes, empty sequence).
template<CodePage cp>
inline const ByteOrderMark* CExternalEncoder<cp>::GetByteOrderMark() const {
	static const ByteOrderMark	emptyMark = {0, ""};
	return &emptyMark;
}

/**
 *	Creates an encoder backed by the Windows conversion tables.
 *	@param cp	code page; must be one that ::IsValidCodePage accepts
 *	@throw std::invalid_argument	@a cp is not supported by the system
 */
// NOTE(review): std::invalid_argument needs <stdexcept>, which this header does not
// include directly - presumably it arrives through another include; confirm.
inline CWindowsEncoder::CWindowsEncoder(CodePage cp) {
	if(::IsValidCodePage(cp)) {
		m_nCodePage = cp;
		return;
	}
	throw std::invalid_argument("Specified code page is not supported.");
}

/**
 *	Converts UTF-16 text to this encoder's code page with ::WideCharToMultiByte.
 *	When the first call reports failure (returns 0), the callback decides what happens:
 *	with no callback, or a callback returning true, the conversion is repeated with
 *	WC_DEFAULTCHAR; with a callback returning false the method returns 0.
 *	@return	value returned by ::WideCharToMultiByte, or 0 when the callback refused
 */
// NOTE(review): with dwFlags == 0 and no lpUsedDefaultChar the first call may not fail
// for unmappable characters at all - verify against the WideCharToMultiByte documentation.
inline std::size_t CWindowsEncoder::ConvertFromUnicode(CFU_ARGLIST) {
	const int	cbWritten = ::WideCharToMultiByte(m_nCodePage, 0, pwszSrc, cchSrc, pszDest, cchDest, 0, 0);
	if(cbWritten != 0)
		return cbWritten;

	// first attempt failed: a callback, if present, may veto the fallback conversion
	if(pCallBack != 0 && !(*pCallBack)())
		return 0;
	return ::WideCharToMultiByte(m_nCodePage, WC_DEFAULTCHAR, pwszSrc, cchSrc, pszDest, cchDest, 0, 0);
}

/**
 *	Converts text in this encoder's code page to UTF-16 with ::MultiByteToWideChar.
 *	The first call uses MB_ERR_INVALID_CHARS. When it reports failure (returns 0), the
 *	callback decides what happens: with no callback, or a callback returning true, the
 *	conversion is repeated without that flag; with a callback returning false the
 *	method returns 0.
 *	@return	value returned by ::MultiByteToWideChar, or 0 when the callback refused
 */
inline std::size_t CWindowsEncoder::ConvertToUnicode(CTU_ARGLIST) {
	const int	cchWritten = ::MultiByteToWideChar(m_nCodePage, MB_ERR_INVALID_CHARS, pszSrc, cchSrc, pwszDest, cchDest);
	if(cchWritten != 0)
		return cchWritten;

	// strict attempt failed: a callback, if present, may veto the lenient retry
	if(pCallBack != 0 && !(*pCallBack)())
		return 0;
	return ::MultiByteToWideChar(m_nCodePage, 0, pszSrc, cchSrc, pwszDest, cchDest);
}

///	Returns an empty byte order mark (zero bytes, empty sequence).
inline const ByteOrderMark* CWindowsEncoder::GetByteOrderMark() const {
	static const ByteOrderMark	emptyMark = {0, ""};
	return &emptyMark;
}

///	Returns CPINFO::MaxCharSize for this encoder's code page, or 0 when ::GetCPInfo fails.
inline uchar CWindowsEncoder::GetMaxCharacterLength() const {
	CPINFO	info;
	return ::GetCPInfo(m_nCodePage, &info) ? info.MaxCharSize : 0;
}

} // namespace Text
} // namespace Manah

// __DEFAULT_CHAR is only meant to be visible inside this header and the headers it includes above.
#undef __DEFAULT_CHAR

#endif /* _ENCODER_H_ */

/* [EOF] */