// Encoder.h
// (c) 2004 exeal

#ifndef _ENCODER_H_
#define _ENCODER_H_

#include "Object.h"
#include <cassert>
#include <cstdlib>
#include <cwchar>
#include <set>
#include <stdexcept>


namespace Manah {
namespace Text {

///	Windows code page identifier
typedef unsigned int CodePage;

//	Code page identifiers used by this library in addition to the ones Windows
//	provides, or whose conversion is implemented by the library itself.
///	UTF-16 (little endian)
const CodePage	CPEX_UNICODE_UTF16LE		= 1200;
///	UTF-16 big endian
const CodePage	CPEX_UNICODE_UTF16BE		= 1201;
///	UTF-32 (little endian)
const CodePage	CPEX_UNICODE_UTF32LE		= 12000;
///	UTF-32 big endian
const CodePage	CPEX_UNICODE_UTF32BE		= 12001;
///	Auto-detect
const CodePage	CPEX_AUTODETECT				= 50001;
///	Japanese (auto-detect)
const CodePage	CPEX_JAPANESE_AUTODETECT	= 50932;
///	Korean (auto-detect)
const CodePage	CPEX_KOREAN_AUTODETECT		= 50949;
///	Auto-detect based on the system language
const CodePage	CPEX_AUTODETECT_SYSTEMLANG	= 70000;
///	Auto-detect based on the user language
const CodePage	CPEX_AUTODETECT_USERLANG	= 70001;
///	Unicode (auto-detect)
const CodePage	CPEX_UNICODE_AUTODETECT		= 70010;
///	UTF-5
const CodePage	CPEX_UNICODE_UTF5			= 70011;
///	Armenian (auto-detect)
const CodePage	CPEX_ARMENIAN_AUTODETECT	= 70020;
///	Armenian (ARMSCII-7)
const CodePage	CPEX_ARMENIAN_ARMSCII7		= 70021;
///	Armenian (ARMSCII-8)
const CodePage	CPEX_ARMENIAN_ARMSCII8		= 70022;
///	Armenian (ARMSCII-8A)
const CodePage	CPEX_ARMENIAN_ARMSCII8A		= 70023;
///	Vietnamese (auto-detect)
const CodePage	CPEX_VIETNAMESE_AUTODETECT	= 70030;
///	Vietnamese (TCVN)
const CodePage	CPEX_VIETNAMESE_TCVN		= 70031;
///	Vietnamese (VISCII)
const CodePage	CPEX_VIETNAMESE_VISCII		= 70032;
///	Vietnamese (VPN)
const CodePage	CPEX_VIETNAMESE_VPN			= 70033;
///	Japanese (JIS X 0208-1998 & 0212-1998)
const CodePage	CPEX_JAPANESE_JISX0212		= 70040;
///	Japanese (JIS X 0208-1998 & 0213-2004)
const CodePage	CPEX_JAPANESE_JISX0213		= 70041;
///	Japanese (Shift JIS X0212)
const CodePage	CPEX_JAPANESE_SJISX0212		= 70042;
///	Japanese (Shift JIS X0213)
const CodePage	CPEX_JAPANESE_SJISX0213		= 70043;
///	Japanese (EUC X0212)
const CodePage	CPEX_JAPANESE_EUCX0212		= 70044;
///	Japanese (EUC X0213)
const CodePage	CPEX_JAPANESE_EUCX0213		= 70045;

///	Byte order mark
struct ByteOrderMark {
	///	Number of bytes
	std::size_t	cBytes;
	///	The byte sequence
	const char*	pszBOM;
};


// CEncoder class definition
/////////////////////////////////////////////////////////////////////////////

// Parameter list shared by the out-of-class definitions of ConvertFromUnicode.
// The commented-out values mirror the default arguments given in the class declarations.
#define CFU_ARGLIST											\
	char* pszDest, std::size_t cchDest,						\
	const wchar_t* pwszSrc, std::size_t cchSrc /* = -1 */,	\
	bool* pbFoundUnconvertableChar /* = 0 */

// Parameter list shared by the out-of-class definitions of ConvertToUnicode.
#define CTU_ARGLIST											\
	wchar_t* pwszDest, std::size_t cchDest,					\
	const char* pszSrc, std::size_t cchSrc /* = -1 */

// Argument check for a ConvertFromUnicode body: asserts that both buffers are
// non-null and replaces a source length of -1 with wcslen(pwszSrc).
// NOTE: expands to two statements and already ends with ';', so it must not be
// used as the unbraced body of an if/else.
#define CFU_CHECKARGS()						\
	assert(pszDest != 0 && pwszSrc != 0);	\
	if(cchSrc == -1)						\
		cchSrc = wcslen(pwszSrc);

// Argument check for a ConvertToUnicode body: asserts that both buffers are
// non-null and replaces a source length of -1 with strlen(pszSrc).
// NOTE: expands to two statements and already ends with ';', so it must not be
// used as the unbraced body of an if/else.
#define CTU_CHECKARGS()						\
	assert(pwszDest != 0 && pszSrc != 0);	\
	if(cchSrc == -1)						\
		cchSrc = strlen(pszSrc);

// Set that CEncoder::_EnumCodePages inserts into; CEncoder::EnumCodePages points it
// at the caller's set before starting the system enumeration.
// NOTE(review): being `static` in a header, every translation unit that includes this
// file gets its own copy, and the pointer is left set after the enumeration returns.
static std::set<CodePage>*	_g_pWorking;

/**
 *	@brief	GR[_
 *
 *	UTF-16 ƃ}`oCgZbg̑ݕϊɂāAϊłȂƂA
 *	SetDefaultChar \bhŊ̕g悤ɐݒ肵ĂΊ̕ŒuB
 *	łȂꍇ͕ϊ͓rŏI (r܂ŕϊ񂪕Ԃ)
 */
class CEncoder {
	// RXgN^
protected:
	CEncoder() : m_bUseDefaultChar(false) {}
public:
	virtual ~CEncoder() {}
private:
	CEncoder(const CEncoder& rhs);
	operator =(const CEncoder& rhs);

	// \bh
public:
	/**
	 *	UTF-16 ϊ
	 *	@param pszDest	[out] ϊ
	 *	@param cchDest	ϊ̒
	 *	@param pwszSrc	ϊ
	 *	@param cchSrc	ϊ̕
	 *	@param pbFoundUnconvertableChar	[out] ϊłȂ܂܂Ă
	 *	@return			ϊ̕
	 */
	virtual std::size_t ConvertFromUnicode(
							char* pszDest, std::size_t cchDest,
							const wchar_t* pwszSrc, std::size_t cchSrc = -1,
							bool* pbFoundUnconvertableChar = 0) = 0;
	/**
	 *	UTF-16 ɕϊ
	 *	@param pwszDest	[out] ϊ
	 *	@param cchDest	ϊ̕
	 *	@param pszSrc	ϊ
	 *	@param cchSrc	ϊ̕
	 *	@return			ϊ̕
	 */
	virtual std::size_t ConvertToUnicode(
							wchar_t* pwszDest, std::size_t cchDest,
							const char* pszSrc, std::size_t cchSrc = -1) = 0;
	/**
	 *	GR[_쐬BR[hy[WȂƂ null Ԃ
	 *	@param cp	R[hy[W
	 */
	static CEncoder* Create(CodePage cp);
	/**
	 *	
	 *	@param psz		
	 *	@param cch		ׂoCg
	 *	@param language	ʂɎgp錾
	 */
	static CodePage DetectCodePage(const char* psz, std::size_t cch, CodePage cp);
	///	p\ȃR[hy[W
	static void EnumCodePages(std::set<CodePage>& setCodePages);
	///	oCgI[_}[NԂ
	virtual const ByteOrderMark* GetByteOrderMark() const = 0;
	///	1̍őoCgԂ
	virtual uchar GetMaxCharacterLength() const = 0;
	///	ʂ̂߂̃R[hy[W
	static bool IsCodePageForAutoDetection(CodePage cp);
	///	LȃR[hy[W
	static bool IsValidCodePage(CodePage cp);
	///	ϊłȂ̕ɕϊ邩ݒ肷
	void UseDefaultCharacter(bool bUse = true);
private:
	static BOOL CALLBACK _EnumCodePages(LPTSTR cp);
protected:
	CodePage	m_nCodePage;
	bool		m_bUseDefaultChar;
};


// Encoder implemented by this library itself; the code page is the template argument.
// The constructor is private, so instances are created only by the friend class
// CEncoder (see CEncoder::Create).
template<CodePage cp>
class CExternalEncoder : public CEncoder {
private:
	CExternalEncoder();
public:
	// Converts from UTF-16; see CEncoder::ConvertFromUnicode.
	std::size_t ConvertFromUnicode(
					char* pszDest, std::size_t cchDest,
					const wchar_t* pwszSrc, std::size_t cchSrc = -1,
					bool* pbFoundUnconvertableChar = 0);
	// Converts to UTF-16; see CEncoder::ConvertToUnicode.
	std::size_t ConvertToUnicode(
					wchar_t* pwszDest, std::size_t cchDest,
					const char* pszSrc, std::size_t cchSrc = -1);
	// Returns the byte order mark of this encoding.
	const ByteOrderMark* GetByteOrderMark() const;
	// Returns the maximum number of bytes of one character.
	uchar GetMaxCharacterLength() const;

	friend class CEncoder;
};


// Encoder that uses the Windows conversion tables as they are.
// The constructor is private, so instances are created only by the friend class
// CEncoder (see CEncoder::Create).
class CWindowsEncoder : public CEncoder {
private:
	// Throws std::invalid_argument when ::IsValidCodePage rejects cp.
	CWindowsEncoder(CodePage cp);
public:
	// Converts from UTF-16 through ::WideCharToMultiByte.
	std::size_t ConvertFromUnicode(
					char* pszDest, std::size_t cchDest,
					const wchar_t* pwszSrc, std::size_t cchSrc = -1,
					bool* pbFoundUnconvertableChar = 0);
	// Converts to UTF-16 through ::MultiByteToWideChar.
	std::size_t ConvertToUnicode(
					wchar_t* pwszDest, std::size_t cchDest,
					const char* pszSrc, std::size_t cchSrc = -1);
	// Returns an empty byte order mark (zero bytes).
	const ByteOrderMark* GetByteOrderMark() const;
	// Returns the MaxCharSize reported by ::GetCPInfo, or 0 when that call fails.
	uchar GetMaxCharacterLength() const;

	friend class CEncoder;
};


// Auto-detection for one language / encoding family; the template argument is the
// auto-detection code page. Only declared here; CEncoder::DetectCodePage calls the
// CPEX_JAPANESE_AUTODETECT and CPEX_UNICODE_AUTODETECT instantiations.
//   psz, cch        bytes to examine and their count
//   cpResult        [out] the detected code page
//   cchConvertable  [out] score of the detection; the caller treats 0 as "nothing
//                   detected" (presumably the number of convertible bytes -- TODO confirm)
template<CodePage cp>
void DetectCodePageImpl(const char* psz, std::size_t cch, CodePage& cpResult, std::size_t& cchConvertable);

} // namespace Text
} // namespace Manah

#include "Encodings/Unicode.h"
#include "Encodings/Armenian.h"
//#include "Encodings/Iscii.h"
#include "Encodings/Japanese.h"


namespace Manah {
namespace Text {

// CEncoder class partial implementation
/////////////////////////////////////////////////////////////////////////////

/**
 *	Creates the encoder for a code page.
 *	Code pages implemented by this library get their CExternalEncoder instantiation;
 *	every other value is handed to CWindowsEncoder.
 *	@param cp	code page
 *	@return		a new encoder allocated with `new`, or null when construction throws
 *				(CWindowsEncoder rejects code pages Windows does not consider valid)
 */
inline CEncoder* CEncoder::Create(CodePage cp) {
	try {
		if(cp == CPEX_UNICODE_UTF5)
			return new CExternalEncoder<CPEX_UNICODE_UTF5>;
		if(cp == CP_UTF8)
			return new CExternalEncoder<CP_UTF8>;
		if(cp == CPEX_UNICODE_UTF16LE)
			return new CExternalEncoder<CPEX_UNICODE_UTF16LE>;
		if(cp == CPEX_UNICODE_UTF16BE)
			return new CExternalEncoder<CPEX_UNICODE_UTF16BE>;
		if(cp == CPEX_UNICODE_UTF32LE)
			return new CExternalEncoder<CPEX_UNICODE_UTF32LE>;
		if(cp == CPEX_UNICODE_UTF32BE)
			return new CExternalEncoder<CPEX_UNICODE_UTF32BE>;
		// The ARMSCII-7 / ARMSCII-8 / ARMSCII-8A encoders are currently disabled.
		if(cp == 51932)
			return new CExternalEncoder<51932>;
		// everything else is delegated to the Windows conversion tables
		return new CWindowsEncoder(cp);
	} catch(...) {
		// any failure (unsupported code page, allocation failure) is reported as null
		return 0;
	}
}

/**
 *	Detects the encoding of a byte sequence.
 *	@param psz	bytes to examine (must not be null)
 *	@param cch	number of bytes to examine
 *	@param cp	code page that selects the detection to run
 *	@return		@a cp itself when it is not an auto-detection code page; otherwise the
 *				code page reported by the detector when its score is non-zero, or the
 *				result of ::GetACP() when nothing was detected or no detector exists
 *				for the request
 */
inline CodePage CEncoder::DetectCodePage(const char* psz, std::size_t cch, CodePage cp) {
	assert(psz != 0);

	if(!IsCodePageForAutoDetection(cp))
		return cp;

	// Both must start out defined: several requests (CPEX_AUTODETECT,
	// CPEX_KOREAN_AUTODETECT, the 20127 fallback below, ...) match no detector in
	// the switch at the end, and the score is read unconditionally afterwards.
	CodePage	cpDetected = 0;
	std::size_t	nScore = 0;

	// Map the "by language" requests to the detector of that language.
	if(cp == CPEX_AUTODETECT_SYSTEMLANG || cp == CPEX_AUTODETECT_USERLANG) {
		const LANGID	langId = (cp == CPEX_AUTODETECT_SYSTEMLANG) ? ::GetSystemDefaultLangID() : ::GetUserDefaultLangID();
		switch(PRIMARYLANGID(langId)) {
		case LANG_JAPANESE:	cp = CPEX_JAPANESE_AUTODETECT;	break;
//		case LANG_KOREAN:	cp = CPEX_KOREAN_AUTODETECT;	break;
		default:			cp = 20127;	// no detector for this language
		}
	}

	switch(cp) {
//	case CPEX_ARMENIAN_AUTODETECT:	DetectCodePageImpl<CPEX_ARMENIAN_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
//	case CPEX_CHINESE_AUTODETECT:	DetectCodePageImpl<CPEX_CHINESE_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
//	case CPEX_EASTASIA_AUTODETECT:	DetectCodePageImpl<CPEX_EASTASIA_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
	case CPEX_JAPANESE_AUTODETECT:	DetectCodePageImpl<CPEX_JAPANESE_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
//	case CPEX_KOREAN_AUTODETECT:	DetectCodePageImpl<CPEX_KOREAN_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
//	case CPEX_RUSSIAN_AUTODETECT:	DetectCodePageImpl<CPEX_RUSSIAN_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
	case CPEX_UNICODE_AUTODETECT:	DetectCodePageImpl<CPEX_UNICODE_AUTODETECT>(psz, cch, cpDetected, nScore);	break;
	default:						break;	// no detector: fall back to the ANSI code page
	}
	return (nScore != 0) ? cpDetected : ::GetACP();
}

inline void CEncoder::EnumCodePages(std::set<CodePage>& setCodePages) {
	setCodePages.clear();
	setCodePages.insert(CPEX_UNICODE_UTF16LE);
	setCodePages.insert(CPEX_UNICODE_UTF16BE);
	setCodePages.insert(CPEX_UNICODE_UTF32LE);
	setCodePages.insert(CPEX_UNICODE_UTF32BE);
//	setCodePages.insert(CPEX_AUTODETECT);
	setCodePages.insert(CPEX_JAPANESE_AUTODETECT);
	setCodePages.insert(51932);
//	setCodePages.insert(CPEX_KOREAN_AUTODETECT);
	setCodePages.insert(CPEX_AUTODETECT_SYSTEMLANG);
	setCodePages.insert(CPEX_AUTODETECT_USERLANG);
	setCodePages.insert(CPEX_UNICODE_AUTODETECT);
	setCodePages.insert(CPEX_UNICODE_UTF5);
/*	setCodePages.insert(CPEX_ARMENIAN_AUTODETECT);
	setCodePages.insert(CPEX_ARMENIAN_ARMSCII7);
	setCodePages.insert(CPEX_ARMENIAN_ARMSCII8);
	setCodePages.insert(CPEX_ARMENIAN_ARMSCII8A);
	setCodePages.insert(CPEX_VIETNAMESE_AUTODETECT);
	setCodePages.insert(CPEX_VIETNAMESE_TCVN);
	setCodePages.insert(CPEX_VIETNAMESE_VISCII);
	setCodePages.insert(CPEX_VIETNAMESE_VPN);
*/
	_g_pWorking = &setCodePages;
	::EnumSystemCodePages(_EnumCodePages, CP_INSTALLED);
}

/**
 *	Callback handed to ::EnumSystemCodePages by EnumCodePages.
 *	Parses the code page identifier and, when Windows reports it as valid, inserts it
 *	into the set _g_pWorking points to.
 *	@param lpstrCp	code page identifier as a decimal string
 *	@return			always TRUE, so that the enumeration continues
 */
inline BOOL CALLBACK CEncoder::_EnumCodePages(LPTSTR lpstrCp) {
	// LPTSTR is wide only in UNICODE builds, so pick the matching parser.
#ifdef UNICODE
	const unsigned long	nParsed = std::wcstoul(lpstrCp, 0, 10);
#else
	const unsigned long	nParsed = std::strtoul(lpstrCp, 0, 10);
#endif
	const CodePage	cp = static_cast<CodePage>(nParsed);
	if(_g_pWorking != 0 && ::IsValidCodePage(cp))
		_g_pWorking->insert(cp);
	return TRUE;
}

/**
 *	Returns whether a code page is one of the pseudo code pages used to request
 *	auto-detection.
 *	@param cp	code page to test
 */
inline bool CEncoder::IsCodePageForAutoDetection(CodePage cp) {
	switch(cp) {
	case CPEX_AUTODETECT:
	case CPEX_JAPANESE_AUTODETECT:
	case CPEX_KOREAN_AUTODETECT:
	case CPEX_AUTODETECT_SYSTEMLANG:
	case CPEX_AUTODETECT_USERLANG:
	case CPEX_UNICODE_AUTODETECT:
	case CPEX_ARMENIAN_AUTODETECT:
	case CPEX_VIETNAMESE_AUTODETECT:
		return true;
	default:
		return false;
	}
}

/**
 *	Returns whether a code page is valid: either Windows accepts it, or it is an
 *	auto-detection code page, or it is one of the encodings this library implements.
 *	@param cp	code page to test
 */
inline bool CEncoder::IsValidCodePage(CodePage cp) {
	// ask Windows first, then the auto-detection pseudo code pages
	if(::IsValidCodePage(cp) || IsCodePageForAutoDetection(cp))
		return true;

	// encodings provided by the library itself
	// (the ARMSCII and Vietnamese code pages are currently disabled)
	switch(cp) {
	case CPEX_UNICODE_UTF16LE:
	case CPEX_UNICODE_UTF16BE:
	case CPEX_UNICODE_UTF32LE:
	case CPEX_UNICODE_UTF32BE:
	case 51932:
	case CPEX_UNICODE_UTF5:
		return true;
	default:
		return false;
	}
}

inline void CEncoder::UseDefaultCharacter(bool bUse /* = true */) {
	m_bUseDefaultChar = bUse;
}

// Records the template argument as the code page of this encoder.
template<CodePage cp>
inline CExternalEncoder<cp>::CExternalEncoder() {
	this->m_nCodePage = cp;
}

// Generic version: returns an empty byte order mark (zero bytes, empty sequence).
template<CodePage cp>
inline const ByteOrderMark* CExternalEncoder<cp>::GetByteOrderMark() const {
	static const ByteOrderMark	emptyMark = { 0, "" };
	const ByteOrderMark* const	pMark = &emptyMark;
	return pMark;
}

/**
 *	Creates an encoder backed by the Windows conversion tables.
 *	@param cp	code page
 *	@throw std::invalid_argument	::IsValidCodePage does not accept @a cp
 */
inline CWindowsEncoder::CWindowsEncoder(CodePage cp) {
	const bool	bSupported = (::IsValidCodePage(cp) != 0);
	if(!bSupported) {
		throw std::invalid_argument("Specified code page is not supported.");
	}
	m_nCodePage = cp;
}

/**
 *	Converts from UTF-16 through ::WideCharToMultiByte.
 *	WC_DEFAULTCHAR is passed when the default character setting is enabled.
 *	@param pszDest	[out] destination buffer
 *	@param cchDest	size of the destination buffer
 *	@param pwszSrc	source string
 *	@param cchSrc	length of the source string (forwarded to the API as an int)
 *	@param pbFoundUnconvertableChar	[out] when not null, receives whether the API
 *					reported that the default character was used
 *	@return			the value returned by ::WideCharToMultiByte
 */
inline std::size_t CWindowsEncoder::ConvertFromUnicode(CFU_ARGLIST) {
	const DWORD	dwFlags = m_bUseDefaultChar ? WC_DEFAULTCHAR : 0;

	if(pbFoundUnconvertableChar == 0)
		return ::WideCharToMultiByte(m_nCodePage, dwFlags,
			pwszSrc, static_cast<int>(cchSrc), pszDest, static_cast<int>(cchDest), 0, 0);

	// The API writes a BOOL, which is wider than bool: receive it in a real BOOL and
	// copy the result instead of letting the API write through a reinterpreted bool*.
	BOOL	bUsedDefaultChar = FALSE;
	const int	cchConverted = ::WideCharToMultiByte(m_nCodePage, dwFlags,
		pwszSrc, static_cast<int>(cchSrc), pszDest, static_cast<int>(cchDest), 0, &bUsedDefaultChar);
	*pbFoundUnconvertableChar = (bUsedDefaultChar != FALSE);
	return cchConverted;
}

/**
 *	Converts to UTF-16 through ::MultiByteToWideChar.
 *	MB_ERR_INVALID_CHARS is passed unless the default character setting is enabled.
 *	@return	the value returned by ::MultiByteToWideChar
 */
inline std::size_t CWindowsEncoder::ConvertToUnicode(CTU_ARGLIST) {
	DWORD	dwFlags = MB_ERR_INVALID_CHARS;
	if(m_bUseDefaultChar)
		dwFlags = 0;
	return ::MultiByteToWideChar(m_nCodePage, dwFlags, pszSrc, cchSrc, pwszDest, cchDest);
}

// Returns an empty byte order mark (zero bytes, empty sequence).
inline const ByteOrderMark* CWindowsEncoder::GetByteOrderMark() const {
	static const ByteOrderMark	emptyMark = { 0, "" };
	const ByteOrderMark* const	pMark = &emptyMark;
	return pMark;
}

// Returns the MaxCharSize that ::GetCPInfo reports for this code page,
// or 0 when the query fails.
inline uchar CWindowsEncoder::GetMaxCharacterLength() const {
	CPINFO	info;
	if(::GetCPInfo(m_nCodePage, &info)) {
		return info.MaxCharSize;
	}
	return 0;
}

} // namespace Text
} // namespace Manah

#endif /* _ENCODER_H_ */

/* [EOF] */