// UnicodeUtils.h
// (c) 2005-2006 exeal

#ifndef UNICODE_UTILS_H_
#define UNICODE_UTILS_H_

#include "AscensionCommon.h"
#include <cassert>
#include <algorithm>


namespace Ascension {
	/// A Unicode code point (scalar value or surrogate code point).
	typedef unsigned long CodePoint;	// stands in for uint32_t (unsigned long is at least 32 bits wide, possibly wider)

	/// UTF-16 TQ[gyȀ
	struct UTF16Surrogates {
		///  UTF-16 ʃTQ[głΐ^Ԃ
		static bool isHighSurrogate(char_t ch) throw() {return ch >= 0xD800 && ch < 0xDC00;}
		///  UTF-16 ʃTQ[głΐ^Ԃ
		static bool isLowSurrogate(char_t ch) throw() {return ch >= 0xDC00 && ch < 0xE000;}
		/**
		 *	TQ[gyAR[h|Cg쐬
		 *	@param p		TQ[gyA
		 *	@param length	@a p ̒
		 *	@return			R[h|Cg
		 */
		static CodePoint decode(const char_t* p, std::size_t length) {
			assert(p != 0 && length != 0);
			return (length > 1 && isHighSurrogate(p[0]) && isLowSurrogate(p[1])) ? 
						0x10000 + (p[0] - 0xD800) * 0x400 + p[1] - 0xDC00 : p[0];
		}
		/**
		 *	R[h|Cg UTF-16 TQ[gyA쐬
		 *	@param cp	R[h|Cg
		 *	@param dest	[out] TQ[gyA
		 *	@return		TQ[gyAƂ^
		 */
		static bool encode(CodePoint cp, char_t* dest) {
			assert(dest != 0);
			if(cp < 0x00010000) {
				dest[0] = static_cast<char_t>(cp & 0xFFFF);
				return false;
			} else if(cp <= 0x0010FFFF) {
				cp -= 0x00010000;
				dest[0] = 0xD800 | static_cast<char_t>(((cp & 0x001FFC00) >> 10) & 0xFFFF);
				dest[1] = 0xDC00 | static_cast<char_t>(((cp & 0x000003FF) >> 0) & 0xFFFF);
				return true;
			}
			return false;
		}
	};

	/// Character folding (case folding and digit folding). All members are static.
	class CharacterFolder : public Manah::Noncopyable {
	public:
		/// Folds the case of a single code point (defined outside this header).
		static CodePoint	foldCase(CodePoint cp);
		/// Folds the case of the UTF-16 range [@a first, @a last) in place.
		static void			foldCase(char_t* first, char_t* last);
		/// Folds the case of [@a first, @a last) into a buffer allocated with @c new[]
		/// holding (@a last - @a first) code units; the caller must @c delete[] it.
		static char_t*		foldCase(const char_t* first, const char_t* last);
		/// Folds the case of [@a first, @a last) and writes the result to @a dest,
		/// which must have room for (@a last - @a first) code units.
		static void			foldCase(const char_t* first, const char_t* last, char_t* dest);
		/// Maps a decimal digit of a supported script to the corresponding ASCII digit '0'-'9';
		/// any other code point is returned unchanged.
		static CodePoint	foldDigit(CodePoint cp);
	private:
		// Lookup tables defined outside this header.
		// NOTE(review): presumably parallel cased/folded tables for BMP (UCS2) and
		// supplementary (UCS4) characters used by foldCase(CodePoint) -- confirm in the .cpp.
		static const char_t CASED_UCS2[], FOLDED_UCS2[];
		static const CodePoint CASED_UCS4[], FOLDED_UCS4[];
	};

	/// Unicode vpeB
	class CharProperty : public Manah::Noncopyable {
		// ^
	public:
#if ASCENSION_UNICODE_VERSION != 0x0410
#error These codes are based on old version of Unicode.
#endif
		/// ʕ
		enum GeneralCategory {
			GC_LETTER_UPPERCASE,			///< Lu
			GC_LETTER_LOWERCASE,			///< Ll
			GC_LETTER_TITLECASE,			///< Lt
			GC_LETTER_MODIFIER,				///< Lm
			GC_LETTER_OTHER,				///< Lo
			GC_MARK_NONSPACING,				///< Mn
			GC_MARK_SPACING_COMBINING,		///< Mc
			GC_MARK_ENCLOSING,				///< Me
			GC_NUMBER_DECIMAL_DIGIT,		///< Nd
			GC_NUMBER_LETTER,				///< Nl
			GC_NUMBER_OTHER,				///< No
			GC_PUNCTUATION_CONNECTOR,		///< Pc
			GC_PUNCTUATION_DASH,			///< Pd
			GC_PUNCTUATION_OPEN,			///< Ps
			GC_PUNCTUATION_CLOSE,			///< Pe
			GC_PUNCTUATION_INITIAL_QUOTE,	///< Pi
			GC_PUNCTUATION_FINAL_QUOTE,		///< Pf
			GC_PUNCTUATION_OTHER,			///< Po
			GC_SYMBOL_MATH,					///< Sm
			GC_SYMBOL_CURRENCY,				///< Sc
			GC_SYMBOL_MODIFIER,				///< Sk
			GC_SYMBOL_OTHER,				///< So
			GC_SEPARATOR_SPACE,				///< Zs
			GC_SEPARATOR_LINE,				///< Zl
			GC_SEPARATOR_PARAGRAPH,			///< Zp
			GC_OTHER_CONTROL,				///< Cc
			GC_OTHER_FORMAT,				///< Cf
			GC_OTHER_SURROGATE,				///< Cs
			GC_OTHER_PRIVATE_USE,			///< Co
			GC_OTHER_NOT_ASSIGNED,			///< Cn
			GC_COUNT
		};

		/// XNvg
		enum Script {
			S_COMMON,
			// Unicode 4.0
			S_LATIN, S_GREEK, S_CYRILLIC, S_ARMENIAN, S_HEBREW, S_ARABIC, S_SYRIAC, S_THAANA,
			S_DEVANAGARI, S_BENGALI, S_GURMUKHI, S_GUJARATI, S_ORIYA, S_TAMIL, S_TELUGU, S_KANNADA,
			S_MALAYALAM, S_SINHALA, S_THAI, S_LAO, S_TIBETAN, S_MYANMAR, S_GEORGIAN, S_HANGUL,
			S_ETHIOPIC, S_CHEROKEE, S_CANADIAN_ABORIGINAL, S_OGHAM, S_RUNIC, S_KHMER, S_MONGOLIAN,
			S_HIRAGANA, S_KATAKANA, S_BOPOMOFO, S_HAN, S_YI, S_OLD_ITALIC, S_GOTHIC, S_DESERET,
			S_INHERITED, S_TAGALOG, S_HANUNOO, S_BUHID, S_TAGBANWA, S_LIMBU, S_TAI_LE,
			S_LINEAR_B, S_UGARITIC, S_SHAVIAN, S_OSMANYA, S_CYPRIOT, S_BRAILLE,
			// Unicode 4.1
			S_BUGINESE, S_COPTIC, S_NEW_TAI_LUE, S_GLAGOLITIC, S_TIFINAGH, S_SYLOTI_NAGRI,
			S_OLD_PERSIAN, S_KHAROSHTHI,
			S_COUNT
		};

		/// R[hubN
		enum CodeBlock {
			CB_BASIC_LATIN, CB_LATIN_1_SUPPLEMENT, CB_LATIN_EXTENDED_A, CB_LATIN_EXTENDED_B,
			CB_IPA_EXTENSIONS, CB_SPACING_MODIFIER_LETTERS, CB_COMBINING_DIACRITICAL_MARKS,
			CB_GREEK_AND_COPTIC, CB_CYRILLIC, CB_CYRILLIC_SUPPLEMENT, CB_ARMENIAN, CB_HEBREW, CB_ARABIC,
			CB_SYRIAC, CB_ARABIC_SUPPLEMENT, CB_THAANA, CB_DEVANAGARI, CB_BENGALI, CB_GURMUKHI, CB_GUJARATI,
			CB_ORIYA, CB_TAMIL, CB_TELUGU, CB_KANNADA, CB_MALAYALAM, CB_SINHALA, CB_THAI, CB_LAO, CB_TIBETAN,
			CB_MYANMAR, CB_GEORGIAN, CB_HANGUL_JAMO, CB_ETHIOPIC, CB_ETHIOPIC_SUPPLEMENT, CB_CHEROKEE,
			CB_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, CB_OGHAM, CB_RUNIC, CB_TAGALOG, CB_HANUNOO,
			CB_BUHID, CB_TAGBANWA, CB_KHMER, CB_MONGOLIAN, CB_LIMBU, CB_TAI_LE, CB_NEW_TAI_LUE, CB_KHMER_SYMBOLS,
			CB_BUGINESE, CB_PHONETIC_EXTENSIONS, CB_PHONETIC_EXTENSIONS_SUPPLEMENT,
			CB_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, CB_LATIN_EXTENDED_ADDITIONAL, CB_GREEK_EXTENDED,
			CB_GENERAL_PUNCTUATION, CB_SUPERSCRIPTS_AND_SUBSCRIPTS, CB_CURRENCY_SYMBOLS, CB_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS,
			CB_LETTERLIKE_SYMBOLS, CB_NUMBER_FORMS, CB_ARROWS, CB_MATHEMATICAL_OPERATORS,
			CB_MISCELLANEOUS_TECHNICAL, CB_CONTROL_PICTURES, CB_OPTICAL_CHARACTER_RECOGNITION,
			CB_ENCLOSED_ALPHANUMERICS, CB_BOX_DRAWING, CB_BLOCK_ELEMENTS, CB_GEOMETRIC_SHAPES,
			CB_MISCELLANEOUS_SYMBOLS, CB_DINGBATS, CB_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
			CB_SUPPLEMENTAL_ARROWS_A, CB_BRAILLE_PATTERNS, CB_SUPPLEMENTAL_ARROWS_B,
			CB_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, CB_SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
			CB_MISCELLANEOUS_SYMBOLS_AND_ARROWS, CB_GLAGOLITIC, CB_COPTIC, CB_GEORGIAN_SUPPLEMENT, CB_TIFINAGH,
			CB_ETHIOPIC_EXTENDED, CB_SUPPLEMENTAL_PUNCTUATION, CB_CJK_RADICALS_SUPPLEMENT, CB_KANGXI_RADICALS,
			CB_IDEOGRAPHIC_DESCRIPTION_CHARACTERS, CB_CJK_SYMBOLS_AND_PUNCTUATION, CB_HIRAGANA, CB_KATAKANA,
			CB_BOPOMOFO, CB_CJK_STROKES, CB_HANGUL_COMPATIBILITY_JAMO, CB_KANBUN, CB_BOPOMOFO_EXTENDED,
			CB_KATAKANA_PHONETIC_EXTENSIONS, CB_ENCLOSED_CJK_LETTERS_AND_MONTHS, CB_CJK_COMPATIBILITY,
			CB_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, CB_YIJING_HEXAGRAM_SYMBOLS, CB_CJK_UNIFIED_IDEOGRAPHS,
			CB_YI_SYLLABLES, CB_YI_RADICALS, CB_MODIFIER_TONE_LETTERS, CB_SYLOTI_NAGRI, CB_HANGUL_SYLLABLES,
			CB_HIGH_SURROGATES, CB_HIGH_PRIVATE_USE_SURROGATES, CB_LOW_SURROGATES, CB_PRIVATE_USE_AREA,
			CB_CJK_COMPATIBILITY_IDEOGRAPHS, CB_ALPHABETIC_PRESENTATION_FORMS, CB_ARABIC_PRESENTATION_FORMS_A,
			CB_VERTICAL_FORMS, CB_VARIATION_SELECTORS, CB_COMBINING_HALF_MARKS, CB_CJK_COMPATIBILITY_FORMS,
			CB_SMALL_FORM_VARIANTS, CB_ARABIC_PRESENTATION_FORMS_B, CB_HALFWIDTH_AND_FULLWIDTH_FORMS, CB_SPECIALS,
			CB_LINEAR_B_SYLLABARY, CB_LINEAR_B_IDEOGRAMS, CB_AEGEAN_NUMBERS, CB_ANCIENT_GREEK_NUMBERS, CB_OLD_ITALIC,
			CB_GOTHIC, CB_UGARITIC, CB_OLD_PERSIAN, CB_DESERET, CB_SHAVIAN, CB_OSMANYA, CB_CYPRIOT_SYLLABARY, CB_KHAROSHTHI,
			CB_BYZANTINE_MUSICAL_SYMBOLS, CB_MUSICAL_SYMBOLS, CB_ANCIENT_GREEK_MUSICAL_NOTATION,
			CB_TAI_XUAN_JING_SYMBOLS, CB_MATHEMATICAL_ALPHANUMERIC_SYMBOLS, CB_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
			CB_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, CB_TAGS, CB_VARIATION_SELECTORS_SUPPLEMENT,
			CB_SUPPLEMENTARY_PRIVATE_USE_AREA_A, CB_SUPPLEMENTARY_PRIVATE_USE_AREA_B, CB_NOT_ASSIGNED, CB_COUNT
		};

		/// 2lvpeB
		enum BinaryProperty {
			BP_ALPHABETIC, BP_ASCII_HEX_DIGIT, BP_BIDI_CONTROL, BP_BIDI_MIRRORED, BP_COMPOSITION_EXCLUSION, BP_DASH,
			BP_DEFAULT_IGNORABLE_CODE_POINT, BP_DEPRECATED, BP_DIACRITIC, BP_EXPANDS_ON_NFC, BP_EXPANDS_ON_NFD,
			BP_EXPANDS_ON_NFKC, BP_EXPANDS_ON_NFKD, BP_EXTENDER, BP_FULL_COMPOSITION_EXCLUSION, BP_GRAPHEME_BASE,
			BP_GRAPHEME_EXTEND, BP_GRAPHEME_LINK, BP_HEX_DIGIT, BP_HYPHEN, BP_ID_CONTINUE, BP_ID_START, BP_IDEOGRAPHIC,
			BP_IDS_BINARY_OPERATOR, BP_IDS_TRINARY_OPERATOR, BP_JOIN_CONTROL, BP_LOGICAL_ORDER_EXCEPTION, BP_LOWERCASE,
			BP_MATH, BP_NONCHARACTER_CODE_POINT, BP_OTHER_ALPHABETIC, BP_OTHER_DEFAULT_IGNORABLE_CODE_POINT,
			BP_OTHER_GRAPHEME_EXTEND, BP_OTHER_ID_CONTINUE, BP_OTHER_ID_START, BP_OTHER_LOWERCASE, BP_OTHER_MATH,
			BP_OTHER_UPPERCASE, BP_PATTERN_SYNTAX, BP_PATTERN_WHITE_SPACE, BP_QUOTATION_MARK, BP_RADICAL, BP_SOFT_DOTTED,
			BP_STERM, BP_TERMINAL_PUNCTUATION, BP_UNIFIED_IDEOGRAPH, BP_UPPERCASE, BP_VARIATION_SELECTOR, BP_WHITE_SPACE,
			BP_XID_CONTINUE, BP_XID_START, BP_COUNT
		};

		// \bh
	public:
		static GeneralCategory	getGeneralCategory(CodePoint cp);
		static CodeBlock		getCodeBlock(CodePoint cp);
		static Script			getScript(CodePoint cp);
		template<BinaryProperty property>
		static bool				hasBinaryProperty(CodePoint cp);
		static bool				hasBinaryProperty(CodePoint cp, BinaryProperty property);
		static std::size_t		toNFD(CodePoint cp, CodePoint& first, CodePoint& second);
		static length_t			toNFD(CodePoint cp, char_t* dest);

		// ʕ
		static bool	isLetter(CodePoint cp);
		static bool	isMark(CodePoint cp);
		static bool	isNumber(CodePoint cp);
		static bool	isOther(CodePoint cp);
		static bool	isPunctuation(CodePoint cp);
		static bool	isSeparator(CodePoint cp);
		static bool	isSymbol(CodePoint cp);
	private:
		CharProperty();
#include "code-table\UnicodeProperties_Definition"
	};

#include "code-table\UnicodeProperties_Implementation"

	/**
	 *	Bidirectional iterator that walks a UTF-16 code unit sequence one code point at a time
	 *	and yields each code point as a UTF-32 value.
	 *
	 *	The iterator stores only a raw pointer and does not know where the underlying buffer
	 *	begins or ends. Dereferencing or incrementing at a high surrogate reads the following
	 *	code unit, and decrementing onto a low surrogate reads the code unit before it, so the
	 *	caller must make sure those units exist (e.g. a terminated, well-formed string).
	 *	An unpaired surrogate is treated as a single code point, consistently with
	 *	UTF16Surrogates::decode.
	 */
	class UTF16ToUTF32Iterator : public std::iterator<std::bidirectional_iterator_tag, CodePoint> {
	public:
		/// Creates an iterator positioned at the code unit @a p.
		UTF16ToUTF32Iterator(const char_t* p) : p_(p) {}
		/// Advances past the current code point: two code units for a surrogate pair, one otherwise.
		UTF16ToUTF32Iterator& operator ++() {
			// Inspect the current position (not the one after it), exactly as operator* does,
			// so that nothing beyond what a dereference would read is touched.
			if(UTF16Surrogates::isHighSurrogate(p_[0]) && UTF16Surrogates::isLowSurrogate(p_[1]))
				p_ += 2;
			else
				++p_;
			return *this;
		}
		const UTF16ToUTF32Iterator operator ++(int) {UTF16ToUTF32Iterator tmp(*this); ++(*this); return tmp;}
		/// Moves back to the start of the previous code point.
		UTF16ToUTF32Iterator& operator --() {
			--p_;
			// Landed on the trailing half of a pair: step back once more onto the high surrogate.
			if(UTF16Surrogates::isLowSurrogate(p_[0]) && UTF16Surrogates::isHighSurrogate(p_[-1]))
				--p_;
			return *this;
		}
		const UTF16ToUTF32Iterator operator --(int) {UTF16ToUTF32Iterator tmp(*this); --(*this); return tmp;}
		/// Returns the code point at the current position.
		CodePoint operator *() const {return UTF16Surrogates::decode(p_, 2);}
		// Comparisons order iterators by their position in the underlying buffer.
		bool operator ==(const UTF16ToUTF32Iterator& rhs) const {return p_ == rhs.p_;}
		bool operator !=(const UTF16ToUTF32Iterator& rhs) const {return !(*this == rhs);}
		bool operator <(const UTF16ToUTF32Iterator& rhs) const {return p_ < rhs.p_;}
		bool operator <=(const UTF16ToUTF32Iterator& rhs) const {return !(rhs < *this);}
		bool operator >(const UTF16ToUTF32Iterator& rhs) const {return rhs < *this;}
		bool operator >=(const UTF16ToUTF32Iterator& rhs) const {return !(*this < rhs);}
		/// Returns the current position in the underlying UTF-16 sequence.
		const char_t* tell() const throw() {return p_;}
	private:
		const char_t* p_;	// current position (first code unit of the current code point)
	};

	/**
	 *	Bidirectional iterator that walks a UTF-32 code point sequence one UTF-16 code unit at
	 *	a time. A code point below 0x10000 yields one code unit; any other code point yields two
	 *	(high surrogate, then low surrogate).
	 *
	 *	The iterator stores only a raw pointer and does not know where the underlying buffer
	 *	begins or ends; incrementing and dereferencing read the current code point and
	 *	decrementing may read the previous one.
	 */
	class UTF32ToUTF16Iterator : public std::iterator<std::bidirectional_iterator_tag, char_t> {
	public:
		/// Creates an iterator positioned at the first code unit of the code point @a p.
		UTF32ToUTF16Iterator(const CodePoint* p) : p_(p), high_(true) {}
		/// Advances to the next UTF-16 code unit.
		UTF32ToUTF16Iterator& operator ++() {
			if(*p_ < 0x10000) ++p_;
			else {high_ = !high_; if(high_) ++p_;}	// high -> low of the same code point, low -> next code point
			return *this;
		}
		const UTF32ToUTF16Iterator operator ++(int) {UTF32ToUTF16Iterator tmp(*this); ++(*this); return tmp;}
		/// Moves back to the previous UTF-16 code unit.
		UTF32ToUTF16Iterator& operator --() {
			if(!high_)
				high_ = true;	// low surrogate -> high surrogate of the same code point
			else {
				// Step to the previous code point and land on its LAST code unit: the low
				// surrogate if it needs two units, its only unit otherwise. The decision must
				// be based on the previous code point, not on the one being left.
				--p_;
				high_ = (*p_ < 0x10000);
			}
			return *this;
		}
		const UTF32ToUTF16Iterator operator --(int) {UTF32ToUTF16Iterator tmp(*this); --(*this); return tmp;}
		/// Returns the UTF-16 code unit at the current position.
		char_t operator *() const {
			if(*p_ < 0x10000) return static_cast<char_t>(*p_ & 0xFFFF);
			else {
				// encode() writes nothing for values above 0x10FFFF; pre-fill with U+FFFD
				// REPLACEMENT CHARACTER so that such input never yields an indeterminate value.
				char_t text[2];
				text[0] = text[1] = static_cast<char_t>(0xFFFD);
				UTF16Surrogates::encode(*p_, text);
				return text[high_ ? 0 : 1];
			}
		}
		// Comparisons order iterators by code point position, then high unit before low unit.
		bool operator ==(const UTF32ToUTF16Iterator& rhs) const {return p_ == rhs.p_ && high_ == rhs.high_;}
		bool operator !=(const UTF32ToUTF16Iterator& rhs) const {return !(*this == rhs);}
		bool operator <(const UTF32ToUTF16Iterator& rhs) const {return p_ < rhs.p_ || (p_ == rhs.p_ && high_ && !rhs.high_);}
		bool operator <=(const UTF32ToUTF16Iterator& rhs) const {return *this < rhs || *this == rhs;}
		bool operator >(const UTF32ToUTF16Iterator& rhs) const {return !(*this <= rhs);}
		bool operator >=(const UTF32ToUTF16Iterator& rhs) const {return !(*this < rhs);}
		/// Returns the current position in the underlying UTF-32 sequence.
		const CodePoint* tell() const throw() {return p_;}
	private:
		const CodePoint* p_;	// current code point
		bool high_;				// true at the first (or only) code unit of *p_, false at its low surrogate
	};

	/// Legacy character classification routines
	/// (UTS #18: Unicode Regular Expressions, Annex C: Compatibility Properties).
	struct LegacyCharTypes {
		/// Alphabetic character: has the Alphabetic binary property.
		static bool	isalpha(CodePoint cp) {return CharProperty::hasBinaryProperty<CharProperty::BP_ALPHABETIC>(cp);}
		/// Alphabetic character or decimal digit.
		static bool	isalnum(CodePoint cp) {return isalpha(cp) || isdigit(cp);}
		/// Blank character: White_Space, except LF, VT, FF, CR, NEL (U+0085) and the
		/// line and paragraph separators.
		static bool	isblank(CodePoint cp) {
			if(cp == L'\n' || cp == L'\v' || cp == L'\f' || cp == L'\r' || cp == 0x0085)	return false;
			// Only white space can be blank. The check must reject non-white-space here
			// instead of accepting white space, so that the separator exclusion below is
			// still applied to U+2028/U+2029 and ordinary characters are not reported as blank.
			if(!CharProperty::hasBinaryProperty<CharProperty::BP_WHITE_SPACE>(cp))	return false;
			const CharProperty::GeneralCategory gc = CharProperty::getGeneralCategory(cp);
			return gc != CharProperty::GC_SEPARATOR_LINE && gc != CharProperty::GC_SEPARATOR_PARAGRAPH;
		}
		/// Control character: general category Cc.
		static bool	iscntrl(CodePoint cp) {return CharProperty::getGeneralCategory(cp) == CharProperty::GC_OTHER_CONTROL;}
		/// Decimal digit: general category Nd.
		static bool	isdigit(CodePoint cp) {return CharProperty::getGeneralCategory(cp) == CharProperty::GC_NUMBER_DECIMAL_DIGIT;}
		/// Graphic character: not white space and not in general category Cc, Cf, Cs or Cn.
		static bool	isgraph(CodePoint cp) {
			if(isspace(cp))	return false;
			const CharProperty::GeneralCategory gc = CharProperty::getGeneralCategory(cp);
			return gc != CharProperty::GC_OTHER_CONTROL
				&& gc != CharProperty::GC_OTHER_FORMAT
				&& gc != CharProperty::GC_OTHER_SURROGATE
				&& gc != CharProperty::GC_OTHER_NOT_ASSIGNED;
		}
		/// Lowercase character: has the Lowercase binary property.
		static bool	islower(CodePoint cp) {return CharProperty::hasBinaryProperty<CharProperty::BP_LOWERCASE>(cp);}
		/// Printable character: graphic or blank, but not a control character.
		static bool	isprint(CodePoint cp) {return (isgraph(cp) || isblank(cp)) && !iscntrl(cp);}
		/// Punctuation character, as reported by CharProperty::isPunctuation.
		static bool	ispunct(CodePoint cp) {return CharProperty::isPunctuation(cp);}
		/// White space character: has the White_Space binary property.
		static bool	isspace(CodePoint cp) {return CharProperty::hasBinaryProperty<CharProperty::BP_WHITE_SPACE>(cp);}
		/// Uppercase character: has the Uppercase binary property.
		static bool	isupper(CodePoint cp) {return CharProperty::hasBinaryProperty<CharProperty::BP_UPPERCASE>(cp);}
		/// Word character: alphabetic, decimal digit, mark or connector punctuation (Pc).
		static bool	isword(CodePoint cp) {
			return isalpha(cp) || isdigit(cp) || CharProperty::isMark(cp)
				|| CharProperty::getGeneralCategory(cp) == CharProperty::GC_PUNCTUATION_CONNECTOR;
		}
		/// Hexadecimal digit: general category Nd or the Hex_Digit binary property.
		static bool	isxdigit(CodePoint cp) {
			return CharProperty::getGeneralCategory(cp) == CharProperty::GC_NUMBER_DECIMAL_DIGIT
				|| CharProperty::hasBinaryProperty<CharProperty::BP_HEX_DIGIT>(cp);
		}
	};


	// inline implementations
	////////////////////////////////////////////////////////////////////////////

	/// Case-folds the UTF-16 range [@a first, @a last) in place by using the range itself
	/// as the destination of the three-argument overload.
	inline void CharacterFolder::foldCase(char_t* first, char_t* last) {
		foldCase(first, last, first);
	}

	/**
	 *	Case-folds the UTF-16 range [@a first, @a last) into a newly allocated buffer.
	 *	@return	a buffer of exactly (@a last - @a first) code units allocated with @c new[];
	 *			no terminator is appended and the caller is responsible for @c delete[]
	 */
	inline char_t* CharacterFolder::foldCase(const char_t* first, const char_t* last) {
		const std::size_t length = static_cast<std::size_t>(last - first);
		char_t* const folded = new char_t[length];
		foldCase(first, last, folded);
		return folded;
	}

	/// P[XtH[fBO
	inline void CharacterFolder::foldCase(const char_t* first, const char_t* last, char_t* dest) {
		assert(first != 0 && last != 0 && first <= last && dest != 0);
		CodePoint cp;
		for(length_t i = 0; first + i < last; ++i) {
			cp = UTF16Surrogates::decode(first + i, static_cast<std::size_t>(last - first - i));
			cp = foldCase(cp);
			if(cp < 0x10000)
				dest[i] = static_cast<char_t>(cp & 0xFFFF);
			else {
				UTF16Surrogates::encode(cp, dest + i);
				++i;
			}
		}
	}

	/// Converts a decimal digit of any script listed below to the corresponding ASCII digit
	/// '0'-'9'. Any other code point is returned unchanged.
	inline CodePoint CharacterFolder::foldDigit(CodePoint cp) {
#if ASCENSION_UNICODE_VERSION != 0x0410
#error This code is based on old version of Unicode.
#endif
		// Based on the 'Nd' general category of Unicode 4.1. Each entry is the digit zero of
		// a run of ten consecutive decimal digits (zero .. zero + 9).
		static const CodePoint ZERO_DIGITS[] = {
			0x0030,		// ASCII
			0x0660,		// Arabic-Indic
			0x06F0,		// Extended Arabic-Indic
			0x0966,		// Devanagari
			0x09E6,		// Bengali
			0x0A66,		// Gurmukhi
			0x0AE6,		// Gujarati
			0x0B66,		// Oriya
			0x0BE6,		// Tamil (zero was introduced in Unicode 4.1)
			0x0C66,		// Telugu
			0x0CE6,		// Kannada
			0x0D66,		// Malayalam
			0x0E50,		// Thai
			0x0ED0,		// Lao
			0x0F20,		// Tibetan
			0x1040,		// Myanmar
			// Ethiopic (0x1369..0x1371) has no zero and was removed from 'Nd' in Unicode 4.1
			0x17E0,		// Khmer
			0x1810,		// Mongolian
			0x1946,		// Limbu
			0x19D0,		// New Tai Lue (Unicode 4.1)
			0xFF10,		// Fullwidth
			0x0104A0,	// Osmanya
			0x01D7CE,	// Mathematical bold
			0x01D7D8,	// Mathematical double-struck
			0x01D7E2,	// Mathematical sans-serif
			0x01D7EC,	// Mathematical sans-serif bold
			0x01D7F6	// Mathematical monospace
		};
		for(std::size_t i = 0; i < sizeof(ZERO_DIGITS) / sizeof(ZERO_DIGITS[0]); ++i) {
			const CodePoint zero = ZERO_DIGITS[i];
			if(cp >= zero && cp <= zero + 9)
				return cp - zero + L'0';
		}
		return cp;
	}
} // namespace Ascension

#endif /* UNICODE_UTILS_H_ */

/* [EOF] */