// TextSearcher.cpp
// (c) 2004 exeal

#include "StdAfx.h"
#include "TextSearcher.h"
#include "EditView.h"
#include "Lexer.h"
#include <vector>

using namespace Ascension;
using namespace Manah::Text;


namespace {
	/**
	 *	Per-flag code point folding used by the text searcher.
	 *	Each search flag gets its own specialization of Collate.
	 */
	template<SearchFlag> class _CComparison {
	public:
		///	Converts a single code point.
		///	The "ignore" specializations return 0xFFFFFFFF for a code point
		///	that is to be left out of the comparison.
		static CodePoint	Collate(CodePoint cp);
	};
	///	Case sensitive comparison: the code point is used as is.
	template<> inline CodePoint _CComparison<SF_MATCH_CASE>::Collate(CodePoint cp) {
		return cp;
	}
	///	Lower-cases BMP code points with towlower; others are unchanged.
	template<> inline CodePoint _CComparison<SF_IGNORE_CASE_ASCII>::Collate(CodePoint cp) {
		return (cp < 0x10000) ? towlower(static_cast<wchar_t>(cp)) : cp;
	}
	///	Lower-cases BMP code points with CharLowerW; others are unchanged.
	template<> inline CodePoint _CComparison<SF_IGNORE_CASE_SIMPLE>::Collate(CodePoint cp) {
		// CharLowerW converts a single character only while the high-order word
		// of its argument is zero; any other value is used as a string pointer
		if(cp >= 0x10000)
			return cp;
		// go through size_t so that the casts stay valid where pointers are wider than CodePoint
		return static_cast<CodePoint>(reinterpret_cast<size_t>(
			::CharLowerW(reinterpret_cast<WCHAR*>(static_cast<size_t>(cp)))));
	}
	///	Maps BMP code points with ToKatakana; others are unchanged.
	template<> inline CodePoint _CComparison<SF_IGNORE_KANATYPE>::Collate(CodePoint cp) {
		return (cp < 0x10000) ? ToKatakana(static_cast<wchar_t>(cp)) : cp;
	}
	///	Maps BMP code points to their half-width form with LCMapStringW.
	template<> inline CodePoint _CComparison<SF_IGNORE_WIDTH>::Collate(CodePoint cp) {
		if(cp >= 0x10000)
			return cp;
		// a real one-character source avoids depending on the byte order of cp,
		// and a local destination avoids sharing a static buffer between callers
		const char_t	chSource = static_cast<char_t>(cp);
		char_t			chMapped;
		if(::LCMapStringW(LOCALE_USER_DEFAULT, LCMAP_HALFWIDTH, &chSource, 1, &chMapped, 1) != 0)
			return chMapped;
		return cp;
		//return ToHalfWidth(ch);	// (original note, roughly: half-width kana are not covered)
	}
	///	Maps the code point with ToAsciiDigit.
	template<> inline CodePoint _CComparison<SF_IGNORE_DIGITTYPE>::Collate(CodePoint cp) {
		return ToAsciiDigit(cp);
	}
	///	Not implemented yet: the code point is passed through unchanged.
	template<> inline CodePoint	_CComparison<SF_IGNORE_ALEFHAMZA>::Collate(CodePoint cp) {
		// TODO: implement the folding. Until then return the input, because
		// flowing off the end of a non-void function is undefined behavior.
		return cp;
	}
	///	Returns 0xFFFFFFFF if the code point is listed in the punctuation table.
	template<> inline CodePoint	_CComparison<SF_IGNORE_PUNCTUATIONS>::Collate(CodePoint cp) {
		// code points whose general category is Pc, Pd, Ps, Pe, Pi, Pf or Po (generated by unicat.pl)
		static CodePoint	arrPunctuations[] = {
#if(ASCENSION_UNICODE_VERSION != 0x0400)
#error Included file version differs from Ascension Unicode version. Update correspoding file.
#endif
#include "script\TextSearcher_Punctuation_4_0"
		};
		return binary_search(arrPunctuations,
			arrPunctuations + sizeof(arrPunctuations) / sizeof(arrPunctuations[0]), cp) ? 0xFFFFFFFF : cp;
	}
	///	Returns 0xFFFFFFFF if the code point is listed in the symbol table.
	template<> inline CodePoint	_CComparison<SF_IGNORE_SYMBOLS>::Collate(CodePoint cp) {
		// code points whose general category is Sm, Sc, Sk or So (generated by unicat.pl)
		static CodePoint	arrSymbols[] = {
#if(ASCENSION_UNICODE_VERSION != 0x0400)
#error Included file version differs from Ascension Unicode version. Update correspoding file.
#endif
#include "script\TextSearcher_Symbol_4_0"
		};
		return binary_search(arrSymbols,
			arrSymbols + sizeof(arrSymbols) / sizeof(arrSymbols[0]), cp) ? 0xFFFFFFFF : cp;
	}
	///	Returns 0xFFFFFFFF if the code point is listed in the white space table.
	template<> inline CodePoint _CComparison<SF_IGNORE_WHITESPACES>::Collate(CodePoint cp) {
		// code points whose general category is Zs, plus TAB (generated by unicat.pl)
		// see also Lexer::IsWhiteSpace
#if(ASCENSION_UNICODE_VERSION != 0x0400)
#error Included file version differs from Ascension Unicode version. Update correspoding file.
#endif
		static const char_t	arrZs[] = {
			0x0009, 0x0020, 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002,
			0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
			0x200B, 0x202F, 0x205F, 0x3000,
		};
		// the element count must be derived from the element type of this table (char_t),
		// not from CodePoint, otherwise only part of the table is searched
		return binary_search(arrZs, arrZs + sizeof(arrZs) / sizeof(arrZs[0]), cp) ? 0xFFFFFFFF : cp;
	}
//	template<> inline CodePoint	_CComparison<SF_IGNORE_DIACRITICS>::Collate(CodePoint cp) {
//	}
//	template<> inline CodePoint	_CComparison<SF_IGNORE_VOWELS>::Collate(CodePoint cp) {
//	}
//	template<> inline CodePoint	_CComparison<SF_IGNORE_KASHIDA>::Collate(CodePoint cp) {
//	}
	///	Returns 0xFFFFFFFF if the code point is listed in the control table.
	template<> inline CodePoint	_CComparison<SF_IGNORE_CONTROLS>::Collate(CodePoint cp) {
		// code points whose general category is Cc or Cf (generated by unicat.pl)
		static CodePoint	arrControls[] = {
#if(ASCENSION_UNICODE_VERSION != 0x0400)
#error Included file version differs from Ascension Unicode version. Update correspoding file.
#endif
#include "script\TextSearcher_Control_4_0"
		};
		return binary_search(arrControls,
			arrControls + sizeof(arrControls) / sizeof(arrControls[0]), cp) ? 0xFFFFFFFF : cp;
	}
}

// The searcher whose _Collate is applied by _CAscensionRegExpTraits::translate.
// CTextSearcher::Search assigns it before compiling a regular expression.
CTextSearcher* _CAscensionRegExpTraits::m_pSearcher = 0;


// CTextSearcher class implementation
/////////////////////////////////////////////////////////////////////////////

///	Constructor.
///	NOTE(review): no member is initialized here; unless the header does so,
///	m_sfFlags holds an indeterminate value until SetFlags is called - confirm.
CTextSearcher::CTextSearcher() {
}

/**
 *	Folds a code point according to the search flags currently set.
 *	@param cp	the code point to convert
 *	@return		the converted code point, or 0xFFFFFFFF if the code point
 *				is to be ignored under the current flags
 */
inline CodePoint CTextSearcher::_Collate(CodePoint cp) const {
	AssertValid();

	const CodePoint	cpIgnored = 0xFFFFFFFF;

	// first the categories that drop out of the comparison altogether
	const bool	bDropped =
		(toBoolean(m_sfFlags & SF_IGNORE_PUNCTUATIONS)
			&& _CComparison<SF_IGNORE_PUNCTUATIONS>::Collate(cp) == cpIgnored)
		|| (toBoolean(m_sfFlags & SF_IGNORE_SYMBOLS)
			&& _CComparison<SF_IGNORE_SYMBOLS>::Collate(cp) == cpIgnored)
		|| (toBoolean(m_sfFlags & SF_IGNORE_WHITESPACES)
			&& _CComparison<SF_IGNORE_WHITESPACES>::Collate(cp) == cpIgnored)
		|| (toBoolean(m_sfFlags & SF_IGNORE_CONTROLS)
			&& _CComparison<SF_IGNORE_CONTROLS>::Collate(cp) == cpIgnored);
	if(bDropped)
		return cpIgnored;

	// then the one-to-one foldings: case, kana type, width, digit type (in this order)
	CodePoint	cpFolded = cp;
	if((m_sfFlags & SF_IGNORE_CASE_FULL) == SF_IGNORE_CASE_SIMPLE)
		cpFolded = _CComparison<SF_IGNORE_CASE_SIMPLE>::Collate(cpFolded);
	else if((m_sfFlags & SF_IGNORE_CASE_FULL) == SF_IGNORE_CASE_ASCII)
		cpFolded = _CComparison<SF_IGNORE_CASE_ASCII>::Collate(cpFolded);
	if(toBoolean(m_sfFlags & SF_IGNORE_KANATYPE))
		cpFolded = _CComparison<SF_IGNORE_KANATYPE>::Collate(cpFolded);
	if(toBoolean(m_sfFlags & SF_IGNORE_WIDTH))
		cpFolded = _CComparison<SF_IGNORE_WIDTH>::Collate(cpFolded);
	if(toBoolean(m_sfFlags & SF_IGNORE_DIGITTYPE))
		cpFolded = _CComparison<SF_IGNORE_DIGITTYPE>::Collate(cpFolded);
	return cpFolded;
}

///	Returns the search flags currently set (the value last passed to SetFlags).
SearchFlag CTextSearcher::GetFlags() const {
	AssertValid();
	return m_sfFlags;
}

///	Returns whether regular expression search is available.
///	This is a build-time property: false when NO_REGEXP is defined, true otherwise.
bool CTextSearcher::IsRegExpAvailable() const {
	AssertValid();
#ifdef NO_REGEXP
	const bool	bAvailable = false;
#else
	const bool	bAvailable = true;
#endif
	return bAvailable;
}

/**
 *	s
 *	@param strText	Ώە (󕶎񂾂ƏɎs)
 *	@param iBegin	Jnʒu (-1 ƕ̏I[)
 *	@param iFound	[out] ʒu
 *	@param cchFound	[out] ̒
 *	@param boundary	[in] PꋫE
 *	@return			ǂ
 *	@throw EFailedToLoadRegExpEngine
 *					K\GW̓ǂݍ݂Ɏs
 */
bool CTextSearcher::Search(const string_t& strText, length_t iBegin, length_t& iFound,
		length_t& cchFound, const CBoundarySearcher& boundary) throw(EFailedToLoadRegExpEngine, ERegExpPatternIsInvalid) {
	AssertValid();
	if(strText.empty())
		return false;
	if(iBegin == 0 && toBoolean(m_sfFlags & SF_BACKWARD))
		return false;
	if(iBegin == -1)
		iBegin = strText.length();

	// 
	if(toBoolean(m_sfFlags & SF_REGEXP)) {
		if(!IsRegExpAvailable())
			throw EFailedToLoadRegExpEngine();
#ifndef NO_REGEXP
		_CAscensionRegExpTraits::m_pLexer = boundary.GetView().GetLexer();
		_CAscensionRegExpTraits::m_pSearcher = this;
		boost::reg_expression<char_t, _CAscensionRegExpTraits>	pattern;
		try {
			pattern.assign(m_strFindWhat.c_str(), boost::regbase::perl);
		} catch(boost::bad_pattern& e) {
			throw ERegExpPatternIsInvalid(e.what());
			return false;
		}

		boost::match_results<string_t::const_iterator>	results;
		if(!toBoolean(m_sfFlags & SF_BACKWARD)) {	// O
			do {
				if(!boost::regex_search(strText.begin() + iBegin, strText.end(), results, pattern))
					return false;
				iFound = results.position() + iBegin;
				cchFound = results.length();
				if(!toBoolean(m_sfFlags & SF_WHOLEWORD)	// PPʂŒTꍇ
						|| (boundary.HasWordBoundaryAt(strText.c_str(), iFound)
						&& boundary.HasWordBoundaryAt(strText.c_str(), iFound + cchFound)))
					return true;
				iBegin = iFound + cchFound;
			} while(true);
		} else {	// 
			// ܂ōsɃ}b`Ώۂg債
			for(length_t iStart = (iBegin == 0 || strText[0] == L'^') ? 0 : iBegin - 1; iStart != -1; --iStart) {
				if(boost::regex_search(strText.begin() + iStart, strText.end(), results, pattern)
						&& results.position() == 0) {
					iFound = results.position() + iStart;
					cchFound = results.length();
					if(!toBoolean(m_sfFlags & SF_WHOLEWORD)	// PPʂŒTꍇ
							|| (boundary.HasWordBoundaryAt(strText.c_str(), iFound)
							&& boundary.HasWordBoundaryAt(strText.c_str(), iFound + cchFound)))
						return true;
				}
			}
		}
#endif /* NO_REGEXP */
	} else {
		const char_t*	pwszFindWhat = m_strFindWhat.c_str();
		const char_t*	pwszTarget = strText.c_str();
		char_t*			pwszFindWhatCollated = new char_t[m_strFindWhat.length()];
		char_t*			pwszTargetCollated = new char_t[strText.length()];
		CodePoint		cp, cpCollated;
		length_t		iCollated = 0;

		//  collate
		for(length_t i = 0; i < m_strFindWhat.length(); ++i) {
			cp = DecodeUTF16SurrogatePairToCodePoint(pwszFindWhat + i, m_strFindWhat.length() - i);
			cpCollated = _Collate(cp);
			if(cpCollated != 0xFFFFFFFF) {
				if(EncodeCodePointToUTF16SurrogatePair(cpCollated, pwszFindWhatCollated + iCollated))
					iCollated += 2;
				else
					++iCollated;
			}
			if(cp >= 0x10000)
				++i;
		}
		const string_t	strFindWhatCollated(pwszFindWhatCollated, iCollated);

		// Ώە collate
		list<length_t>	listShortenedPositions;
		iCollated = 0;
		for(length_t i = toBoolean(m_sfFlags & SF_BACKWARD) ? 0 : iBegin;
				i < (toBoolean(m_sfFlags & SF_BACKWARD) ? iBegin : strText.length()); ++i) {
			cp = DecodeUTF16SurrogatePairToCodePoint(pwszTarget + i, strText.length() - i);
			cpCollated = _Collate(cp);
			if(cpCollated != 0xFFFFFFFF) {
				if(EncodeCodePointToUTF16SurrogatePair(cpCollated, pwszTargetCollated + iCollated))
					iCollated += 2;
				else
					++iCollated;
			} else
				listShortenedPositions.push_back(i);
			if(cp >= 0x10000)
				++i;
		}
		const string_t	strTargetCollated(pwszTargetCollated, iCollated);

		// 
		length_t	iEnd;	// vI[
		iFound = iBegin;
		do {
			if(toBoolean(m_sfFlags & SF_BACKWARD))	// O
				iFound = strTargetCollated.rfind(strFindWhatCollated, iFound - 1);
			else									// 
				iFound = strTargetCollated.find(strFindWhatCollated, iFound);
			if(iFound == string_t::npos)
				break;

			// vʒu𖳎镶菜ÖʒuɃ}bv
			iEnd = iFound + strFindWhatCollated.length();
			for(list<length_t>::const_iterator it =
					listShortenedPositions.begin(); it != listShortenedPositions.end(); ++it) {
				if(iFound >= *it)
					++iFound;
				if(iEnd > *it)
					++iEnd;
				else
					break;
			}
			if(!toBoolean(m_sfFlags & SF_WHOLEWORD)
					|| (boundary.HasWordBoundaryAt(strText.c_str(), iFound)
					&& boundary.HasWordBoundaryAt(strText.c_str(), iEnd))) {
				cchFound = iEnd - iFound;
				break;
			}
			iFound += (toBoolean(m_sfFlags & SF_BACKWARD) ? -1 : 1);
		} while(true);

		delete[] pwszFindWhatCollated;
		delete[] pwszTargetCollated;
		return iFound != string_t::npos;
	}

	return false;
}

///	Sets the search flags. The value is stored as is, without validation.
void CTextSearcher::SetFlags(SearchFlag flags) {
	AssertValid();
	m_sfFlags = flags;
}

///	Sets the pattern to search for (a copy of the string is stored).
void CTextSearcher::SetText(const string_t& strText) {
	AssertValid();
	m_strFindWhat = strText;
}


// CBoundarySearcher
/////////////////////////////////////////////////////////////////////////////

///	Constructor. Binds the searcher to the given view; only a reference is kept.
CBoundarySearcher::CBoundarySearcher(CEditView& view) : m_view(view) {
}

/**
 *	@brief	Returns whether two code points belong to the same script
 *
 *	This method takes for granted that both code points are letters
 *	and performs no check of that at all.
 *
 *	In addition, the whole printable ASCII range is treated as Latin script.
 *
 *	@param cp1, cp2	the code points to compare. The order matters for the Japanese
 *					special case: cp1 is the left one and cp2 the right one
 *	@return			true if both are considered to be of the same script
 */
bool CBoundarySearcher::AreSameScriptType(CodePoint cp1, CodePoint cp2) {
	// true if cp is in [start, end] or the nested test "range" holds
#define IN_SCRIPT_RANGE(cp, start, end, range)	\
	((cp >= start && cp <= end) || range)
	// true if cp is in [start, end]; ends a chain of IN_SCRIPT_RANGE
#define IN_SCRIPT_RANGE_(cp, start, end)	\
	(cp >= start && cp <= end)
	// true if both cp1 and cp2 are in [start, end]
#define IN_SAME_SCRIPT_RANGE(start, end)	\
	(cp1 >= start && cp1 <= end && cp2 >= start && cp2 <= end)

	// built from Scripts.txt of the UCD (Unicode 4.0)
#if(ASCENSION_UNICODE_VERSION != 0x0400)
#error This method is based on old version of Unicode.
#endif

	// Japanese okurigana: hiragana directly to the right of
	// katakana or kanji does not form a word boundary.
	// The primary language must be compared for equality; a bitwise
	// test against LANG_JAPANESE also matches unrelated languages
	if(PRIMARYLANGID(::GetUserDefaultLangID()) == LANG_JAPANESE) {
		if(IN_SCRIPT_RANGE_(cp2, 0x3041, 0x309F) &&
			IN_SCRIPT_RANGE(cp1, 0x30A1, 0x30FF,	// Katakana
				IN_SCRIPT_RANGE(cp1, 0x31F0, 0x31FF,
					IN_SCRIPT_RANGE(cp1, 0x2E80, 0x2FD5,	// Han
						IN_SCRIPT_RANGE(cp1, 0x3005, 0x303B,
							IN_SCRIPT_RANGE(cp1, 0x3400, 0x4DB5,
								IN_SCRIPT_RANGE(cp1, 0x4E00, 0x9FA5,
									IN_SCRIPT_RANGE(cp1, 0xF900, 0xFA6A,
										IN_SCRIPT_RANGE(cp1, 0x20000, 0x2A6D6,
											IN_SCRIPT_RANGE_(cp1, 0x2F800, 0x2FA1D))))))))))
			return true;
	}

	// note: the accurate first range of the Latin script is U+0041 - U+007A

	return
		(IN_SCRIPT_RANGE(cp1, 0x0021, 0x007E,		// Latin
			IN_SCRIPT_RANGE(cp1, 0x00AA, 0x02E4,
				IN_SCRIPT_RANGE(cp1, 0x1D00, 0x1D6B,
					IN_SCRIPT_RANGE(cp1, 0x1E00, 0x1EF9,
						IN_SCRIPT_RANGE(cp1, 0x2071, 0x207F,
							IN_SCRIPT_RANGE(cp1, 0x212A, 0x212B,
								IN_SCRIPT_RANGE_(cp1, 0xFB00, 0xFB06))))))) &&
		(IN_SCRIPT_RANGE(cp2, 0x0021, 0x007E,
			IN_SCRIPT_RANGE(cp2, 0x00AA, 0x02E4,
				IN_SCRIPT_RANGE(cp2, 0x1D00, 0x1D6B,
					IN_SCRIPT_RANGE(cp2, 0x1E00, 0x1EF9,
						IN_SCRIPT_RANGE(cp2, 0x2071, 0x207F,
							IN_SCRIPT_RANGE(cp2, 0x212A, 0x212B,
								IN_SCRIPT_RANGE_(cp2, 0xFB00, 0xFB06)))))))))
		|| IN_SAME_SCRIPT_RANGE(0xFF21, 0xFF5A)		// Fullwidth Latin
		|| (IN_SCRIPT_RANGE(cp1, 0x00B5, 0x00B5,	// Greek
				IN_SCRIPT_RANGE(cp1, 0x037A, 0x03FB,
					IN_SCRIPT_RANGE(cp1, 0x1D26, 0x1FFC,
						IN_SCRIPT_RANGE_(cp1, 0x2126, 0x2126)))) &&
		(IN_SCRIPT_RANGE(cp2, 0x00B5, 0x00B5,
				IN_SCRIPT_RANGE(cp2, 0x037A, 0x03FB,
					IN_SCRIPT_RANGE(cp2, 0x1D26, 0x1FFC,
						IN_SCRIPT_RANGE_(cp2, 0x2126, 0x2126))))))
		|| (IN_SCRIPT_RANGE(cp1, 0x0400, 0x050F,	// Cyrillic
				IN_SCRIPT_RANGE_(cp1, 0x1D28, 0x1D28)) &&
		(IN_SCRIPT_RANGE(cp2, 0x0400, 0x050F,
				IN_SCRIPT_RANGE_(cp2, 0x1D28, 0x1D28))))
		|| (IN_SCRIPT_RANGE(cp1, 0x0531, 0x0587,	// Armenian
				IN_SCRIPT_RANGE_(cp1, 0xFB13, 0xFB17)) &&
		(IN_SCRIPT_RANGE(cp2, 0x0531, 0x0587,
				IN_SCRIPT_RANGE_(cp2, 0xFB13, 0xFB17))))
		|| (IN_SCRIPT_RANGE(cp1, 0x05D0, 0x05F2,	// Hebrew
				IN_SCRIPT_RANGE_(cp1, 0xFB1D, 0xFB4F)) &&
		(IN_SCRIPT_RANGE(cp2, 0x05D0, 0x05F2,
				IN_SCRIPT_RANGE_(cp2, 0xFB1D, 0xFB4F))))
		|| (IN_SCRIPT_RANGE(cp1, 0x0621, 0x06FF,	// Arabic
				IN_SCRIPT_RANGE(cp1, 0xFB50, 0xFDFB,
					IN_SCRIPT_RANGE_(cp1, 0xFE70, 0xFEFC))) &&
		(IN_SCRIPT_RANGE(cp2, 0x0621, 0x06FF,
				IN_SCRIPT_RANGE(cp2, 0xFB50, 0xFDFB,
					IN_SCRIPT_RANGE_(cp2, 0xFE70, 0xFEFC)))))
		|| IN_SAME_SCRIPT_RANGE(0x0780, 0x07B1)		// Thaana
		|| IN_SAME_SCRIPT_RANGE(0x0901, 0x096F)		// Devanagari
		|| IN_SAME_SCRIPT_RANGE(0x0981, 0x09F1)		// Bengali
		|| IN_SAME_SCRIPT_RANGE(0x0A02, 0x0A74)		// Gurmukhi
		|| IN_SAME_SCRIPT_RANGE(0x0A81, 0x0AEF)		// Gujarati
		|| IN_SAME_SCRIPT_RANGE(0x0B01, 0x0B71)		// Oriya
		|| IN_SAME_SCRIPT_RANGE(0x0B82, 0x0BF2)		// Tamil
		|| IN_SAME_SCRIPT_RANGE(0x0C01, 0x0C6F)		// Telugu
		|| IN_SAME_SCRIPT_RANGE(0x0C82, 0x0CEF)		// Kannada
		|| IN_SAME_SCRIPT_RANGE(0x0D02, 0x0D6F)		// Malayalam
		|| IN_SAME_SCRIPT_RANGE(0x0D82, 0x0DF3)		// Sinhala
		|| IN_SAME_SCRIPT_RANGE(0x0E01, 0x0E59)		// Thai
		|| IN_SAME_SCRIPT_RANGE(0x0E81, 0x0EDD)		// Lao
		|| IN_SAME_SCRIPT_RANGE(0x0F00, 0x0FC6)		// Tibetan
		|| IN_SAME_SCRIPT_RANGE(0x1000, 0x1059)		// Myanmar
		|| IN_SAME_SCRIPT_RANGE(0x10A0, 0x10F8)		// Georgian
		|| (IN_SCRIPT_RANGE(cp1, 0x1100, 0x11F9,	// Hangul
				IN_SCRIPT_RANGE(cp1, 0x3131, 0x318E,
					IN_SCRIPT_RANGE_(cp1, 0xAC00, 0xD7A3))) &&
		(IN_SCRIPT_RANGE(cp2, 0x1100, 0x11F9,
				IN_SCRIPT_RANGE(cp2, 0x3131, 0x318E,
					IN_SCRIPT_RANGE_(cp2, 0xAC00, 0xD7A3)))))
		|| IN_SAME_SCRIPT_RANGE(0xFFA0, 0xFFDC)		// Halfwidth Hangul
		|| (IN_SCRIPT_RANGE(cp1, 0x1200, 0x135A,	// Ethiopic
				IN_SCRIPT_RANGE_(cp1, 0x1369, 0x137C)) &&
		(IN_SCRIPT_RANGE(cp2, 0x1200, 0x135A,
				IN_SCRIPT_RANGE_(cp2, 0x1369, 0x137C))))
		|| IN_SAME_SCRIPT_RANGE(0x13A0, 0x13F4)		// Cherokee
		|| IN_SAME_SCRIPT_RANGE(0x1401, 0x1676)		// Canadian Aboriginal
		|| IN_SAME_SCRIPT_RANGE(0x1681, 0x169A)		// Ogham
		|| IN_SAME_SCRIPT_RANGE(0x16A0, 0x16F0)		// Runic
		|| IN_SAME_SCRIPT_RANGE(0x1780, 0x17E9)		// Khmer
		|| IN_SAME_SCRIPT_RANGE(0x1810, 0x18A9)		// Mongolian
		|| IN_SAME_SCRIPT_RANGE(0x3041, 0x309F)		// Hiragana
		|| (IN_SCRIPT_RANGE(cp1, 0x30A1, 0x30FF,	// Katakana
				IN_SCRIPT_RANGE_(cp1, 0x31F0, 0x31FF)) &&
		(IN_SCRIPT_RANGE(cp2, 0x30A1, 0x30FF,
				IN_SCRIPT_RANGE_(cp2, 0x31F0, 0x31FF))))
		|| IN_SAME_SCRIPT_RANGE(0xFF66, 0xFF9D)		// Halfwidth Katakana
		|| (IN_SCRIPT_RANGE(cp1, 0x3105, 0x312C,	// Bopomofo
				IN_SCRIPT_RANGE_(cp1, 0x31A0, 0x31B7)) &&
		(IN_SCRIPT_RANGE(cp2, 0x3105, 0x312C,
				IN_SCRIPT_RANGE_(cp2, 0x31A0, 0x31B7))))
		|| (IN_SCRIPT_RANGE(cp1, 0x2E80, 0x2FD5,	// Han
				IN_SCRIPT_RANGE(cp1, 0x3005, 0x303B,
					IN_SCRIPT_RANGE(cp1, 0x3400, 0x4DB5,
						IN_SCRIPT_RANGE(cp1, 0x4E00, 0x9FA5,
							IN_SCRIPT_RANGE(cp1, 0xF900, 0xFA6A,
								IN_SCRIPT_RANGE(cp1, 0x20000, 0x2A6D6,
									IN_SCRIPT_RANGE_(cp1, 0x2F800, 0x2FA1D))))))) &&
		(IN_SCRIPT_RANGE(cp2, 0x2E80, 0x2FD5,
				IN_SCRIPT_RANGE(cp2, 0x3005, 0x303B,
					IN_SCRIPT_RANGE(cp2, 0x3400, 0x4DB5,
						IN_SCRIPT_RANGE(cp2, 0x4E00, 0x9FA5,
							IN_SCRIPT_RANGE(cp2, 0xF900, 0xFA6A,
								IN_SCRIPT_RANGE(cp2, 0x20000, 0x2A6D6,
									IN_SCRIPT_RANGE_(cp2, 0x2F800, 0x2FA1D)))))))))
		|| IN_SAME_SCRIPT_RANGE(0xA000, 0xA4C6)		// Yi
		|| IN_SAME_SCRIPT_RANGE(0x10300, 0x1031F)	// Old Italic
		|| IN_SAME_SCRIPT_RANGE(0x10330, 0x1034A)	// Gothic
		|| IN_SAME_SCRIPT_RANGE(0x10400, 0x1044F)	// Deseret
		|| IN_SAME_SCRIPT_RANGE(0x1700, 0x1714)		// Tagalog
		|| IN_SAME_SCRIPT_RANGE(0x1720, 0x1734)		// Hanunoo
		|| IN_SAME_SCRIPT_RANGE(0x1740, 0x1753)		// Buhid (the end was 0x1573, an empty range)
		|| IN_SAME_SCRIPT_RANGE(0x1760, 0x1773)		// Tagbanwa
		|| IN_SAME_SCRIPT_RANGE(0x1900, 0x194F)		// Limbu
		|| IN_SAME_SCRIPT_RANGE(0x1950, 0x1974)		// Tai Le
		|| IN_SAME_SCRIPT_RANGE(0x10000, 0x100FA)	// Linear B
		|| IN_SAME_SCRIPT_RANGE(0x10380, 0x1039D)	// Ugaritic
		|| IN_SAME_SCRIPT_RANGE(0x10450, 0x1047F)	// Shavian
		|| IN_SAME_SCRIPT_RANGE(0x10480, 0x104A9)	// Osmanya
		|| IN_SAME_SCRIPT_RANGE(0x10800, 0x1083F)	// Cypriot
		|| IN_SAME_SCRIPT_RANGE(0x2800, 0x28FF);	// Braille

#undef IN_SCRIPT_RANGE
#undef IN_SCRIPT_RANGE_
#undef IN_SAME_SCRIPT_RANGE
}

///	Returns the view this searcher was constructed with.
const CEditView& CBoundarySearcher::GetView() const {
	AssertValid();
	return m_view;
}

/**
 *	Returns the class of a code point that is used to detect word boundaries.
 *	The classification depends on the lexer of the view (identifier start
 *	characters are letters) and, for a few punctuation marks, on the
 *	default language of the user.
 *	@param cp	the code point to classify
 *	@return		the word boundary class
 */
CBoundarySearcher::WBClass CBoundarySearcher::GetWordBoundaryClass(CodePoint cp) const {
	// only the primary language is of interest; the LANG_* constants are
	// identifiers, not bit flags, so they must be compared for equality
	const WORD		wPrimaryLanguage = PRIMARYLANGID(::GetUserDefaultLangID());
	const CLexer*	pLexer = m_view.GetLexer();

	// code points whose General Category is `Cf' or `Zs' (generated by unicat.pl)
#if(ASCENSION_UNICODE_VERSION != 0x0400)
#error List version differs from Ascension Unicode version. Update correspoding code.
#endif
	static const CodePoint	arrCf[] = {
		0x00AD, 0x0600, 0x0601, 0x0602, 0x0603, 0x06DD, 0x070F, 0x17B4,
		0x17B5, 0x200C, 0x200D, 0x200E, 0x200F, 0x202A, 0x202B, 0x202C,
		0x202D, 0x202E, 0x2060, 0x2061, 0x2062, 0x2063, 0x206A, 0x206B,
		0x206C, 0x206D, 0x206E, 0x206F, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB,
		0x1D173, 0x1D174, 0x1D175, 0x1D176, 0x1D177, 0x1D178, 0x1D179, 0x1D17A,
		0xE0001, 0xE0020, 0xE0021, 0xE0022, 0xE0023, 0xE0024, 0xE0025, 0xE0026,
		0xE0027, 0xE0028, 0xE0029, 0xE002A, 0xE002B, 0xE002C, 0xE002D, 0xE002E,
		0xE002F, 0xE0030, 0xE0031, 0xE0032, 0xE0033, 0xE0034, 0xE0035, 0xE0036,
		0xE0037, 0xE0038, 0xE0039, 0xE003A, 0xE003B, 0xE003C, 0xE003D, 0xE003E,
		0xE003F, 0xE0040, 0xE0041, 0xE0042, 0xE0043, 0xE0044, 0xE0045, 0xE0046,
		0xE0047, 0xE0048, 0xE0049, 0xE004A, 0xE004B, 0xE004C, 0xE004D, 0xE004E,
		0xE004F, 0xE0050, 0xE0051, 0xE0052, 0xE0053, 0xE0054, 0xE0055, 0xE0056,
		0xE0057, 0xE0058, 0xE0059, 0xE005A, 0xE005B, 0xE005C, 0xE005D, 0xE005E,
		0xE005F, 0xE0060, 0xE0061, 0xE0062, 0xE0063, 0xE0064, 0xE0065, 0xE0066,
		0xE0067, 0xE0068, 0xE0069, 0xE006A, 0xE006B, 0xE006C, 0xE006D, 0xE006E,
		0xE006F, 0xE0070, 0xE0071, 0xE0072, 0xE0073, 0xE0074, 0xE0075, 0xE0076,
		0xE0077, 0xE0078, 0xE0079, 0xE007A, 0xE007B, 0xE007C, 0xE007D, 0xE007E,
		0xE007F,
	};
	static const CodePoint	arrZs[] = {
		0x0009,	// TAB is not Zs, but it is treated as white space here
		0x0020, 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003,
		0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B,
		0x202F, 0x205F, 0x3000,
	};

	if(cp == 0x05F3		// Hebrew Punctuation Geresh
			|| pLexer->IsIdentifierStartCodePoint(cp))
		return aLetter;
	else if(binary_search(arrZs, arrZs + sizeof(arrZs) / sizeof(arrZs[0]), cp))
		return space;
	else if(cp == 0x002E		// Full Stop
			|| (cp == 0x003A	// Colon (for Swedish)
			&& wPrimaryLanguage == LANG_SWEDISH))
		return midNumLet;
	else if(cp == 0x0027		// Apostrophe
			|| cp == 0x00B7		// Middle Dot
			|| cp == 0x05F4		// Hebrew Punctuation Gershayim
			|| cp == 0x2019		// Right Single Quotation Mark
			|| cp == 0x2027		// Hyphenation Point
			|| (cp == 0x0022	// Quotation Mark (for legacy Hebrew)
			&& wPrimaryLanguage == LANG_HEBREW))
		return midLetter;
	else if(cp == 0x2024		// One Dot Leader
			|| cp == 0x2025		// Two Dot Leader
			|| cp == 0x2026)	// Horizontal Ellipsis
		return midNum;
	else if(binary_search(arrCf, arrCf + sizeof(arrCf) / sizeof(arrCf[0]), cp))
		return format;
	else if(CLexer::IsDigitCodePoint(cp))	// Mathematical digits
		return numeric;
	else
		return other;
}

/**
 *	Returns whether a string has a word boundary at the given position.
 *	The parenthesized numbers in the comments below appear to refer to the
 *	word boundary rules of UAX #29 (the same notation is used elsewhere in this file).
 *	@param str	the string to examine
 *	@param i	the position to examine
 *	@return		true if <var>i</var> is a word boundary. The beginning and the end
 *				of the string are boundaries; a position past the end is not
 */
bool CBoundarySearcher::HasWordBoundaryAt(const string_t& str, length_t i) const {
	AssertValid();

	if(i == 0 || i == str.length())
		return true;	// (1, 2)
	else if(i > str.length())
		return false;

	const char_t*	pwsz = str.c_str();
	const length_t	cch = str.length();
	CodePoint		cpNext = DecodeUTF16SurrogatePairToCodePoint(pwsz + i, cch - i);

	if(!IsFirstCharacterOfCluster(cpNext))
		return false;	// (3)

	WBClass	wbcNext = GetWordBoundaryClass(cpNext);

	if(wbcNext == format)
		return false;	// (4)
	if(wbcNext == aLetter || wbcNext == midLetter
			|| wbcNext == midNumLet || wbcNext == midNum || wbcNext == numeric) {
		length_t	iPrev = i;	// i != 0
		CodePoint	cpPrev;
		WBClass		wbcPrev;

		// find the cluster start that precedes the position
		while(true) {	// (3)
			if(iPrev == 0) {
				cpPrev = 0x0041;	// for the time being...
				break;
			} else if(iPrev >= 2	// a pair may occupy the very first two code units
					&& IsUTF16HighSurrogate(pwsz[iPrev - 2])
					&& IsUTF16LowSurrogate(pwsz[iPrev - 1]))
				cpPrev = DecodeUTF16SurrogatePairToCodePoint(pwsz + iPrev - 2, 2);
			else
				cpPrev = pwsz[iPrev - 1];
			iPrev -= (cpPrev > 0xFFFF) ? 2 : 1;
			if(IsFirstCharacterOfCluster(cpPrev))
				break;
		}
		wbcPrev = GetWordBoundaryClass(cpPrev);

		if(wbcNext == aLetter && wbcPrev == aLetter)	// (5+, !13)
			return !AreSameScriptType(cpPrev, cpNext);
		else if((wbcNext == aLetter || wbcNext == numeric)
				&& (wbcPrev == aLetter || wbcPrev == numeric))	// (8, 9, 10)
			return false;
		else if((wbcPrev == aLetter && (wbcNext == midLetter || wbcNext == midNumLet))
				|| (wbcPrev == numeric && (wbcNext == midNum || wbcNext == midNumLet))) {	// (6, 12)?
			// look at the cluster start that follows the "mid" character
			length_t	iNextNext = i + ((cpNext > 0xFFFF) ? 2 : 1);
			CodePoint	cpNextNext;

			while(true) {
				if(iNextNext >= cch)
					return true;
				cpNextNext = DecodeUTF16SurrogatePairToCodePoint(pwsz + iNextNext, cch - iNextNext);
				if(IsFirstCharacterOfCluster(cpNextNext))
					break;
				// step over the combining mark; without this the loop never ends
				iNextNext += (cpNextNext > 0xFFFF) ? 2 : 1;
			}
			return wbcPrev == GetWordBoundaryClass(cpNextNext);
		} else if(((wbcPrev == midLetter || wbcPrev == midNumLet) && wbcNext == aLetter)
				|| ((wbcPrev == midNum || wbcPrev == midNumLet) && wbcNext == numeric)) {	// (7, 11)?
			// look at the cluster start that precedes the "mid" character
			length_t	iPrevPrev = iPrev;
			CodePoint	cpPrevPrev;

			if(iPrevPrev == 0)
				return true;

			while(true) {
				if(iPrevPrev == 0) {
					cpPrevPrev = 0x0041;
					break;
				} else if(iPrevPrev == 1)
					cpPrevPrev = pwsz[0];
				else if(IsUTF16HighSurrogate(pwsz[iPrevPrev - 2])
						&& IsUTF16LowSurrogate(pwsz[iPrevPrev - 1]))
					cpPrevPrev = DecodeUTF16SurrogatePairToCodePoint(pwsz + iPrevPrev - 2, 2);
				else
					cpPrevPrev = pwsz[iPrevPrev - 1];
				if(IsFirstCharacterOfCluster(cpPrevPrev))
					break;
				iPrevPrev -= (cpPrevPrev > 0xFFFF) ? 2 : 1;
			}
			return wbcNext == GetWordBoundaryClass(cpPrevPrev);
		}
	}
	return true;
}

/**
 *	Returns whether a code point can be the first character of a cluster
 *	(a low surrogate depends on the code point before it, so that case
 *	is not examined by this method).
 *	@param cp	the code point to examine
 *	@return		true if <var>cp</var> is not in the table of grapheme extenders
 */
bool CBoundarySearcher::IsFirstCharacterOfCluster(CodePoint cp) {
	// The table lists the code points whose general category is one of
	// Mn (Mark, Non-Spacing), Mc (Mark, Spacing Combining) and Me (Mark, Enclosing).
	// It is generated by unicat.pl (Unicode 4.0).
	//
	// NOTE: "UAX #29: Text Boundaries" of the Unicode Consortium gives guidelines
	// for this classification, but Ascension does not follow rules (6, 7, 8, 9)
	// ((9) is replaced by the general categories above).
	//
	// NOTE: (original remark, reconstructed) the following code points are also
	// handled by this method although they arguably should not take the caret:
	//   U+17B4: Khmer Vowel Inherent Aq
	//   U+17B5: Khmer Vowel Inherent Aa
	// Both are discouraged as of Unicode 4.0.
	static const CodePoint	arrMxCodePoints[] = {
#if(ASCENSION_UNICODE_VERSION != 0x0400)
#error Included file version differs from Ascension Unicode version. Update correspoding file.
#endif
#include "script\EditView_GraphemeExtender_4_0"
	};
	const CodePoint* const	pFirst = arrMxCodePoints;
	const CodePoint* const	pLast = pFirst + sizeof(arrMxCodePoints) / sizeof(arrMxCodePoints[0]);
	const bool				bExtender = std::binary_search(pFirst, pLast, cp);
	return !bExtender;
}

/**
 *	Searches for the nearest beginning of a cluster in the given direction.
 *	The search crosses a line end: backward from column 0 it yields the end of the
 *	previous line, forward from the end of a line the beginning of the next line.
 *	@param pos		the position to start from
 *	@param bForward	true to search forward (toward the end of the document)
 *	@return			the position found. <var>pos</var> itself at the very beginning of
 *					the document; the end of the last line at the very end
 */
CCharPos CBoundarySearcher::SearchFirstCharacterOfCluster(const CCharPos& pos, bool bForward) const {
	AssertValid();

	if(pos.m_iChar == 0 && !bForward) {
		if(pos.m_iLine == 0)
			return pos;
		return CCharPos(pos.m_iLine - 1,
				m_view.GetDocument()->GetLineLength(pos.m_iLine - 1));
	}

	const length_t	cLines = m_view.GetDocument()->GetLineCount();
	const string_t&	strLine = m_view.GetDocument()->GetLine(pos.m_iLine);
	const length_t	cchLine = strLine.length();

	assert(pos.m_iLine < cLines && pos.m_iChar <= cchLine);

	// only a forward search may leave the end of a line for the next line
	if(bForward
			&& pos.m_iChar == cchLine
			&& pos.m_iLine < cLines - 1)
		return CCharPos(pos.m_iLine + 1, 0);

	const char_t*	pwszLine = strLine.c_str();
	CodePoint		cp;

	if(bForward) {
		length_t	i = pos.m_iChar;

		// step over the whole character at the start position, so that the
		// scan never begins on the trail code unit of a surrogate pair
		if(i < cchLine) {
			cp = DecodeUTF16SurrogatePairToCodePoint(pwszLine + i, cchLine - i);
			i += (cp > 0xFFFF) ? 2 : 1;
		}
		while(i < cchLine) {
			cp = DecodeUTF16SurrogatePairToCodePoint(pwszLine + i, cchLine - i);
			if(CBoundarySearcher::IsFirstCharacterOfCluster(cp))
				return CCharPos(pos.m_iLine, i);
			i += (cp > 0xFFFF) ? 2 : 1;
		}
		return CCharPos(pos.m_iLine, cchLine);
	} else {
		length_t	i = pos.m_iChar;	// > 0 here, column 0 was handled above

		while(i > 0) {
			--i;
			// move onto the lead code unit if this is the trail unit of a pair
			if(i > 0
					&& IsUTF16LowSurrogate(pwszLine[i])
					&& IsUTF16HighSurrogate(pwszLine[i - 1]))
				--i;
			// the character that begins at i decides whether i starts a cluster
			cp = DecodeUTF16SurrogatePairToCodePoint(pwszLine + i, cchLine - i);
			if(CBoundarySearcher::IsFirstCharacterOfCluster(cp))
				return CCharPos(pos.m_iLine, i);
		}
		return CCharPos(pos.m_iLine, 0);
	}
}

/**
 *	@brief	Searches for a sentence boundary
 *
 *	Not implemented: the start position is returned unchanged and
 *	<var>bForward</var> and <var>bp</var> are not used.
 *
 *	@param pos		the position to start from
 *	@param bForward	true to search forward (toward the end of the document)
 *	@param bp		which part of the boundary to search for
 *	@return			the position found (currently always <var>pos</var>)
 */
CCharPos CBoundarySearcher::SearchSentenceBoundary(const CCharPos& pos, bool bForward, BoundaryPosition bp) const {
	AssertValid();

	return pos;	// not implemented
}

/**
 *	@brief	Searches for a word boundary
 *
 *	(Translated from the original comment.) The position found differs from the
 *	start position, except at the beginning and at the end of the document.
 *
 *	Word boundaries are used by the commands that move by word (CmdMoveNextWord)
 *	and for selecting the word under the cursor by a double click, among others.
 *
 *	The definition of a word boundary in Ascension is not settled yet.
 *	Like IsFirstCharacterOfCluster, the current implementation takes
 *	"UAX #29: Text Boundaries" as a hint, but it does not satisfy that
 *	guideline completely (it is specialized for Japanese).
 *
 *	@param pos		the position to start from
 *	@param bForward	true to search forward (toward the end of the document)
 *	@param bp		which part of the word boundary to search for
 *					(tested below against BP_START, BP_END, BP_ALPHANUM and BP_NOANOTHERLINE)
 *	@return			the position found
 */
CCharPos CBoundarySearcher::SearchWordBoundary(const CCharPos& pos, bool bForward, BoundaryPosition bp) const {
	AssertValid();

	const CLexer*	pLexer = m_view.GetLexer();
	const CEditDoc*	pDocument = m_view.GetDocument();
	CCharPos		posFound = pos;

	if(pos.m_iChar == 0 && !bForward) {	// going back from the beginning of a line to the previous line
		if(pos.m_iLine == 0 || toBoolean(bp & BP_NOANOTHERLINE))
			return pos;
		posFound.m_iLine = pos.m_iLine - 1;
		posFound.m_iChar = pDocument->GetLineLength(posFound.m_iLine);
		if(!toBoolean(bp & BP_START))
			return posFound;
	} else if(bForward	// going on from the end of a line to the next line
			&& pos.m_iChar == pDocument->GetLineLength(pos.m_iLine)) {
		if(pos.m_iLine == pDocument->GetLineCount() - 1 || toBoolean(bp & BP_NOANOTHERLINE))
			return pos;
		posFound.m_iLine = pos.m_iLine + 1;
		const length_t	cchLine = pDocument->GetLineLength(posFound.m_iLine);
		// the result of IsWhiteSpace is used as a column here
		// (presumably the length of the leading white space - TODO confirm against CLexer)
		posFound.m_iChar = pLexer->IsWhiteSpace(pDocument->GetLine(posFound.m_iLine).c_str(), cchLine, true);
		if(toBoolean(bp & BP_START)) {
			if(toBoolean(bp & BP_ALPHANUM) && posFound.m_iChar < cchLine) {
				const CodePoint	cp = DecodeUTF16SurrogatePairToCodePoint(
					pDocument->GetLine(posFound.m_iLine).c_str() + posFound.m_iChar, cchLine - posFound.m_iChar);
				if(pLexer->IsIdentifierContinueCodePoint(cp))
					return posFound;
			} else
				return posFound;
		}
	}

	// .::.
	// The parenthesized numbers in the comments below are those of the boundary rules found in UAX#29.
	// !(n) means that the rule is not adopted, (n)+ that the rule is applied with an interpretation of our own.
	// (0) is an additional rule of Ascension; it applies when the boundaries are
	// restricted to word starts, word ends or word constituent characters.

	const string_t&	strLine = pDocument->GetLine(posFound.m_iLine);
	const char_t*	pwszLine = strLine.c_str();
	length_t		cchLine = strLine.length();

	if(cchLine == 0 /*|| (pos.m_iChar >= cchLine - 1 && bForward)*/)	// eot (2)
		return posFound;

	WBClass		wbcLeft = uncalculated;			// class on the left of the position examined
	WBClass		wbcRight = uncalculated;		// class on the right of the position examined
	// -1 marks "no code point cached" in cpNext
	CodePoint	cp, cpNext = -1, cpPrev = -1;

	if(bForward) {
		WBClass	wbcRightAnother = uncalculated;	// class two characters to the right of the position examined
		while(posFound.m_iChar < cchLine) {
			// reuse the code point decoded while looking ahead, if there is one
			if(cpNext != -1) {
				cp = cpNext;
				cpNext = -1;
			} else
				cp = DecodeUTF16SurrogatePairToCodePoint(
						pwszLine + posFound.m_iChar, cchLine - posFound.m_iChar);
			// the character at the start position only provides the left hand class
			// NOTE(review): this advances by one code unit even if cp is a surrogate pair - confirm
			if(posFound.m_iChar == pos.m_iChar
					|| (posFound.m_iChar == 0 && cchLine != 0)) {
				wbcLeft = GetWordBoundaryClass(cp);
				++posFound.m_iChar;
				cpPrev = cp;
				continue;
			}
			if(wbcRight == uncalculated)
				wbcRight = GetWordBoundaryClass(cp);

			// code points that do not take the caret are ignored (3)
			// Cf code points are ignored (4)
			// NOTE(review): as written, a format code point enters the rule chain
			// below instead of being skipped - confirm whether `!=' was intended
			if(wbcRight == format || CBoundarySearcher::IsFirstCharacterOfCluster(cp)) {
				if(wbcLeft == aLetter && wbcRight == aLetter) {	// (5+, !13)
					if(!AreSameScriptType(cpPrev, cp))
						return posFound;
					posFound.m_iChar += (cp > 0xFFFF) ? 2 : 1;
				} else if((wbcLeft == aLetter || wbcLeft == numeric)
						&& (wbcRight == aLetter || wbcRight == numeric))	// (8, 9, 10)
					posFound.m_iChar += (cp > 0xFFFF) ? 2 : 1;
				else if((wbcLeft == aLetter	// (6, 7)?
						&& (wbcRight == midLetter /*|| wbcRight == midNumLet*/))
						|| (wbcLeft == numeric)	// (11, 12)?
						&& (wbcRight == midNum || wbcRight == midNumLet)) {
					// a "mid" character joins two characters of the same class: look one character ahead
					const length_t	iNextChar = posFound.m_iChar + ((cp > 0xFFFF) ? 2 : 1);

					if(iNextChar >= cchLine)
						return CCharPos(posFound.m_iLine, cchLine);	// (2)
					cpNext = DecodeUTF16SurrogatePairToCodePoint(pwszLine + iNextChar, cchLine - iNextChar);
					wbcRightAnother = GetWordBoundaryClass(cpNext);
					if(wbcLeft != wbcRightAnother) {	// (14)
						if(!toBoolean(bp & BP_ALPHANUM)
								|| pLexer->IsIdentifierContinueCodePoint(cp)
								|| pLexer->IsIdentifierContinueCodePoint(cpNext))
							return posFound;
					}
					posFound.m_iChar = iNextChar;	// (6, 7, 11, 12)
					wbcRight = wbcRightAnother;
					wbcRightAnother = uncalculated;
				} else if((!toBoolean(bp & BP_END) && wbcRight == space)	// (0)
						|| (!toBoolean(bp & BP_START) && wbcLeft == space))	// (0)
					posFound.m_iChar += (cp > 0xFFFF) ? 2 : 1;
				else if(toBoolean(bp & BP_ALPHANUM)	// (0)
						&& (!toBoolean(bp & BP_START) || !pLexer->IsIdentifierContinueCodePoint(cp))
						&& (!toBoolean(bp & BP_END) || !pLexer->IsIdentifierContinueCodePoint(cpPrev)))
					posFound.m_iChar += (cp > 0xFFFF) ? 2 : 1;
				else
					return posFound;	// (14)
			} else {
				// skipped code point: keep the left hand state as it was
				posFound.m_iChar += (cp > 0xFFFF) ? 2 : 1;
				wbcRight = wbcLeft;
				cp = cpPrev;
			}

			// shift the window by one character
			wbcLeft = wbcRight;
			wbcRight = wbcRightAnother;
			wbcRightAnother = uncalculated;
			cpPrev = cp;
		}
		return posFound;	// (2)
	} else {
		WBClass	wbcLeftAnother = uncalculated;	// class two characters to the left of the position examined
		while(posFound.m_iChar > 0) {
			// reuse the code point decoded while looking ahead, if there is one;
			// otherwise decode the character that ends at the current position
			if(cpNext != -1) {
				cp = cpNext;
				cpNext = -1;
			} else if(posFound.m_iChar > 1
					&& IsUTF16LowSurrogate(pwszLine[posFound.m_iChar - 1])
					&& IsUTF16HighSurrogate(pwszLine[posFound.m_iChar - 2]))
				cp = DecodeUTF16SurrogatePairToCodePoint(
					pwszLine + posFound.m_iChar - 2, cchLine - posFound.m_iChar + 2);
			else
				cp = DecodeUTF16SurrogatePairToCodePoint(
					pwszLine + posFound.m_iChar - 1, cchLine - posFound.m_iChar + 1);
			// the character before the start position only provides the right hand class
			if(posFound.m_iChar == pos.m_iChar
					|| (posFound.m_iChar == cchLine && cchLine != 0)) {
				wbcRight = GetWordBoundaryClass(cp);
				posFound.m_iChar -= (cp > 0xFFFF) ? 2 : 1;
				cpPrev = cp;
				continue;
			}
			if(wbcLeft == uncalculated)
				wbcLeft = GetWordBoundaryClass(cp);

			// code points that do not take the caret are ignored (3)
			// Cf code points are ignored (4)
			// NOTE(review): as written, a format code point enters the rule chain
			// below instead of being skipped - confirm whether `!=' was intended
			if(wbcLeft == format || CBoundarySearcher::IsFirstCharacterOfCluster(cp)) {
				if(wbcLeft == aLetter && wbcRight == aLetter) {	// (5+, !13)
					if(!AreSameScriptType(cp, cpPrev))
						return posFound;
					posFound.m_iChar -= (cp > 0xFFFF) ? 2 : 1;
				} else if((wbcLeft == aLetter || wbcLeft == numeric)	// (8, 9, 10)
						&& (wbcRight == aLetter || wbcRight == numeric))
					posFound.m_iChar -= (cp > 0xFFFF) ? 2 : 1;
				else if((wbcRight == aLetter	// (6, 7)?
						&& (wbcLeft == midLetter /*|| wbcLeft == midNumLet*/))
						|| (wbcRight == numeric)	// (11, 12)?
						&& (wbcLeft == midNum || wbcLeft == midNumLet)) {
					// a "mid" character joins two characters of the same class: look one character further back
					length_t	iNextChar = posFound.m_iChar - ((cp > 0xFFFF) ? 2 : 1);

					if(iNextChar == 0)
						return CCharPos(posFound.m_iLine, 0);	// (2)
					iNextChar -= iNextChar > 1
						&& IsUTF16HighSurrogate(pwszLine[iNextChar - 2])
						&& IsUTF16LowSurrogate(pwszLine[iNextChar - 1]) ? 2 : 1;
					cpNext = DecodeUTF16SurrogatePairToCodePoint(pwszLine + iNextChar, cchLine - iNextChar);
					wbcLeftAnother = GetWordBoundaryClass(cpNext);
					if(wbcRight != wbcLeftAnother) {
						if(!toBoolean(bp & BP_ALPHANUM)
								|| pLexer->IsIdentifierContinueCodePoint(cp)
								|| pLexer->IsIdentifierContinueCodePoint(cpNext))
							return posFound;	// (14)
					}
					posFound.m_iChar = iNextChar;	// (6, 7, 11, 12)
			//		wbcLeft = wbcLeftAnother;
			//		wbcLeftAnother = uncalculated;
				} else if((!toBoolean(bp & BP_END) && wbcRight == space)	// (0)
						|| (!toBoolean(bp & BP_START) && wbcLeft == space))	// (0)
					posFound.m_iChar -= (cp > 0xFFFF) ? 2 : 1;
				else if(toBoolean(bp & BP_ALPHANUM)	// (0)
						&& (!toBoolean(bp & BP_START) || !pLexer->IsIdentifierContinueCodePoint(cpPrev))
						&& (!toBoolean(bp & BP_END) || !pLexer->IsIdentifierContinueCodePoint(cp)))
					posFound.m_iChar -= (cp > 0xFFFF) ? 2 : 1;
				else
					return posFound;
			} else {
				// skipped code point: keep the right hand state as it was
				posFound.m_iChar -= (cp > 0xFFFF) ? 2 : 1;
				wbcLeft = wbcRight;
				cp = cpPrev;
			}

			// shift the window by one character
			wbcRight = wbcLeft;
			wbcLeft = wbcLeftAnother;
			wbcLeftAnother = uncalculated;
			cpPrev = cp;
		}
		return posFound;	// (2)
	}
}


// _CAscensionRegExpTraits class implementation
/////////////////////////////////////////////////////////////////////////////

// The lexer consulted by _CAscensionRegExpTraits::is_class.
// CTextSearcher::Search assigns it from the view of the boundary searcher it is given.
const CLexer*	_CAscensionRegExpTraits::m_pLexer = 0;

/**
 *	Returns whether a character belongs to the character classes requested.
 *	Some classes are examined here with the lexer and the tables of this file;
 *	whatever remains is delegated to the default traits class.
 *	@param c	the character to examine
 *	@param f	the character classes, as a combination of char_class_* bits
 *	@return		false as soon as one of the classes handled here rejects <var>c</var>;
 *				otherwise the answer of the default traits for the remaining bits
 *				(true if no bit remains)
 *
 *	NOTE(review): every class handled here is treated as a requirement, and its bit
 *	is cleared after the test. If the Boost version in use defines some classes as
 *	combinations of other bits (e.g. alnum in terms of alpha and digit), the order
 *	of the tests matters - confirm against the char_class_* definitions.
 */
inline bool _CAscensionRegExpTraits::is_class(char_t c, boost::uint32_t f) {
	if(toBoolean(f & char_class_alpha) && !m_pLexer->IsIdentifierStartCodePoint(c))
		return false;
	f &= ~char_class_alpha;
	if(toBoolean(f & char_class_cntrl) && _CComparison<SF_IGNORE_CONTROLS>::Collate(c) != 0xFFFFFFFF)
		return false;
	f &= ~char_class_cntrl;	// clear the handled bit only (the `~' was missing)
	// the punctuation table must be guarded by the punctuation bit, not by the control bit
	if(toBoolean(f & char_class_punct) && _CComparison<SF_IGNORE_PUNCTUATIONS>::Collate(c) != 0xFFFFFFFF)
		return false;
	f &= ~char_class_punct;	// clear the handled bit only (the `~' was missing)
	if(toBoolean(f & char_class_space) && !m_pLexer->IsWhiteSpace(&c, 1, false))
		return false;
	f &= ~char_class_space;
	if(toBoolean(f & char_class_blank) && !m_pLexer->IsWhiteSpace(&c, 1, true))
		return false;
	f &= ~char_class_blank;
	// alphanumeric: rejected only if c is neither a digit nor an identifier start character
	if(toBoolean(f & char_class_alnum) && !iswdigit(c) && !m_pLexer->IsIdentifierStartCodePoint(c))
		return false;
	f &= ~char_class_alnum;
	// NOTE(review): digits are probably not identifier start characters, so they
	// would not count as word characters here - confirm the intent
	if(toBoolean(f & char_class_word) && !m_pLexer->IsIdentifierStartCodePoint(c))
		return false;
	f &= ~char_class_word;
	return (f != char_class_none) ? boost::regex_traits<char_t>::is_class(c, f) : true;
}

///	Returns whether a character is a combining character, i.e. one that
///	CBoundarySearcher::IsFirstCharacterOfCluster does not accept as a cluster start.
inline bool _CAscensionRegExpTraits::is_combining(char_t c) {
	return !CBoundarySearcher::IsFirstCharacterOfCluster(c);
}

///	Returns whether a character is a line separator:
///	CR, LF, U+0085, U+2028 or U+2029.
inline bool _CAscensionRegExpTraits::is_separator(char_t c) {
	switch(c) {
	case L'\r':
	case L'\n':
	case 0x0085:
	case 0x2028:
	case 0x2029:
		return true;
	default:
		return false;
	}
}

///	Translates a character for comparison by folding it with the _Collate method
///	of the current searcher. The icase argument is not used.
///	The result is truncated to char_t, so only the BMP is handled.
///	NOTE(review): _Collate may return 0xFFFFFFFF ("ignore"), which the cast
///	turns into an ordinary character value - confirm that this is acceptable.
inline char_t _CAscensionRegExpTraits::translate(char_t c, bool /* icase */) {
	return static_cast<char_t>(m_pSearcher->_Collate(c));	// BMP only...
}

/* [EOF] */