// TextSearcher.h
// (c) 2004 exeal

#ifndef _TEXT_SEARCHER_H_
#define _TEXT_SEARCHER_H_
#include "AscensionCommon.h"
#include "..\..\Manah\Object.h"
#include <stdexcept>

#ifndef NO_REGEXP
#include <boost/regex.hpp>
#endif /* NO_REGEXP */


namespace Ascension {

/// Bit set of options controlling a text search.
/// Every non-zero option except SF_IGNORE_CASE_FULL occupies its own bit;
/// the case-sensitivity setting is a two-bit field selected by SF_MATCHCASE_MASK.
typedef unsigned long	SearchFlag;

// --- Direction (bit 0) -------------------------------------------------------
/// Search forward. Zero-valued: it is the absence of SF_BACKWARD, not a testable bit.
const SearchFlag	SF_FORWARD				= 0UL;
/// Search backward.
const SearchFlag	SF_BACKWARD				= 1UL << 0;

// --- Case sensitivity (bits 1-2, see SF_MATCHCASE_MASK) ----------------------
/// Distinguish upper and lower case. Zero-valued: no bit of the case field is set.
const SearchFlag	SF_MATCH_CASE			= 0UL;
/// Do not distinguish case (ASCII).
const SearchFlag	SF_IGNORE_CASE_ASCII	= 1UL << 1;
/// Do not distinguish case (simple).
const SearchFlag	SF_IGNORE_CASE_SIMPLE	= 1UL << 2;
/// Do not distinguish case (full). Has both bits of the case field set.
const SearchFlag	SF_IGNORE_CASE_FULL		= SF_IGNORE_CASE_ASCII | SF_IGNORE_CASE_SIMPLE;

// --- Character equivalences --------------------------------------------------
/// Do not distinguish kana types.
const SearchFlag	SF_IGNORE_KANATYPE		= 1UL << 3;
/// Do not distinguish character width.
const SearchFlag	SF_IGNORE_WIDTH			= 1UL << 4;
/// Do not distinguish kinds of digits (0-9 only).
const SearchFlag	SF_IGNORE_DIGITTYPE		= 1UL << 5;
/// Do not distinguish alef/hamza variants.
const SearchFlag	SF_IGNORE_ALEFHAMZA		= 1UL << 6;

// --- Characters skipped while matching ---------------------------------------
/// Ignore punctuation characters.
const SearchFlag	SF_IGNORE_PUNCTUATIONS	= 1UL << 7;
/// Ignore symbols.
const SearchFlag	SF_IGNORE_SYMBOLS		= 1UL << 8;
/// Ignore white space characters.
const SearchFlag	SF_IGNORE_WHITESPACES	= 1UL << 9;
/// Ignore diacritical marks.
const SearchFlag	SF_IGNORE_DIACRITICS	= 1UL << 10;
/// Ignore vowels.
const SearchFlag	SF_IGNORE_VOWELS		= 1UL << 11;
/// Ignore kashida.
const SearchFlag	SF_IGNORE_KASHIDA		= 1UL << 12;
/// Ignore control characters.
const SearchFlag	SF_IGNORE_CONTROLS		= 1UL << 13;

// --- Matching mode -----------------------------------------------------------
/// Decompose characters before comparing.
const SearchFlag	SF_DECOMPOSE			= 1UL << 14;
/// Match whole words only.
const SearchFlag	SF_WHOLEWORD			= 1UL << 15;
/// Treat the pattern as a regular expression.
const SearchFlag	SF_REGEXP				= 1UL << 16;

/// Mask extracting the case-sensitivity field (the two SF_IGNORE_CASE_* bits).
const SearchFlag	SF_MATCHCASE_MASK		= SF_IGNORE_CASE_ASCII | SF_IGNORE_CASE_SIMPLE;

/// Japanese fuzzy-search flags: each bit names a group of characters or spellings
/// that are treated as equivalent (modelled on the MS Word option set).
/// These flags cannot be used together with regular expressions.
typedef unsigned short	JapaneseFuzzySearchFlag;

/// Hiragana / katakana.
const JapaneseFuzzySearchFlag	JFSF_KANATYPE					= 1 << 0;
/// Youon / sokuon (contracted and geminate sounds).
const JapaneseFuzzySearchFlag	JFSF_YOUON_SOKUON				= 1 << 1;
/// Minus sign / prolonged sound mark / dash.
const JapaneseFuzzySearchFlag	JFSF_MINUS_PROLONGEDMARK_DASH	= 1 << 2;
/// Iteration marks.
const JapaneseFuzzySearchFlag	JFSF_ITERATIONMARK				= 1 << 3;
/// Kanji variants that have not been unified.
const JapaneseFuzzySearchFlag	JFSF_UNUNIFIEDKANJI				= 1 << 4;
/// Legacy / modern kana forms.
const JapaneseFuzzySearchFlag	JFSF_LEGACY_MODERN_KANAFIGURE	= 1 << 5;
/// Prolonged sound mark / vowel.
const JapaneseFuzzySearchFlag	JFSF_PROLONGEDMARK_VOWEL		= 1 << 6;
/// "di"/"ji" and "du"/"zu".
const JapaneseFuzzySearchFlag	JFSF_DI_JI_DU_ZU				= 1 << 7;
/// "ba"/"va" and "ha"/"fa".
const JapaneseFuzzySearchFlag	JFSF_BA_VA_HA_FA				= 1 << 8;
/// "tsi"/"thi"/"ti" and "dhi"/"ji".
const JapaneseFuzzySearchFlag	JFSF_TSI_THI_TI_DHI_JI			= 1 << 9;
/// "hyu"/"fyu" and "byu"/"vyu".
const JapaneseFuzzySearchFlag	JFSF_HYU_FYU_BYU_VYU			= 1 << 10;
/// "se"/"sye" and "ze"/"je".
const JapaneseFuzzySearchFlag	JFSF_SE_SYE_ZE_JE				= 1 << 11;
/// "a"/"ya" following an i-row or e-row kana.
const JapaneseFuzzySearchFlag	JFSF_A_YA_FOLLOWING_I_E			= 1 << 12;
/// "ki"/"ku" followed by an s-row kana.
const JapaneseFuzzySearchFlag	JFSF_KI_KU_FOLLOWEDBY_S			= 1 << 13;

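/*
 *	Implementation notes
 *	(English reconstruction of the original note, whose text was mis-encoded.)
 *
 *	Some of the search flags are easy to implement; others are not.
 *	For conditions that ignore some difference between characters,
 *	the two strings being compared must be mapped to the same characters,
 *	but the length of a string may change during that mapping.
 *	For example, "ignore case (full)" performs case folding, which can
 *	change the string length, so match positions no longer line up.
 *	To solve this, the conversion of the target string could record
 *	where (and by how much) the length changed.
 *
 *	The character mapping itself is not simple either; following Unicode 4.0
 *	requires a considerable amount of implementation:
 *	  ignore case (simple): use CaseFolding.txt
 *	  ignore case (full): use CaseFolding.txt
 *	  ignore kana type: unify to one kind with LCMapStringW
 *	  ignore width: convert to full width with LCMapStringW
 *	  ignore digit type: map to Arabic digits 0-9 (LCMapStringW is incomplete)
 *	  ignore symbols: use UnicodeData.txt
 *	  ignore non-spacing characters: ???
 *	  ignore kashida: ???
 *	  decompose characters: ???
 *
 *	! Mappings that change the length of the string are not implemented at present !
 */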


/// Exception reporting that the regular expression engine could not be made available.
/// It is a std::runtime_error that always carries the same fixed message.
class EFailedToLoadRegExpEngine : public std::runtime_error
{
public:
	/// Default constructor; installs the fixed text returned by what().
	EFailedToLoadRegExpEngine() : std::runtime_error("Failed to load regular expression engine.") {}
};

/// Exception reporting that a regular expression pattern is invalid.
/// It is a std::logic_error whose what() text is the message supplied by the thrower.
class ERegExpPatternIsInvalid : public std::logic_error {
public:
	/// Constructor.
	/// @param str	Diagnostic message forwarded to std::logic_error.
	/// Declared explicit so that a std::string is never silently converted
	/// into an exception object.
	explicit ERegExpPatternIsInvalid(const std::string& str) : std::logic_error(str) {
	}
};

// Forward declarations. In this header CLexer is used only through a pointer
// (_CAscensionRegExpTraits::m_pLexer), and CBoundarySearcher is named by
// CTextSearcher::Search before its own definition appears.
class CLexer;
class CBoundarySearcher;

///	eLXgs
class CTextSearcher : public Manah::CObject {
	// RXgN^
public:
	CTextSearcher();
private:
	CTextSearcher(const CTextSearcher& rhs);
	operator =(const CTextSearcher& rhs);

	// \bh
public:
	SearchFlag	GetFlags() const;
	bool		IsRegExpAvailable() const;
	bool		Search(const string_t& strText, length_t iBegin,
					length_t& iFound, length_t& cchFound, const CBoundarySearcher& boundary)
					throw(EFailedToLoadRegExpEngine, ERegExpPatternIsInvalid);
	void		SetFlags(SearchFlag flags);
	void		SetText(const string_t& strText);
private:
	Manah::Text::CodePoint	_Collate(Manah::Text::CodePoint ch) const;

	// f[^o
protected:
	string_t				m_strFindWhat;
	SearchFlag				m_sfFlags;
	JapaneseFuzzySearchFlag	m_jsfsFlags;

	friend class _CAscensionRegExpTraits;
};


/// Flags used by CBoundarySearcher::SearchWordBoundary and
/// CBoundarySearcher::SearchSentenceBoundary.
enum BoundaryPosition {
	/// Start of the unit.
	BP_START			= 0x01,
	/// End of the unit.
	BP_END				= 0x02,
	/// Both ends (BP_START and BP_END combined).
	BP_AROUND			= BP_START | BP_END,
	/// Presumably restricts the search to alphanumeric (word-constituent) characters -- confirm.
	BP_ALPHANUM			= 0x04,
	/// Presumably keeps the search within the current line -- confirm.
	BP_NOANOTHERLINE	= 0x08	// no trailing comma: it is ill-formed before C++11
};


///	NX^EAPꋫEAE
class CBoundarySearcher : public Manah::CObject {
private:
	enum WBClass {
		format, aLetter, midLetter, midNumLet, midNum,
		numeric, space, other, uncalculated
	};

	// RXgN^
public:
	explicit CBoundarySearcher(CEditView& view);

	// \bh
public:
	static bool			AreSameScriptType(Manah::Text::CodePoint cp1, Manah::Text::CodePoint cp2);
	const CEditView&	GetView() const;
	static bool			IsFirstCharacterOfCluster(Manah::Text::CodePoint cp);
	bool				HasWordBoundaryAt(const string_t& str, length_t i) const;
	bool				HasSentenceBoundaryAt(const string_t& str, length_t i) const;
	CCharPos			SearchFirstCharacterOfCluster(const CCharPos& pos, bool bForward) const;
	CCharPos			SearchWordBoundary(const CCharPos& pos, bool bForward, BoundaryPosition bp) const;
	CCharPos			SearchSentenceBoundary(const CCharPos& pos, bool bForward, BoundaryPosition bp) const;
private:
	WBClass	GetWordBoundaryClass(Manah::Text::CodePoint cp) const;

	// f[^o
private:
	CEditView&	m_view;
};

#ifndef NO_REGEXP
/// Character traits used for regular expression matching.
/// Builds on boost::regex_traits<char_t> and declares its own static
/// translate / is_separator / is_combining / is_class functions.
class _CAscensionRegExpTraits : public boost::regex_traits<char_t>
{
public:
	// --- Shared state ---
	// Both pointers are static, so one value is shared by every user of the
	// traits class. NOTE(review): presumably set by CTextSearcher (a friend of
	// this class's counterpart) before a regex search runs -- confirm.
	static const CLexer*	m_pLexer;		///< Lexer consulted by the traits functions.
	static CTextSearcher*	m_pSearcher;	///< Searcher consulted by the traits functions.

	// --- Character operations ---
	/// Translates @a c for comparison; @a icase requests case-insensitive handling.
	static char_t	translate(char_t c, bool icase);
	/// Reports whether @a c is a separator character.
	static bool		is_separator(char_t c);
	/// Reports whether @a c is a combining character.
	static bool		is_combining(char_t c);
	/// Reports whether @a c belongs to the character class mask @a f.
	static bool		is_class(char_t c, boost::uint32_t f);
};
#endif /* NO_REGEXP */

} // namespace Ascension

#endif /* _TEXT_SEARCHER_H_ */

/* [EOF] */