// lexer.h - lexical analyzer (tokenizer) for regular-expression patterns
#ifndef __MERCURY_REGEX_LEXER__
#define __MERCURY_REGEX_LEXER__

#include <utility>                      // operator!=, operator<=, operator>, operator>=
#include "traits.h"

namespace mercury
{
	namespace _regex
	{
		// Make the relational operator templates from <utility> (operator!=,
		// operator>, operator<=, operator>=) visible inside this namespace.
		// NOTE(review): this is a using-directive in a header, so it applies to
		// every translation unit that includes this file; std::rel_ops is also
		// deprecated since C++20 - confirm whether anything still relies on it.
		using namespace std::rel_ops;

		// Kinds of token the pattern lexer can produce.
		// Enumerators are numbered consecutively starting from 0; do not reorder.
		enum tagTOKEN_KIND
		{
			// Literals
			TOKEN_CHARACTER = 0,                    // ordinary character
			TOKEN_NUMBER,                           // numeric value

			// Operators
			TOKEN_DISJUNCTION,                      // alternation
			TOKEN_REPEAT0,                          // zero or more repetitions
			TOKEN_REPEAT1,                          // one or more repetitions
			TOKEN_REPEAT01,                         // zero or one repetition
			TOKEN_REPEAT_BEGIN,                     // start of a quantifier
			TOKEN_REPEAT_SEPARATOR,                 // separator inside a quantifier
			TOKEN_REPEAT_END,                       // end of a quantifier
			TOKEN_ANY,                              // any single character

			// Character sets
			TOKEN_SET_BEGIN,                        // start of a set
			TOKEN_SET_RANGE,                        // range inside a set
			TOKEN_SET_NOT,                          // set negation (complement)
			TOKEN_SET_END,                          // end of a set

			// Groups
			TOKEN_GROUP_BEGIN,                      // start of a group
			TOKEN_GROUP_END,                        // end of a group

			// Named character classes
			TOKEN_CLASS_UPPER,                      // upper-case letters
			TOKEN_CLASS_LOWER,                      // lower-case letters
			TOKEN_CLASS_ALPHA,                      // letters
			TOKEN_CLASS_ALNUM,                      // letters and digits
			TOKEN_CLASS_DIGIT,                      // decimal digits
			TOKEN_CLASS_XDIGIT,                     // hexadecimal digits
			TOKEN_CLASS_PUNCT,                      // punctuation
			TOKEN_CLASS_BLANK,                      // space and tab
			TOKEN_CLASS_SPACE,                      // whitespace
			TOKEN_CLASS_CNTRL,                      // control characters
			TOKEN_CLASS_GRAPH,                      // printable characters
			TOKEN_CLASS_PRINT,                      // printable characters plus space

			// End of the pattern
			TOKEN_EOS
		};
		typedef tagTOKEN_KIND TOKEN_KIND;


		// A single token produced by the lexer.
		//
		// This is a plain aggregate that is brace-initialised positionally
		// (e.g. {kind, ch}), so the member order must not change. Members left out
		// of such an initialiser are value-initialised.
		template<class _Char>
		struct token
		{
			TOKEN_KIND kind;   // what kind of token this is
			_Char      ch;     // character carried by the token
			int        num;    // numeric value carried by the token
		};


		// ͊
		template<typename _Char, typename _ConstIterator, typename _Traits = regex_traits<_Char> >
		class lexer
		{
		public:
			// RXgN^iwj
			lexer(_ConstIterator pattern_begin, _ConstIterator pattern_end)
				: m_pattern_begin(pattern_begin), m_pattern_end(pattern_end), m_context(CTX_NORMAL), m_kind_before(TOKEN_CHARACTER)
			{
				_init(pattern_begin, pattern_end);
			}


			// ̃g[N擾
			token<_Char> get_token(void)
			{
				typedef token<_Char> token_t;

				// ̏I[
				if(m_now == m_pattern_end)
				{
					const token_t tk = {TOKEN_EOS, _Char()};
					return tk;
				}

				const CONTEXT context =  m_context;
				const _Char   ch      = *m_now++;
				token_t tk = {TOKEN_CHARACTER, ch};

				// GXP[vi̕Iɕʂ̕ƂĈj
				if(ch == _Traits::meta_escape())
				{
					if(m_now != m_pattern_end)
					{
						tk.ch = *m_now++;
					}
				}

				_ConstIterator p = m_now; p--;
				switch(context)
				{
				case CTX_NORMAL:                           // ʏ
					// I
					if(ch == _Traits::meta_disjunction())
					{
						tk.kind = TOKEN_DISJUNCTION;
					}
					// 0ȏ̌JԂ
					if(ch == _Traits::meta_repeat0())
					{
						tk.kind = TOKEN_REPEAT0;
					}
					// 1ȏ̌JԂ
					if(ch == _Traits::meta_repeat1())
					{
						tk.kind = TOKEN_REPEAT1;
					}
					// 0܂1̌JԂ
					if(ch == _Traits::meta_repeat01())
					{
						tk.kind = TOKEN_REPEAT01;
					}
					// ʉJn
					if(ch == _Traits::meta_repeat_begin())
					{
						m_context = CTX_REPEAT;
						tk.kind = TOKEN_REPEAT_BEGIN;
					}
					// Cӂ1
					if(ch == _Traits::meta_any())
					{
						tk.kind = TOKEN_ANY;
					}
					// WJn
					if(ch == _Traits::meta_set_begin())
					{
						m_context = CTX_SET;
						tk.kind = TOKEN_SET_BEGIN;
					}
					// O[vJn
					if(ch == _Traits::meta_group_begin())
					{
						tk.kind = TOKEN_GROUP_BEGIN;
					}
					// O[vI
					if(ch == _Traits::meta_group_end())
					{
						tk.kind = TOKEN_GROUP_END;
					}
					break;

				case CTX_SET:                           // W
					// NX
					switch(_Traits::get_charclass(p, m_pattern_end))
					{
					case CHARCLASS_UPPER : tk.kind = TOKEN_CLASS_UPPER ; m_now = p; break;
					case CHARCLASS_LOWER : tk.kind = TOKEN_CLASS_LOWER ; m_now = p; break;
					case CHARCLASS_ALPHA : tk.kind = TOKEN_CLASS_ALPHA ; m_now = p; break;
					case CHARCLASS_DIGIT : tk.kind = TOKEN_CLASS_DIGIT ; m_now = p; break;
					case CHARCLASS_ALNUM : tk.kind = TOKEN_CLASS_ALNUM ; m_now = p; break;
					case CHARCLASS_XDIGIT: tk.kind = TOKEN_CLASS_XDIGIT; m_now = p; break;
					case CHARCLASS_PUNCT : tk.kind = TOKEN_CLASS_PUNCT ; m_now = p; break;
					case CHARCLASS_BLANK : tk.kind = TOKEN_CLASS_BLANK ; m_now = p; break;
					case CHARCLASS_SPACE : tk.kind = TOKEN_CLASS_SPACE ; m_now = p; break;
					case CHARCLASS_CNTRL : tk.kind = TOKEN_CLASS_CNTRL ; m_now = p; break;
					case CHARCLASS_GRAPH : tk.kind = TOKEN_CLASS_GRAPH ; m_now = p; break;
					case CHARCLASS_PRINT : tk.kind = TOKEN_CLASS_PRINT ; m_now = p; break;
					default: break;
					}
					// W
					if(ch == _Traits::meta_set_not())
					{
						if(m_kind_before == TOKEN_SET_BEGIN)
						{
							tk.kind = TOKEN_SET_NOT;
						}
					}
					// ͈
					if(ch == _Traits::meta_set_range())
					{
						// Õg[NŁAWIłȂΕ͈͂̃g[N
						if(m_kind_before == TOKEN_CHARACTER && m_now != m_pattern_end && *m_now != _Traits::meta_set_end())
						{
							tk.kind = TOKEN_SET_RANGE;
						}
					}
					// WI
					if(ch == _Traits::meta_set_end())
					{
						m_context = CTX_NORMAL;
						tk.kind = TOKEN_SET_END;
					}
					break;

				case CTX_REPEAT:                        // ʉ
					// l	
					tk.num = _Traits::number(p, m_pattern_end);
					if(tk.num != -1)
					{
						tk.kind = TOKEN_NUMBER;
						m_now = p;
					}
					// ʉ؂
					if(ch == _Traits::meta_repeat_separator())
					{
						tk.kind = TOKEN_REPEAT_SEPARATOR;
					}
					// ʉI
					if(ch == _Traits::meta_repeat_end())
					{
						m_context = CTX_NORMAL;
						tk.kind = TOKEN_REPEAT_END;
					}
					break;
				}

				m_kind_before = tk.kind;
				return tk;
			}

		private:
			_ConstIterator m_pattern_begin;
			_ConstIterator m_pattern_end;
			_ConstIterator m_now;

			// i݈ʒuj
			typedef enum tagCONTEXT
			{
				CTX_NORMAL,                             // ʏ̕
				CTX_SET,                                // W
				CTX_REPEAT,                             // ʉ
			} CONTEXT;
			CONTEXT    m_context;
			TOKEN_KIND m_kind_before;

			// 
			void _init(_ConstIterator begin, _ConstIterator /* end */)
			{
				m_now = begin;
			}
		};
	}
}

#endif // __MERCURY_REGEX_LEXER__
