#ifndef __MERCURY_REGEX_LEXER__
#define __MERCURY_REGEX_LEXER__

#include <utility>
#include "common.h"
#include "traits.h"

namespace mercury
{
	namespace _regex
	{
		using namespace std::rel_ops;
		// Kinds of token the lexer can hand to its caller.
		// Enumerators are numbered implicitly from 0 in declaration order.
		enum tagTOKEN_TYPE
		{
			TOKEN_CHARACTER,          // literal character, carried in token::ch
			TOKEN_CLASS,              // character class, carried in token::cls / token::invert
			TOKEN_NUMBER,             // number read inside a repeat spec, carried in token::num
			TOKEN_DISJUNCTION,        // _Traits::meta_disjunction()
			TOKEN_REPEAT0,            // _Traits::meta_repeat0()
			TOKEN_REPEAT1,            // _Traits::meta_repeat1()
			TOKEN_REPEAT01,           // _Traits::meta_repeat01()
			TOKEN_REPEAT_BEGIN,       // _Traits::meta_repeat_begin(); lexer enters its repeat context
			TOKEN_REPEAT_SEPARATOR,   // _Traits::meta_repeat_separator(), repeat context only
			TOKEN_REPEAT_END,         // _Traits::meta_repeat_end(); lexer leaves its repeat context
			TOKEN_REPEAT_LAZY,        // _Traits::meta_repeat_lazy() directly after a repeat token
			TOKEN_ANY,                // _Traits::meta_any(), or escaped _Traits::escape_any()
			TOKEN_SET_BEGIN,          // _Traits::meta_set_begin(); lexer enters its set context
			TOKEN_SET_RANGE,          // _Traits::meta_set_range() between two set members
			TOKEN_SET_NOT,            // _Traits::meta_set_not() directly after TOKEN_SET_BEGIN
			TOKEN_SET_END,            // _Traits::meta_set_end(); lexer leaves its set context
			TOKEN_GROUP_BEGIN,        // _Traits::meta_group_begin()
			TOKEN_GROUP_END,          // _Traits::meta_group_end()
			TOKEN_EOS                 // end of the pattern
		};
		typedef enum tagTOKEN_TYPE TOKEN_TYPE;
		// A single lexical unit produced by lexer::get_token().
		//
		// `type` selects which payload field is meaningful; the lexer only
		// fills in the payload that belongs to the token kind it reports.
		// `type` must stay the first member: the lexer builds tokens with
		// aggregate initialisation of the form `{TOKEN_xxx}`.
		template<class _Char>
		struct token
		{
			// Kind of token.
			TOKEN_TYPE type;

			// Literal (or decoded escape) character; payload of TOKEN_CHARACTER.
			_Char ch;

			// Value returned by _Traits::number(); payload of TOKEN_NUMBER.
			int num;

			// Character class; payload of TOKEN_CLASS.
			CHARCLASS cls;

			// Payload of TOKEN_CLASS: true for the "*_invert" escape variants.
			bool invert;
		};
		// Pull-style tokenizer for a regular-expression pattern.
		//
		// Template parameters:
		//   _Char                 character type of the pattern
		//   _InputStreamIterator  pattern cursor; the lexer uses eos(), unary *,
		//                         post-increment and copy construction on it
		//   _Traits               supplies the meta/escape characters and the
		//                         get_charclass() / number() helpers
		//
		// The lexer keeps a small amount of state between calls: the current
		// context (normal / inside a set / inside a repeat spec) and the type of
		// the previously returned token, both of which influence how the next
		// character is classified.
		template<typename _Char, typename _InputStreamIterator, typename _Traits = regex_traits<_Char> >
		class lexer
		{
		public:
			typedef token<_Char> token_t;

		public:
			// Starts lexing at `pattern`. If the pattern begins with
			// _Traits::meta_head(), that character is consumed immediately and
			// is_head() reports true.
			lexer(const _InputStreamIterator &pattern)
				: m_iterator(pattern), m_context(CTX_NORMAL), m_type_before(TOKEN_CHARACTER),
				  m_head(false), m_tail(false)
			{
				_init();
			}

			// True when the pattern started with _Traits::meta_head().
			bool is_head(void) const { return m_head; }

			// True once _Traits::meta_tail() has been seen as the very last
			// character of the pattern (only known after lexing reaches it).
			bool is_tail(void) const { return m_tail; }

			// Returns the next token and advances the cursor.
			// Returns a TOKEN_EOS token once the pattern is exhausted; calling
			// again after that keeps returning TOKEN_EOS.
			token_t get_token(void)
			{
				if(m_iterator.eos())
				{
					const token_t tk = {TOKEN_EOS};
					return tk;
				}

				// `prev` points at the character being classified, m_iterator at
				// the one after it (the helpers need both positions).
				const _InputStreamIterator prev = m_iterator++;
				const _Char                ch   = *prev;

				// Default: a literal character. This is also the result for an
				// escape character that is the last character of the pattern.
				token_t tk = {TOKEN_CHARACTER};
				tk.ch = ch;

				if(ch == _Traits::meta_escape())
				{
					// Escapes are decoded the same way in every context.
					if(!m_iterator.eos())
					{
						tk = _token_escape(*m_iterator++);
					}
				}
				else
				{
					switch(m_context)
					{
					case CTX_NORMAL:
						tk = _token_normal(ch);
						break;

					case CTX_SET:
						tk = _token_set(ch, prev);
						break;

					case CTX_REPEAT:
						tk = _token_repeat(ch, prev);
						break;
					}
				}

				// Remembered so the next call can recognise context-sensitive
				// tokens (lazy marker, set negation, set range).
				m_type_before = tk.type;
				return tk;
			}

		private:
			// Current read position in the pattern.
			_InputStreamIterator m_iterator;

			// Lexing context; selects which classifier get_token() dispatches to.
			typedef enum tagCONTEXT
			{
				CTX_NORMAL,   // outside any set or repeat spec
				CTX_SET,      // between TOKEN_SET_BEGIN and TOKEN_SET_END
				CTX_REPEAT,   // between TOKEN_REPEAT_BEGIN and TOKEN_REPEAT_END
			} CONTEXT;
			CONTEXT    m_context;

			// Type of the token returned by the previous get_token() call
			// (TOKEN_CHARACTER before the first call).
			TOKEN_TYPE m_type_before;

			bool m_head;   // pattern started with _Traits::meta_head()
			bool m_tail;   // pattern ended with _Traits::meta_tail()

			// Consumes a leading _Traits::meta_head(), if present, and records it.
			void _init(void)
			{
				if(!m_iterator.eos() && *m_iterator == _Traits::meta_head())
				{
					m_head = true;
					m_iterator++;
				}
			}

		private:
			// Classifies `ch`, the character that followed the escape character.
			//
			// Lookup order: control-character escapes, character-class escapes,
			// the "any" escape (normal context only), and finally the character
			// itself as a literal.
			//
			// The token is aggregate-initialised so that payload fields which do
			// not belong to the returned type hold zero instead of indeterminate
			// values when the token is copied out.
			token_t _token_escape(const _Char ch)
			{
				token_t tk = {TOKEN_CHARACTER};

				// Escapes that stand for a single control character.
				{
					const struct
					{
						_Char escape;
						_Char cntrl;
					}
					char_array[] =
					{
						{_Traits::escape_bel(), _Traits::cntrl_bel()},
						{_Traits::escape_ff (), _Traits::cntrl_ff ()},
						{_Traits::escape_lf (), _Traits::cntrl_lf ()},
						{_Traits::escape_cr (), _Traits::cntrl_cr ()},
						{_Traits::escape_ht (), _Traits::cntrl_ht ()},
						{_Traits::escape_vt (), _Traits::cntrl_vt ()},
						{_Traits::escape_esc(), _Traits::cntrl_esc()},
					};

					for(size_t i = 0; i < mercury_countof(char_array); i++)
					{
						if(ch == char_array[i].escape)
						{
							tk.ch = char_array[i].cntrl;
							return tk;
						}
					}
				}

				// Escapes that stand for a character class or its complement.
				{
					const struct
					{
						_Char     escape;
						CHARCLASS cls;
						bool      invert;
					}
					class_array[] =
					{
						{_Traits::escape_digit       (), CHARCLASS_DIGIT, false},
						{_Traits::escape_digit_invert(), CHARCLASS_DIGIT,  true},
						{_Traits::escape_upper       (), CHARCLASS_UPPER, false},
						{_Traits::escape_upper_invert(), CHARCLASS_UPPER,  true},
						{_Traits::escape_lower       (), CHARCLASS_LOWER, false},
						{_Traits::escape_lower_invert(), CHARCLASS_LOWER,  true},
						{_Traits::escape_space       (), CHARCLASS_SPACE, false},
						{_Traits::escape_space_invert(), CHARCLASS_SPACE,  true},
						{_Traits::escape_csym        (), CHARCLASS_CSYM , false},
						{_Traits::escape_csym_invert (), CHARCLASS_CSYM ,  true},
					};

					for(size_t i = 0; i < mercury_countof(class_array); i++)
					{
						if(ch == class_array[i].escape)
						{
							tk.type   = TOKEN_CLASS;
							tk.cls    = class_array[i].cls;
							tk.invert = class_array[i].invert;
							return tk;
						}
					}
				}

				// The "any" escape is only special outside sets and repeat specs.
				if(ch == _Traits::escape_any() && m_context == CTX_NORMAL)
				{
					tk.type = TOKEN_ANY;
					return tk;
				}

				// Anything else: the escaped character stands for itself.
				tk.ch = ch;
				return tk;
			}

			// Classifies an unescaped character in the normal context.
			//
			// The checks are independent `if`s rather than an else-if chain, so a
			// later match overrides an earlier one; their order is therefore part
			// of the behaviour and must be kept.
			// NOTE(review): this matters when a traits class maps two roles to the
			// same character (presumably repeat01 / repeat_lazy) -- confirm
			// against regex_traits.
			token_t _token_normal(const _Char ch)
			{
				token_t tk = {TOKEN_CHARACTER};
				tk.ch = ch;

				if(ch == _Traits::meta_tail())
				{
					// Only a tail marker when it is the last character of the
					// pattern; it is then reported as end of stream.
					if(m_iterator.eos())
					{
						m_tail  = true;
						tk.type = TOKEN_EOS;
					}
				}
				if(ch == _Traits::meta_disjunction())
				{
					tk.type = TOKEN_DISJUNCTION;
				}
				if(ch == _Traits::meta_repeat0())
				{
					tk.type = TOKEN_REPEAT0;
				}
				if(ch == _Traits::meta_repeat1())
				{
					tk.type = TOKEN_REPEAT1;
				}
				if(ch == _Traits::meta_repeat01())
				{
					tk.type = TOKEN_REPEAT01;
				}
				if(ch == _Traits::meta_repeat_begin())
				{
					m_context = CTX_REPEAT;
					tk.type = TOKEN_REPEAT_BEGIN;
				}
				if(ch == _Traits::meta_any())
				{
					tk.type = TOKEN_ANY;
				}
				if(ch == _Traits::meta_set_begin())
				{
					m_context = CTX_SET;
					tk.type = TOKEN_SET_BEGIN;
				}
				if(ch == _Traits::meta_group_begin())
				{
					tk.type = TOKEN_GROUP_BEGIN;
				}
				if(ch == _Traits::meta_group_end())
				{
					tk.type = TOKEN_GROUP_END;
				}
				if(ch == _Traits::meta_repeat_lazy())
				{
					// A lazy marker is only recognised directly after a repeat
					// token; otherwise the classification made above stands.
					switch(m_type_before)
					{
					case TOKEN_REPEAT0:
					case TOKEN_REPEAT1:
					case TOKEN_REPEAT01:
					case TOKEN_REPEAT_END:
						tk.type = TOKEN_REPEAT_LAZY;
						break;

					default:
						break;
					}
				}

				return tk;
			}

			// Classifies an unescaped character inside a set.
			// `prev` is the position of `ch`; m_iterator is already one past it.
			// As in _token_normal(), later checks override earlier ones.
			token_t _token_set(const _Char ch, const _InputStreamIterator &prev)
			{
				token_t tk = {TOKEN_CHARACTER};
				tk.ch = ch;

				// NOTE(review): get_charclass() presumably recognises a named
				// class starting at `prev` and advances m_iterator past it --
				// confirm against the traits implementation.
				const CHARCLASS cls = _Traits::get_charclass(prev, m_iterator);
				if(cls != CHARCLASS_NONE)
				{
					tk.type   = TOKEN_CLASS;
					tk.cls    = cls;
					tk.invert = false;
				}
				if(ch == _Traits::meta_set_not())
				{
					// Negation is only recognised as the first thing in the set.
					if(m_type_before == TOKEN_SET_BEGIN)
					{
						tk.type = TOKEN_SET_NOT;
					}
				}
				if(ch == _Traits::meta_set_range())
				{
					// A range needs a character before it and something other
					// than the set terminator after it; otherwise it is literal.
					if(m_type_before == TOKEN_CHARACTER && !m_iterator.eos() && *m_iterator != _Traits::meta_set_end())
					{
						tk.type = TOKEN_SET_RANGE;
					}
				}
				if(ch == _Traits::meta_set_end())
				{
					m_context = CTX_NORMAL;
					tk.type = TOKEN_SET_END;
				}

				return tk;
			}

			// Classifies an unescaped character inside a repeat specification.
			// `prev` is the position of `ch`; m_iterator is already one past it.
			token_t _token_repeat(const _Char ch, const _InputStreamIterator &prev)
			{
				token_t tk = {TOKEN_CHARACTER};
				tk.ch = ch;

				// number() returns -1 when no number was read.
				// NOTE(review): it presumably advances m_iterator past the digits
				// it consumed -- confirm against the traits implementation.
				tk.num = _Traits::number(prev, m_iterator);
				if(tk.num != -1)
				{
					tk.type = TOKEN_NUMBER;
				}
				if(ch == _Traits::meta_repeat_separator())
				{
					tk.type = TOKEN_REPEAT_SEPARATOR;
				}
				if(ch == _Traits::meta_repeat_end())
				{
					m_context = CTX_NORMAL;
					tk.type = TOKEN_REPEAT_END;
				}

				return tk;
			}
		};
	}
}

#endif
