#ifndef __MERCURY_REGEX_PARSER__
#define __MERCURY_REGEX_PARSER__

#include <memory>
#include "common.h"
#include "lexer.h"
#include "parse_tree.h"


namespace mercury
{
	namespace _regex
	{

		template<typename _Input, typename _InputStreamIterator, typename _Traits = regex_traits<_Input> >
		class parser
		{
		public:
			typedef parse_tree            <_Input> *parse_tree_ptr;
			typedef parse_node_element_set<_Input> *parse_node_element_set_ptr;
			typedef typename std::auto_ptr<parse_tree            <_Input> > auto_parse_tree_ptr;
			typedef typename std::auto_ptr<parse_node_element_set<_Input> > auto_parse_node_element_set_ptr;

		public:
			parser(const _InputStreamIterator &pattern) : m_lexer(pattern)
			{
				_init();
			}
			parse_tree_ptr parse(void)
			{
				auto_parse_tree_ptr tree = rule_expression();
				return tree.release();
			}
			bool is_head(void) const { return m_lexer.is_head(); }
			bool is_tail(void) const { return m_lexer.is_tail(); }

		private:
			lexer<_Input, _InputStreamIterator, _Traits> m_lexer;
			token<_Input>                                m_lookahead;

		private:
			void _init(void)
			{
				move();
			}
			void move(void)
			{
				m_lookahead = m_lexer.get_token();
			}
			void move(const TOKEN_TYPE type)
			{
				if(m_lookahead.type != type)
				{
					throw RE_SYNTAX;
				}
				move();
			}

		private:
			auto_parse_tree_ptr rule_expression(void)
			{
				auto_parse_tree_ptr ret = rule_subexpression();
				move(TOKEN_EOS);
				return ret;
			}
			auto_parse_tree_ptr rule_subexpression(void)
			{
				auto_parse_tree_ptr ret = rule_sequence();
				if(m_lookahead.type == TOKEN_DISJUNCTION)
				{
					move(TOKEN_DISJUNCTION);
					auto_parse_tree_ptr node1 = ret;
					auto_parse_tree_ptr node2 = rule_subexpression();

					ret.reset(new parse_node_disjunction<_Input, _Traits>(node1.get(), node2.get(), NULL));
					node1.release();
					node2.release();
				}
				return ret;
			}
			auto_parse_tree_ptr rule_sequence(void)
			{
				if(_is_subsequence(m_lookahead.type))
				{
					return rule_subsequence();
				}
				else
				{
					auto_parse_tree_ptr ret(new parse_node_none<_Input, _Traits>());
					return ret;
				}
			}
			auto_parse_tree_ptr rule_subsequence(void)
			{
				auto_parse_tree_ptr ret = rule_repeat();

				if(_is_subsequence(m_lookahead.type))
				{
					auto_parse_tree_ptr node1 = ret;
					auto_parse_tree_ptr node2 = rule_subsequence();

					ret.reset(new parse_node_conjunction<_Input, _Traits>(node1.get(), node2.get(), NULL));
					node1.release();
					node2.release();
				}
				else
				{
				}
				return ret;
			}
			auto_parse_tree_ptr rule_repeat(void)
			{
				auto_parse_tree_ptr ret;
				auto_parse_tree_ptr node = rule_factor();

				bool lazy = false;
				switch(m_lookahead.type)
				{
				case TOKEN_REPEAT0:
					move(TOKEN_REPEAT0);
					if(m_lookahead.type == TOKEN_REPEAT_LAZY)
					{
						move(TOKEN_REPEAT_LAZY);
						lazy = true;
					}
					{
						auto_parse_tree_ptr node2(new parse_node_repeat0<_Input, _Traits>(node.get(), lazy));
						ret = node2;
					}
					break;

				case TOKEN_REPEAT1:
					move(TOKEN_REPEAT1);
					if(m_lookahead.type == TOKEN_REPEAT_LAZY)
					{
						move(TOKEN_REPEAT_LAZY);
						lazy = true;
					}
					{
						auto_parse_tree_ptr node2(new parse_node_repeat1<_Input, _Traits>(node.get(), lazy));
						ret = node2;
					}
					break;

				case TOKEN_REPEAT01:
					move(TOKEN_REPEAT01);
					if(m_lookahead.type == TOKEN_REPEAT_LAZY)
					{
						move(TOKEN_REPEAT_LAZY);
						lazy = true;
					}
					{
						auto_parse_tree_ptr node2(new parse_node_repeat01<_Input, _Traits>(node.get(), lazy));
						ret = node2;
					}
					break;

				case TOKEN_REPEAT_BEGIN:
					{
						auto_parse_tree_ptr node2 = _subrule_repeat_number(node);
						ret = node2;
					}
					break;

				default:
					ret = node;
					break;
				}
				node.release();
				return ret;
			}
			auto_parse_tree_ptr rule_factor(void)
			{
				auto_parse_tree_ptr ret;
				switch(m_lookahead.type)
				{
				case TOKEN_GROUP_BEGIN:
					move(TOKEN_GROUP_BEGIN);
					{
						auto_parse_tree_ptr node = rule_subexpression();
						ret = node;
					}
					move(TOKEN_GROUP_END);
					break;

				case TOKEN_SET_BEGIN:
					move(TOKEN_SET_BEGIN);
					{
						auto_parse_tree_ptr node = rule_set();
						ret = node;
					}
					move(TOKEN_SET_END);
					break;

				case TOKEN_ANY:
					{
						auto_parse_tree_ptr node(new parse_node_any<_Input, _Traits>());
						ret = node;
					}
					move(TOKEN_ANY);
					break;

				case TOKEN_CLASS:
					{
						auto_parse_tree_ptr node(new parse_node_class<_Input,_Traits>(m_lookahead.cls, m_lookahead.invert));
						ret = node;
					}
					move(TOKEN_CLASS);
					break;

				case TOKEN_CHARACTER:
					{
						auto_parse_tree_ptr node(new parse_node_character<_Input, _Traits>(m_lookahead.ch));
						ret = node;
					}
					move(TOKEN_CHARACTER);
					break;

				default:
					break;
				}
				return ret;
			}
			auto_parse_tree_ptr rule_set(void)
			{
				auto_parse_node_element_set_ptr ret;

				if(m_lookahead.type == TOKEN_SET_NOT)
				{
					move(TOKEN_SET_NOT);
					auto_parse_node_element_set_ptr node = rule_subset();

					ret.reset(new parse_node_not<_Input, _Traits>(node.get()));
					node.release();
				}
				else
				{
					auto_parse_node_element_set_ptr node = rule_subset();
					ret = node;
				}
				return auto_parse_tree_ptr(ret.release());
			}
			auto_parse_node_element_set_ptr rule_subset(void)
			{
				auto_parse_node_element_set_ptr ret;
				switch(m_lookahead.type)
				{
				case TOKEN_CLASS:
					{
						auto_parse_node_element_set_ptr node = _subrule_subset_class();
						ret = node;
					}
					break;

				case TOKEN_CHARACTER:
					{
						auto_parse_node_element_set_ptr node = _subrule_subset_character();
						ret = node;
					}
					break;

				default:
					break;
				}
				return ret;
			}

		private:
			auto_parse_tree_ptr _subrule_repeat_number(auto_parse_tree_ptr &node)
			{
				auto_parse_tree_ptr ret;

				move(TOKEN_REPEAT_BEGIN);

				const int number1 = m_lookahead.num;
				move(TOKEN_NUMBER);

				bool lazy = false;

				if(m_lookahead.type != TOKEN_REPEAT_SEPARATOR)
				{
					move(TOKEN_REPEAT_END);
					if(m_lookahead.type == TOKEN_REPEAT_LAZY)
					{
						move(TOKEN_REPEAT_LAZY);
					}
					ret.reset(new parse_node_repeatn<_Input, _Traits>(node.get(), number1));
					goto repeat_exit;
				}

				move(TOKEN_REPEAT_SEPARATOR);

				if(m_lookahead.type != TOKEN_NUMBER)
				{
					move(TOKEN_REPEAT_END);
					if(m_lookahead.type == TOKEN_REPEAT_LAZY)
					{
						move(TOKEN_REPEAT_LAZY);
						lazy = true;
					}
					ret.reset(new parse_node_repeatn_<_Input, _Traits>(node.get(), number1, lazy));
					goto repeat_exit;
				}
				else
				{
					const int number2 = m_lookahead.num;
					move(TOKEN_NUMBER);
					move(TOKEN_REPEAT_END);
					if(m_lookahead.type == TOKEN_REPEAT_LAZY)
					{
						move(TOKEN_REPEAT_LAZY);
						lazy = true;
					}

					if(number1 > number2) { throw RE_REPEAT_MINMAX; }
					ret.reset(new parse_node_repeatmn<_Input, _Traits>(node.get(), number1, number2, lazy));

					goto repeat_exit;
				}
repeat_exit:
				return ret;
			}
			auto_parse_node_element_set_ptr _subrule_subset_class(void)
			{
				auto_parse_node_element_set_ptr ret(new parse_node_class<_Input, _Traits>(m_lookahead.cls, m_lookahead.invert));
				move(TOKEN_CLASS);

				switch(m_lookahead.type)
				{
				case TOKEN_CHARACTER:
				case TOKEN_CLASS:
					{
						auto_parse_node_element_set_ptr node1 = ret;
						auto_parse_node_element_set_ptr node2 = rule_subset();
						auto_parse_node_element_set_ptr node3(new parse_node_set<_Input, _Traits>(node1.get(), node2.get(), NULL));

						ret = node3;
						node1.release();
						node2.release();
					}
					break;

				default:
					break;
				}
				return ret;
			}
			auto_parse_node_element_set_ptr _subrule_subset_character(void)
			{
				auto_parse_node_element_set_ptr ret;

				const _Input char1 = m_lookahead.ch;
				move(TOKEN_CHARACTER);
				switch(m_lookahead.type)
				{
				case TOKEN_CHARACTER:
				case TOKEN_CLASS:
					{
						auto_parse_node_element_set_ptr node1(new parse_node_character<_Input, _Traits>(char1));
						auto_parse_node_element_set_ptr node2 = rule_subset();
						auto_parse_node_element_set_ptr node3(new parse_node_set<_Input, _Traits>(node1.get(), node2.get(), NULL));

						ret = node3;
						node1.release();
						node2.release();
					}
					break;

				case TOKEN_SET_RANGE:
					{
						move(TOKEN_SET_RANGE);
						const _Input char2 = m_lookahead.ch;
						move(TOKEN_CHARACTER);

						if(char1 > char2) { throw RE_SET_RANGE; }

						auto_parse_node_element_set_ptr node(new parse_node_range<_Input, _Traits>(char1, char2));
						ret = node;

						switch(m_lookahead.type)
						{
						case TOKEN_CHARACTER:
						case TOKEN_CLASS:
							{
								auto_parse_node_element_set_ptr node1 = ret;
								auto_parse_node_element_set_ptr node2 = rule_subset();
								auto_parse_node_element_set_ptr node3(new parse_node_set<_Input, _Traits>(node1.get(), node2.get(), NULL));

								ret = node3;
								node1.release();
								node2.release();
							}
							break;

						default:
							break;
						}
					}
					break;

				default:
					{
						auto_parse_node_element_set_ptr node(new parse_node_character<_Input, _Traits>(char1));
						ret = node;
					}
					break;
				}
				return ret;
			}

		private:
			bool _is_subsequence(const TOKEN_TYPE type)
			{
				return (type == TOKEN_GROUP_BEGIN || type == TOKEN_SET_BEGIN || type == TOKEN_ANY || type == TOKEN_CLASS || type == TOKEN_CHARACTER);
			}
		};
	}
}

#endif
