// parser.h - regular expression parser (syntax analyzer)
#ifndef __MERCURY_REGEX_PARSER__
#define __MERCURY_REGEX_PARSER__

#include "lexer.h"
#include "parse_node.h"


namespace mercury
{
	// Error codes thrown (by value) by the regex parser when a pattern is rejected.
	typedef enum tagREGEX_ERROR
	{
		REGERR_SYNTAX        = 0,   // syntax error: the lookahead token is not the expected one
		REGERR_SET_RANGE     = 1,   // character-set range error: first > last in "first-last"
		REGERR_REPEAT_MINMAX = 2    // repeat-count error: min > max in "{min,max}"
	} REGEX_ERROR;

	namespace _regex
	{
		////////////////////////////////////////////////////////////////////////////////
		// Syntax analyzer (parser)
		// Parses a pattern according to the context-free grammar defined below:
		//  expression    -> subexpression EOS
		//  subexpression -> sequence '|' subexpression | sequence
		//  sequence      -> subsequence | '' (empty)
		//  subsequence   -> repeat subsequence | repeat
		//  repeat        -> factor '*' | factor '+' | factor '?' | factor '{' NUMBER '}' | factor '{' NUMBER ',' '}' | factor '{' NUMBER ',' NUMBER '}' | factor
		//  factor        -> '(' subexpression ')' | '[' set ']' | '.' | CHARACTER
		//  set           -> '^' subset | subset
		//  subset        -> CLASS | CLASS subset | CHARACTER subset | CHARACTER | CHARACTER '-' CHARACTER subset | CHARACTER '-' CHARACTER

		template<typename _Input, typename _ConstIterator, typename _Traits = regex_traits<_Input> >
		class parser
		{
		public:
			typedef parse_tree<_Input> *parse_tree_ptr;

		public:
			// Constructor (pattern specified).
			// Hands the pattern iterators to the lexer, then calls _init() so that the
			// first token is already in m_lookahead before any grammar rule runs.
			parser(_ConstIterator pattern_begin, _ConstIterator pattern_end)
				: m_lexer(pattern_begin, pattern_end)
			{
				_init(pattern_begin, pattern_end);
			}

			// Parses the pattern and builds the parse tree.
			// Returns the root produced by the start rule (rule_expression); any
			// REGEX_ERROR thrown by the grammar rules propagates to the caller.
			parse_tree_ptr parse(void)
			{
				parse_tree_ptr const root = rule_expression();
				return root;
			}

		private:
			lexer<_Input, _ConstIterator, _Traits> m_lexer;
			token<_Input>                          m_lookahead;

		private:
			// Grammar rules

			// expression -> subexpression EOS
			// Start rule: parses one subexpression and requires that the whole pattern
			// has been consumed. Throws REGERR_SYNTAX (via move) if tokens remain.
			parse_tree_ptr rule_expression(void)
			{
				// If this call throws there is nothing to release yet.
				parse_tree_ptr root = rule_subexpression();
				try
				{
					// The pattern must end here.
					move(TOKEN_EOS);
				}
				catch(...)
				{
					// Release the tree built so far before propagating the error.
					if(root != NULL) { root->free(); }
					throw;
				}
				return root;
			}

			// subexpression -> sequence '|' subexpression | sequence
			// Parses one sequence; if a '|' follows, parses the right-hand
			// subexpression recursively and joins both under a disjunction node.
			parse_tree_ptr rule_subexpression(void)
			{
				parse_tree_ptr lhs = NULL;
				parse_tree_ptr rhs = NULL;
				try
				{
					lhs = rule_sequence();
					if(m_lookahead.kind != TOKEN_DISJUNCTION)
					{
						// subexpression -> sequence
						return lhs;
					}

					// subexpression -> sequence '|' subexpression
					move();
					rhs = rule_subexpression();
					return new parse_node_disjunction<_Input, _Traits>(lhs, rhs, NULL);
				}
				catch(...)
				{
					// Neither operand has been handed to a parent node yet; release both.
					if(lhs != NULL) { lhs->free(); }
					if(rhs != NULL) { rhs->free(); }
					throw;
				}
			}

			// sequence -> subsequence | ''
			// Yields a parse_node_none when the lookahead cannot start a subsequence
			// (the empty alternative); otherwise delegates to rule_subsequence.
			parse_tree_ptr rule_sequence(void)
			{
				if(!_is_subsequence(m_lookahead.kind))
				{
					// sequence -> ''
					return new parse_node_none<_Input, _Traits>();
				}

				// sequence -> subsequence
				return rule_subsequence();
			}

			// subsequence -> repeat subsequence | repeat
			// Parses one repeat item; while the lookahead can start another item the
			// rest is parsed recursively and both parts are joined by a conjunction node.
			parse_tree_ptr rule_subsequence(void)
			{
				parse_tree_ptr head = NULL;
				parse_tree_ptr tail = NULL;
				try
				{
					head = rule_repeat();
					if(!_is_subsequence(m_lookahead.kind))
					{
						// subsequence -> repeat
						return head;
					}

					// subsequence -> repeat subsequence
					tail = rule_subsequence();
					return new parse_node_conjunction<_Input, _Traits>(head, tail, NULL);
				}
				catch(...)
				{
					// Release whatever was built before the failure.
					if(head != NULL) { head->free(); }
					if(tail != NULL) { tail->free(); }
					throw;
				}
			}

			// repeat -> factor '*' | factor '+' | factor '?'
			//         | factor '{' NUMBER '}' | factor '{' NUMBER ',' '}' | factor '{' NUMBER ',' NUMBER '}'
			//         | factor
			// Parses a factor and wraps it in the repeat node selected by the
			// quantifier token that follows, if there is one.
			parse_tree_ptr rule_repeat(void)
			{
				parse_tree_ptr operand = NULL;
				try
				{
					operand = rule_factor();

					if(m_lookahead.kind == TOKEN_REPEAT0)
					{
						// repeat -> factor '*'
						move();
						operand = new parse_node_repeat0<_Input, _Traits>(operand);
					}
					else if(m_lookahead.kind == TOKEN_REPEAT1)
					{
						// repeat -> factor '+'
						move();
						operand = new parse_node_repeat1<_Input, _Traits>(operand);
					}
					else if(m_lookahead.kind == TOKEN_REPEAT01)
					{
						// repeat -> factor '?'
						move();
						operand = new parse_node_repeat01<_Input, _Traits>(operand);
					}
					else if(m_lookahead.kind == TOKEN_REPEAT_BEGIN)
					{
						// repeat -> factor '{' ... '}' (the braces are consumed by the helper)
						operand = _repeat_number(operand);
					}
					// otherwise: repeat -> factor (no quantifier)

					return operand;
				}
				catch(...)
				{
					if(operand != NULL) { operand->free(); }
					throw;
				}
			}

			// factor -> '(' subexpression ')' | '[' set ']' | '.' | CHARACTER
			// Dispatches on the lookahead token. Anything that is not '(', '[' or '.'
			// must be a CHARACTER token, otherwise move() throws REGERR_SYNTAX.
			parse_tree_ptr rule_factor(void)
			{
				parse_tree_ptr result = NULL;
				try
				{
					if(m_lookahead.kind == TOKEN_GROUP_BEGIN)
					{
						// factor -> '(' subexpression ')'
						move(TOKEN_GROUP_BEGIN);
						result = rule_subexpression();
						move(TOKEN_GROUP_END);
					}
					else if(m_lookahead.kind == TOKEN_SET_BEGIN)
					{
						// factor -> '[' set ']'
						move(TOKEN_SET_BEGIN);
						result = rule_set();
						move(TOKEN_SET_END);
					}
					else if(m_lookahead.kind == TOKEN_ANY)
					{
						// factor -> '.'
						result = new parse_node_any<_Input, _Traits>();
						move();
					}
					else
					{
						// factor -> CHARACTER
						// The node is built from the current token before the checked
						// move validates and replaces it.
						result = new parse_node_character<_Input, _Traits>(m_lookahead.ch);
						move(TOKEN_CHARACTER);
					}
					return result;
				}
				catch(...)
				{
					if(result != NULL) { result->free(); }
					throw;
				}
			}

			// set -> '^' subset | subset
			// Parses the contents of a bracket set. A leading '^' causes the parsed
			// subset to be wrapped in a parse_node_not.
			parse_tree_ptr rule_set(void)
			{
				parse_node_element_set<_Input> *elements = NULL;
				try
				{
					const bool negated = (m_lookahead.kind == TOKEN_SET_NOT);
					if(negated)
					{
						// set -> '^' subset : skip the '^'
						move();
					}

					elements = rule_subset();

					if(negated)
					{
						elements = new parse_node_not<_Input, _Traits>(elements);
					}
					return elements;
				}
				catch(...)
				{
					if(elements != NULL) { elements->free(); }
					throw;
				}
			}

			// subset  CLASS | CLASS subset | CHARACTER subset | CHARACTER | CHARACTER '-' CHARACTER subset | CHARACTER '-' CHARACTER
			parse_node_element_set<_Input> *rule_subset(void)
			{
				parse_node_element_set<_Input> *node  = NULL;
				parse_node_element_set<_Input> *node1 = NULL;
				parse_node_element_set<_Input> *node2 = NULL;
				try
				{
					const _Input char1 = m_lookahead.ch;
					CHARCLASS charclass = CHARCLASS_NONE;
					switch(m_lookahead.kind)
					{
					case TOKEN_CLASS_UPPER : charclass = CHARCLASS_UPPER ; break;
					case TOKEN_CLASS_LOWER : charclass = CHARCLASS_LOWER ; break;
					case TOKEN_CLASS_ALPHA : charclass = CHARCLASS_ALPHA ; break;
					case TOKEN_CLASS_ALNUM : charclass = CHARCLASS_ALNUM ; break;
					case TOKEN_CLASS_DIGIT : charclass = CHARCLASS_DIGIT ; break;
					case TOKEN_CLASS_XDIGIT: charclass = CHARCLASS_XDIGIT; break;
					case TOKEN_CLASS_PUNCT : charclass = CHARCLASS_PUNCT ; break;
					case TOKEN_CLASS_BLANK : charclass = CHARCLASS_BLANK ; break;
					case TOKEN_CLASS_SPACE : charclass = CHARCLASS_SPACE ; break;
					case TOKEN_CLASS_CNTRL : charclass = CHARCLASS_CNTRL ; break;
					case TOKEN_CLASS_GRAPH : charclass = CHARCLASS_GRAPH ; break;
					case TOKEN_CLASS_PRINT : charclass = CHARCLASS_PRINT ; break;

					case TOKEN_CHARACTER:
						// subset  CHARACTER subset | CHARACTER | CHARACTER '-' CHARACTER subset | CHARACTER '-' CHARACTER
						move();

						// ̕ɂĕ
						switch(m_lookahead.kind)
						{
						case TOKEN_CHARACTER:
							// subset  CHARACTER subset
							{
								node1 = new parse_node_character<_Input, _Traits>(char1);
								node2 = rule_subset();
								node  = new parse_node_set<_Input, _Traits>(node1, node2, NULL);
								node1 = NULL;
								node2 = NULL;
							}
							break;

						case TOKEN_SET_RANGE:
							// subset  CHARACTER '-' CHARACTER subset | CHARACTER '-' CHARACTER
							{
								move();
								const _Input char2 = m_lookahead.ch;
								move(TOKEN_CHARACTER);

								if(char1 > char2) { throw REGERR_SET_RANGE; }
								node = new parse_node_range<_Input, _Traits>(char1, char2);

								// subset  CHARACTER '-' CHARACTER subset
								if(m_lookahead.kind == TOKEN_CHARACTER)
								{
									node2 = rule_subset();
									node  = new parse_node_set<_Input, _Traits>(node, node2, NULL);
									node2 = NULL;
								}
							}
							break;

						default:
							// subset  CHARACTER
							node = new parse_node_character<_Input, _Traits>(char1);
						}
						break;

					default:
						break;
					}

					// TOKEN_CLASS_xxx ̏ꍇ
					if(charclass != CHARCLASS_NONE)
					{
						// subset  CLASS | CLASS subset
						node = new parse_node_class<_Input, _Traits>(charclass);
						move();

						node2 = NULL;
						switch(m_lookahead.kind)
						{
						case TOKEN_CLASS_UPPER :
						case TOKEN_CLASS_LOWER :
						case TOKEN_CLASS_ALPHA :
						case TOKEN_CLASS_ALNUM :
						case TOKEN_CLASS_DIGIT :
						case TOKEN_CLASS_XDIGIT:
						case TOKEN_CLASS_PUNCT :
						case TOKEN_CLASS_BLANK :
						case TOKEN_CLASS_SPACE :
						case TOKEN_CLASS_CNTRL :
						case TOKEN_CLASS_GRAPH :
						case TOKEN_CLASS_PRINT :
						case TOKEN_CHARACTER:
							// subset  CLASS subset
							node2 = rule_subset();
							node  = new parse_node_set<_Input, _Traits>(node, node2, NULL);
							node2 = NULL;
							break;

						default:
							// subset  CLASS
							break;
						}
					}
					return node;
				}
				catch(...)
				{
					if(node  != NULL) { node ->free(); }
					if(node1 != NULL) { node1->free(); }
					if(node2 != NULL) { node2->free(); }
					throw;
				}
			}

		private:
			// Primes the parser by reading the first token into the lookahead.
			// Both iterator arguments are unused here (the lexer received them in
			// the constructor's initializer list).
			void _init(_ConstIterator /* pattern_begin */, _ConstIterator /* pattern_end */)
			{
				this->move();
			}


			// Can `kind` start a subsequence?
			// A subsequence starts with a factor (see the grammar rules), so this is
			// true exactly for '(', '[', '.' and CHARACTER tokens.
			bool _is_subsequence(const TOKEN_KIND kind)
			{
				switch(kind)
				{
				case TOKEN_GROUP_BEGIN:
				case TOKEN_SET_BEGIN:
				case TOKEN_ANY:
				case TOKEN_CHARACTER:
					return true;

				default:
					return false;
				}
			}


			// Reads the next token (no error check): the lookahead is replaced by
			// whatever the lexer returns next.
			void move(void)
			{
				m_lookahead = m_lexer.get_token();
			}

			// Reads the next token (with error check): the current lookahead must be
			// of the given kind, otherwise REGERR_SYNTAX is thrown and the lookahead
			// is left unchanged.
			void move(const TOKEN_KIND kind)
			{
				if(m_lookahead.kind == kind)
				{
					move();
					return;
				}
				throw REGERR_SYNTAX;
			}

		private:
			// repeat -> factor '{' NUMBER '}' | factor '{' NUMBER ',' '}' | factor '{' NUMBER ',' NUMBER '}'
			//
			// Parses a brace quantifier (the lookahead must be '{') and returns a new
			// repeat node wrapping `node`:
			//   {n}    -> parse_node_repeatn  (node, n)
			//   {n,}   -> parse_node_repeatn_ (node, n)
			//   {n,m}  -> parse_node_repeatmn (node, n, m)
			//
			// Throws REGERR_SYNTAX        if a token of the quantifier is missing or unexpected.
			// Throws REGERR_REPEAT_MINMAX if n > m in the {n,m} form.
			//
			// The wrapper node is allocated only after the closing '}' has been
			// consumed. On any exception `node` is therefore still a bare, unwrapped
			// operand that the caller can release; no half-built wrapper is leaked.
			parse_tree_ptr _repeat_number(parse_tree_ptr node)
			{
				move(TOKEN_REPEAT_BEGIN);

				// The value is read first; move(TOKEN_NUMBER) then verifies that the
				// token really was a NUMBER.
				const int number1 = m_lookahead.num;
				move(TOKEN_NUMBER);

				bool has_separator = false;   // a ',' was present: "{n,}" or "{n,m}"
				bool has_maximum   = false;   // a second NUMBER was present: "{n,m}"
				int  number2       = number1;

				if(m_lookahead.kind == TOKEN_REPEAT_SEPARATOR)
				{
					has_separator = true;
					move();

					if(m_lookahead.kind == TOKEN_NUMBER)
					{
						has_maximum = true;
						number2 = m_lookahead.num;
						move();

						if(number1 > number2) { throw REGERR_REPEAT_MINMAX; }
					}
				}

				// Validate the closing brace before allocating anything.
				move(TOKEN_REPEAT_END);

				if(has_maximum)
				{
					// repeat -> factor '{' NUMBER ',' NUMBER '}'
					return new parse_node_repeatmn<_Input, _Traits>(node, number1, number2);
				}
				if(has_separator)
				{
					// repeat -> factor '{' NUMBER ',' '}'
					return new parse_node_repeatn_<_Input, _Traits>(node, number1);
				}
				// repeat -> factor '{' NUMBER '}'
				return new parse_node_repeatn<_Input, _Traits>(node, number1);
			}
		};
	}
}

#endif // __MERCURY_REGEX_PARSER__
