﻿/**
 *	PEG文法コンパイラのモジュール。
 *
 *	Version:
 *		$Revision$
 *	Date:
 *		$Date$
 *	License:
 *		MIT/X Consortium License
 *	History:
 *		$Log$
 */

module outland.tl.peg;

import std.string;

import outland.tl.parser;

/// Characters that may appear as the first character of an identifier.
const char[] IDENTIFIER_HEAD = std.string.letters ~ "_";

/// Characters that may appear after the first character of an identifier.
const char[] IDENTIFIER_TAIL = IDENTIFIER_HEAD ~ std.string.digits;

/// Value returned by the read* functions to signal a parse error (the empty string).
const char[] PARSE_ERROR = "";

/// Converts literal text into generated code by wrapping it in double quotes.
char[] makeLiteral(char[] src) {
	 char[] quoted = "\"" ~ src;
	 quoted ~= "\"";
	 return quoted;
}

/// Converts an identifier into generated code; the identifier is emitted unchanged.
char[] makeIdentifier(char[] src) {
	 char[] identifier = src;
	 return identifier;
}

/// Converts a semantic action into generated code: act(parser,&action).
char[] makeAction(char[] parser, char[] action) {
	 char[] call = "act(" ~ parser;
	 call ~= ",&" ~ action;
	 call ~= ")";
	 return call;
}

/// Converts an optional expression (postfix '?') into generated code: opt(src).
char[] makeOption(char[] src) {
	 char[] call = "opt(" ~ src;
	 call ~= ")";
	 return call;
}

/// Converts an optional repetition (postfix '*') into generated code: optr(src).
char[] makeOptionRepeat(char[] src) {
	 char[] call = "optr(" ~ src;
	 call ~= ")";
	 return call;
}

/// Converts a repetition (postfix '+') into generated code: rep(src).
char[] makeRepeat(char[] src) {
	 char[] call = "rep(" ~ src;
	 call ~= ")";
	 return call;
}

/// Converts an and-predicate (prefix '&') into generated code: and(src).
char[] makeAnd(char[] src) {
	 char[] call = "and(" ~ src;
	 call ~= ")";
	 return call;
}

/// Converts a not-predicate (prefix '!') into generated code: not(src).
char[] makeNot(char[] src) {
	 char[] call = "not(" ~ src;
	 call ~= ")";
	 return call;
}

/// Converts a sequence into generated code: seq(head tail), where tail is the
/// already-converted list of following elements (each prefixed with a comma).
char[] makeSequence(char[] head, char[] tail) {
	 char[] call = "seq(" ~ head;
	 call ~= tail;
	 call ~= ")";
	 return call;
}

/// Converts one following element of a sequence by prefixing it with a comma.
char[] makeSequenceTail(char[] src) {
	 char[] element = ",";
	 element ~= src;
	 return element;
}

/// Converts an ordered choice into generated code: choice(head tail), where tail is
/// the already-converted list of further alternatives (each prefixed with a comma).
char[] makeChoice(char[] head, char[] tail) {
	 char[] call = "choice(" ~ head;
	 call ~= tail;
	 call ~= ")";
	 return call;
}

/// Converts one further alternative of a choice by prefixing it with a comma.
char[] makeChoiceTail(char[] src) {
	 char[] alternative = ",";
	 alternative ~= src;
	 return alternative;
}

/// Converts a rule declaration into generated code: "auto name = new Rule();"
/// followed by a newline.
char[] makeDeclare(char[] name) {
	 char[] declaration = "auto " ~ name;
	 declaration ~= " = new Rule();";
	 declaration ~= std.string.newline;
	 return declaration;
}

/// Converts a rule assignment into generated code: "lhs.assign(rhs);"
/// followed by a newline.
char[] makeAssign(char[] lhs, char[] rhs) {
	 char[] statement = lhs ~ ".assign(";
	 statement ~= rhs;
	 statement ~= ");";
	 statement ~= std.string.newline;
	 return statement;
}

/**
 * Consumes one character if it belongs to the given character set.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced by one when a character is consumed.
 *	set = characters that are accepted.
 * Returns:
 *	true if src[p] was in set and was consumed, false otherwise
 *	(including when p is at or past the end of src).
 */
bool readInclude(char[] src, inout size_t p, char[] set) {
	 if(p < src.length) {
		 char current = src[p];
		 for(size_t i = 0; i < set.length; ++i) {
			 if(set[i] == current) {
				 p += 1;
				 return true;
			 }
		 }
	 }
	 return false;
}

/**
 * Skips a block comment of the form "/" "*" ... "*" "/" starting at p.
 *
 * An unterminated comment consumes the rest of the input and still counts as skipped.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past the comment when one is found.
 * Returns:
 *	true if a block comment started at p, false otherwise (p is left unchanged).
 */
bool skipBlockComment(char[] src, inout size_t p) {
	 // The opener needs two characters starting at p. Checking the remaining
	 // length (not the total length) keeps src[p + 1] in bounds when the input
	 // ends with a single '/'.
	 if((p + 2) > src.length || src[p] != '/' || src[p + 1] != '*') {
		 return false;
	 }
	 p += 2;
	 
	 while(p < src.length) {
		 if((p + 2) <= src.length && src[p] == '*' && src[p + 1] == '/') {
			 p += 2;
			 break;
		 } else {
			 ++p;
		 }
	 }
	 return true;
}

/**
 * Skips a nesting block comment of the form "/+ ... +/" starting at p.
 *
 * Inner "/+" openers increase the nesting depth and "+/" closers decrease it;
 * the comment ends when the depth returns to zero. An unterminated comment
 * consumes the rest of the input and still counts as skipped.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past the comment when one is found.
 * Returns:
 *	true if a nesting comment started at p, false otherwise (p is left unchanged).
 */
bool skipNestedBlockComment(char[] src, inout size_t p) {
	 // The opener needs two characters starting at p. Checking the remaining
	 // length (not the total length) keeps src[p + 1] in bounds when the input
	 // ends with a single '/'.
	 if((p + 2) > src.length || src[p] != '/' || src[p + 1] != '+') {
		 return false;
	 }
	 p += 2;
	 
	 size_t nest = 1;
	 while(p < src.length && nest != 0) {
		 // Only one character left: it cannot be an opener or closer.
		 if((p + 2) > src.length) {
			 ++p;
			 continue;
		 }
		 
		 if(src[p] == '/' && src[p + 1] == '+') {
			 p += 2;
			 ++nest;
		 } else if(src[p] == '+' && src[p + 1] == '/') {
			 p += 2;
			 --nest;
		 } else {
			 ++p;
		 }
	 }
	 return true;
}

/**
 * Skips a line comment ("//" up to and including the line terminator) starting at p.
 *
 * The terminator may be "\r\n", "\r" or "\n"; a comment that reaches the end of
 * the input without a terminator is also accepted.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past the comment when one is found.
 * Returns:
 *	true if a line comment started at p, false otherwise (p is left unchanged).
 */
bool skipLineComment(char[] src, inout size_t p) {
	 // The opener needs two characters starting at p. Checking the remaining
	 // length (not the total length) keeps src[p + 1] in bounds when the input
	 // ends with a single '/'.
	 if((p + 2) > src.length || src[p] != '/' || src[p + 1] != '/') {
		 return false;
	 }
	 
	 while(p < src.length) {
		 if((p + 2) <= src.length && src[p] == '\r' && src[p + 1] == '\n') {
			 p += 2;
			 break;
		 } else if(src[p] == '\r' || src[p] == '\n') {
			 ++p;
			 break;
		 } else {
			 ++p;
		 }
	 }
	 return true;
}

/**
 * Skips any run of whitespace and comments (line, block and nesting block) at p.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past everything that was skipped.
 * Returns:
 *	always true.
 */
bool skipSpace(char[] src, inout size_t p) {
	 while(p < src.length) {
		 // Try each skippable construct in turn; restart after any success.
		 if(skipLineComment(src, p)) continue;
		 if(skipBlockComment(src, p)) continue;
		 if(skipNestedBlockComment(src, p)) continue;
		 if(readInclude(src, p, std.string.whitespace)) continue;
		 
		 // Nothing skippable here: a significant character was reached.
		 break;
	 }
	 return true;
}

/**
 * Reads the body of a string literal, starting just after the opening quote.
 *
 * Backslash escapes are copied through verbatim (backslash plus the following
 * character). On success p is left ON the closing quote; the caller consumes it.
 *
 * Params:
 *	src = source text.
 *	p   = current position inside the literal.
 * Returns:
 *	the converted literal (see makeLiteral), or PARSE_ERROR if the input ends
 *	before a closing quote is found.
 */
char[] readLiteral(char[] src, inout size_t p) {
	 char[] content;
	 while(p < src.length) {
		 if(src[p] == '\"') {
			 return makeLiteral(content);
		 }
		 if(src[p] == '\\') {
			 // Keep the backslash and step onto the escaped character.
			 ++p;
			 if(p >= src.length) return PARSE_ERROR;
			 content ~= '\\';
		 }
		 content ~= src[p];
		 ++p;
	 }
	 return PARSE_ERROR;
}

/**
 * Reads an identifier, skipping leading whitespace and comments first.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past the skipped space and the identifier.
 * Returns:
 *	the converted identifier (see makeIdentifier), or PARSE_ERROR if no
 *	identifier starts at the first significant character.
 */
char[] readIdentifier(char[] src, inout size_t p) {
	 skipSpace(src, p);
	 size_t start = p;
	 
	 if(!readInclude(src, p, IDENTIFIER_HEAD)) {
		 return PARSE_ERROR;
	 }
	 
	 // Consume as many tail characters as are available.
	 while(readInclude(src, p, IDENTIFIER_TAIL)) {}
	 return makeIdentifier(src[start .. p]);
}

/**
 * Reads a primary expression: a parenthesized choice, a string literal,
 * or an identifier.
 *
 * Leading whitespace and comments are skipped first, so that an operand
 * following a prefix operator ('&' or '!') may be separated from it by space
 * whether it is an identifier, a literal or a parenthesized expression.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past the expression on success.
 * Returns:
 *	the converted expression, or PARSE_ERROR on failure.
 */
char[] readPrimaryExpression(char[] src, inout size_t p) {
	 skipSpace(src, p);
	 if(p >= src.length) return PARSE_ERROR;
	 char[] result;
	 switch(src[p]) {
	 case '(':
		 ++p;
		 result = readChoice(src, p);
		 // The group must be closed by ')'.
		 if(result.length == 0 || p >= src.length || src[p] != ')') return PARSE_ERROR;
		 ++p;
		 return result;
	 case '\"':
		 ++p;
		 result = readLiteral(src, p);
		 // readLiteral stops on the closing quote; consume it here.
		 if(result.length == 0 || p >= src.length || src[p] != '\"') return PARSE_ERROR;
		 ++p;
		 return result;
	 default:
		 return readIdentifier(src, p);
	 }
}

/**
 * Reads a primary expression optionally followed by a semantic action
 * written as "[identifier]".
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past the expression on success.
 * Returns:
 *	the converted expression (wrapped by makeAction when an action is present),
 *	or PARSE_ERROR when the expression is missing, the action name is missing,
 *	or the closing ']' is missing.
 */
char[] readActionExpression(char[] src, inout size_t p) {
	 char[] exp = readPrimaryExpression(src, p);
	 if(exp.length == 0) return PARSE_ERROR;
	 skipSpace(src, p);
	 if(p < src.length && src[p] == '[') {
		 ++p;
		 char[] act = readIdentifier(src, p);
		 // An empty action name would generate invalid code ("act(x,&)").
		 if(act.length == 0) return PARSE_ERROR;
		 // Allow whitespace/comments between the action name and ']'.
		 skipSpace(src, p);
		 if(p >= src.length || src[p] != ']') return PARSE_ERROR;
		 ++p;
		 exp = makeAction(exp, act);
	 }
	 return exp;
}

/**
 * Reads an action expression optionally followed by one postfix operator:
 * '?' (option), '*' (optional repetition) or '+' (repetition).
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past the expression and operator.
 * Returns:
 *	the converted expression, or PARSE_ERROR if the operand cannot be read.
 */
char[] readPostfixExpression(char[] src, inout size_t p) {
	 char[] operand = readActionExpression(src, p);
	 if(operand.length == 0) return PARSE_ERROR;
	 
	 skipSpace(src, p);
	 if(p >= src.length) return operand;
	 
	 char op = src[p];
	 if(op == '?') {
		 ++p;
		 return makeOption(operand);
	 } else if(op == '*') {
		 ++p;
		 return makeOptionRepeat(operand);
	 } else if(op == '+') {
		 ++p;
		 return makeRepeat(operand);
	 }
	 
	 // No postfix operator: the operand stands alone.
	 return operand;
}

/**
 * Reads a postfix expression optionally preceded by a prefix operator:
 * '&' (and-predicate) or '!' (not-predicate).
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past the skipped space and the expression.
 * Returns:
 *	the converted expression, or PARSE_ERROR at end of input or when the
 *	operand cannot be read.
 */
char[] readPrefixExpression(char[] src, inout size_t p) {
	 skipSpace(src, p);
	 if(p >= src.length) return PARSE_ERROR;
	 
	 char op = src[p];
	 if(op != '&' && op != '!') {
		 // No prefix operator present.
		 return readPostfixExpression(src, p);
	 }
	 
	 ++p;
	 char[] operand = readPostfixExpression(src, p);
	 if(operand.length == 0) return PARSE_ERROR;
	 
	 if(op == '&') {
		 return makeAnd(operand);
	 }
	 return makeNot(operand);
}

/**
 * Reads a sequence: one or more prefix expressions in a row.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced as elements are read.
 * Returns:
 *	the converted sequence (see makeSequence), or PARSE_ERROR if not even
 *	the first element can be read.
 */
char[] readSequence(char[] src, inout size_t p) {
	 char[] first = readPrefixExpression(src, p);
	 if(first.length == 0) return PARSE_ERROR;
	 
	 // Collect following elements until one fails to parse.
	 char[] rest;
	 char[] next = readPrefixExpression(src, p);
	 while(next.length != 0) {
		 rest ~= makeSequenceTail(next);
		 next = readPrefixExpression(src, p);
	 }
	 return makeSequence(first, rest);
}

/**
 * Reads the '/' that separates alternatives of a choice, skipping leading
 * whitespace and comments first.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced past the skipped space and the '/'.
 * Returns:
 *	true if a '/' was consumed, false otherwise.
 */
bool readChoiceSlash(char[] src, inout size_t p) {
	 skipSpace(src, p);
	 if(p >= src.length || src[p] != '/') {
		 return false;
	 }
	 ++p;
	 return true;
}

/**
 * Reads an ordered choice: sequences separated by '/'.
 *
 * Params:
 *	src = source text.
 *	p   = current position; advanced as alternatives are read.
 * Returns:
 *	the converted choice (see makeChoice), or PARSE_ERROR if the first
 *	sequence or any sequence after a '/' cannot be read.
 */
char[] readChoice(char[] src, inout size_t p) {
	 char[] first = readSequence(src, p);
	 if(first.length == 0) return PARSE_ERROR;
	 
	 char[] alternatives;
	 while(true) {
		 if(!readChoiceSlash(src, p)) break;
		 
		 // A '/' must be followed by another sequence.
		 char[] alternative = readSequence(src, p);
		 if(alternative.length == 0) return PARSE_ERROR;
		 alternatives ~= makeChoiceTail(alternative);
	 }
	 return makeChoice(first, alternatives);
}

/**
 * Reads one rule definition of the form "name = choice ;".
 *
 * The outputs are only extended when the whole definition parses, so a
 * malformed rule never leaves a declaration without a matching assignment.
 *
 * Params:
 *	src   = source text.
 *	p     = current position; advanced as input is consumed (also on failure).
 *	decls = receives the generated declaration for the rule.
 *	dest  = receives the generated assignment for the rule.
 * Returns:
 *	true if a complete rule definition was read, false otherwise.
 */
bool readAssign(char[] src, inout size_t p, inout char[] decls, inout char[] dest) {
	 char[] lhs = readIdentifier(src, p);
	 if(lhs.length == 0) return false;
	 skipSpace(src, p);
	 if(p >= src.length || src[p] != '=') return false;
	 ++p;
	 
	 char[] rhs = readChoice(src, p);
	 if(rhs.length == 0) return false;
	 skipSpace(src, p);
	 if(p >= src.length || src[p] != ';') return false;
	 ++p;
	 
	 // Emit declaration and assignment together, only after a full parse.
	 decls ~= makeDeclare(lhs);
	 dest ~= makeAssign(lhs, rhs);
	 return true;
}

/**
 * Compiles PEG rule definitions into source code suitable for a string mixin.
 *
 * Rule definitions are read until one fails to parse; anything from that point
 * on is ignored.
 *
 * Params:
 *	src = grammar text consisting of "name = expression;" definitions.
 * Returns:
 *	all generated rule declarations followed by all generated rule assignments.
 */
char[] compileRule(char[] src) {
	 char[] declarations;
	 char[] assignments;
	 size_t pos = 0;
	 
	 bool more = readAssign(src, pos, declarations, assignments);
	 while(more) {
		 more = readAssign(src, pos, declarations, assignments);
	 }
	 return declarations ~ assignments;
}

unittest {
	 // Bind Rule to a char array iterator so the "new Rule()" in the generated code resolves.
	 alias .Rule!(ArrayIterator!(char)) Rule;
	 
	 // Print the generated code at compile time, then mix it in to declare rule `a`.
	 pragma(msg, compileRule(`a = "test";`));
	 mixin(compileRule(`a = "test";`));
	 // A single-literal rule matches the whole input "test".
	 auto r = a.parse(iterator("test"));
	 assert(r.match);
	 assert(r.length == 4);
}

unittest {
	 // Bind Rule to a char array iterator so the "new Rule()" in the generated code resolves.
	 alias .Rule!(ArrayIterator!(char)) Rule;
	 
	 // Sequence of two literals: both must match back to back.
	 mixin(compileRule(`a = "test" "seq";`));
	 auto r = a.parse(iterator("testseq"));
	 assert(r.match);
	 assert(r.length == 7);
}

unittest {
	 // Bind Rule to a char array iterator so the "new Rule()" in the generated code resolves.
	 alias .Rule!(ArrayIterator!(char)) Rule;
	 
	 // Sequence of two literals: an input missing the last character must not match.
	 mixin(compileRule(`a = "test" "seq";`));
	 auto r = a.parse(iterator("testse"));
	 assert(!r.match);
	 assert(r.length == 0);
}

unittest {
	 // Bind Rule to a char array iterator so the "new Rule()" in the generated code resolves.
	 alias .Rule!(ArrayIterator!(char)) Rule;
	 
	 // Choice between two literals: the first alternative matches the prefix "test".
	 mixin(compileRule(`a = "test"/"seq";`));
	 auto r = a.parse(iterator("testse"));
	 assert(r.match);
	 assert(r.length == 4);
}

unittest {
	 // Bind Rule to a char array iterator so the "new Rule()" in the generated code resolves.
	 alias .Rule!(ArrayIterator!(char)) Rule;
	 
	 // Choice between two literals: the second alternative matches the prefix "seq".
	 mixin(compileRule(`a = "test"/"seq";`));
	 auto r = a.parse(iterator("seqt"));
	 assert(r.match);
	 assert(r.length == 3);
}

unittest {
	 // Bind Rule to a char array iterator so the "new Rule()" in the generated code resolves.
	 alias .Rule!(ArrayIterator!(char)) Rule;
	 
	 // And-predicate followed by a literal: only the "te" literal contributes to the match length.
	 mixin(compileRule(`a = &"test" "te";`));
	 auto r = a.parse(iterator("test"));
	 assert(r.match);
	 assert(r.length == 2, format("%d", r.length));
}

unittest {
	 // Bind Rule to a char array iterator so the "new Rule()" in the generated code resolves.
	 alias .Rule!(ArrayIterator!(char)) Rule;
	 
	 // And-predicate followed by a literal: the input "te" is expected not to match.
	 mixin(compileRule(`a = &"test" "te";`));
	 auto r = a.parse(iterator("te"));
	 assert(!r.match);
	 assert(r.length == 0);
}

unittest {
	 // Bind Rule to a char array iterator so the "new Rule()" in the generated code resolves.
	 alias .Rule!(ArrayIterator!(char)) Rule;
	 
	 // Result type passed to the semantic action below.
	 alias Result!(ArrayIterator!(char)) Res;
	 
	 // Semantic action: records whether the result it received was a match.
	 bool b = false;
	 void match(Res r) {b = r.match;}
	 // Literal with an attached action: "[match]" compiles to act(...,&match).
	 mixin(compileRule(`a = "te"[match];`));
	 auto r = a.parse(iterator("test"));
	 assert(r.match);
	 assert(r.length == 2);
	 // The action is expected to have been called with a matching result.
	 assert(b);
}
