﻿module dtmpl.dom;

// Dom
// TODO:
// - Guarantee the order of Dom attributes.
private import std.string;

// Module-private shorthand: String is a plain char[] array.
private alias char[] String;
// A node of a minimal XML document tree, together with the parser
// (Dom.parse) and the serializer (toString / toStringln) for that tree.
//
// The `type` of a node is one of TEXT..ROOT. The remaining enum members
// (TAG_NAME..TEXTSTART) are never stored in a node: they are parser states,
// and END additionally marks a closing tag while it is being read.
class Dom
{
	enum{
		TEXT=0,               // character data; `value` holds the unescaped text
		BLOCK=1,              // element written as <name>...</name>
		VAR=2,                // element written as <name/>
		CDATA=3,              // <![CDATA[...]]>
		DEFXML=4,             // <?name ...?>
		COMMENT=5,            // <!--...-->
		DOCTYPE=6,            // <!DOCTYPE...>
		ROOT=7,               // invisible container returned by parse()
		TAG_NAME=8,           // parser state: reading a tag name
		ATTR_SPACE0=9,        // parser state: between attributes / before the tag end
		ATTR_NAME=10,         // parser state: reading an attribute name
		ATTR_SPACE1_SPACE1=11,// parser state: after the attribute name, before '='
		ATTR_SPACE1_SPACE2=12,// parser state: after '=', before the opening quote
		ATTR_VALUE=13,        // parser state: inside a double-quoted attribute value
		END=14,               // tag kind: closing tag </name>
		TEXTSTART=15,         // parser state: a new text node starts at the next character
	};
	// Printable names of the enum members above, indexed by their value.
	static String[] stateString=[
		"TEXT",
		"BLOCK",
		"VAR",
		"CDATA",
		"DEFXML",
		"COMMENT",
		"DOCTYPE",
		"ROOT",
		"TAG_NAME",
		"ATTR_SPACE0",
		"ATTR_NAME",
		"ATTR_SPACE1_SPACE1",
		"ATTR_SPACE1_SPACE2",
		"ATTR_VALUE",
		"END",
		"TEXTSTART"
	];
	byte type;            // one of TEXT..ROOT
	Dom[] array;          // child nodes, in document order
	String[char[]] attr;  // attributes by name (associative array: order is not preserved)
	String value;         // tag name, or the text/content for TEXT, CDATA, COMMENT and DOCTYPE

	// Creates a node of the given type with the given name/content.
	this(byte type,char[] value){
		this.type=type;
		this.value=value;
	}

	// Appends d as the last child and returns this node for chaining.
	// A VAR (self-closing) node becomes a BLOCK as soon as it has a child.
	Dom add(Dom d){
		if(type==VAR)type=BLOCK;
		array ~= d;
		return this;
	}

	// Sets (or overwrites) attribute `name` and returns this node for chaining.
	Dom setAttr(String name,String value){
		attr[name]=value;
		return this;
	}

	// Returns the value stored for attribute `name`.
	// NOTE(review): a missing name goes straight to the associative array
	// lookup; confirm what callers expect in that case.
	String getAttr(String name){
		return attr[name];
	}

	// Returns the first direct child whose `value` equals name, or null.
	Dom get(String name){
		for(int i=0;i<array.length;i++){
			Dom d = array[i];
			if(d.value==name)return d;
		}
		return null;
	}

	// Escape table as consecutive (raw, escaped) pairs. The order matters:
	// "&" must be encoded first, and xmlEscape(…,1) uses only the first three pairs.
	private static String[] xmlEscapeString=[
		"&","&amp;",
		"<","&lt;",
		">","&gt;",
		"'","&apos;",
		"\"","&quot;",
		"\r","&#13;",
		"\n","&#10;",
	];

	// Serializes the subtree without indentation or line breaks.
	public String toString()
	{
		return toString(0,"","");
	}

	// Serializes the subtree.
	//   nest: indentation depth of this node
	//   add:  text emitted once per indentation level
	//   ln:   text emitted after every node line
	String toString(int nest,String add,String ln)
	{
		String str = "";
		for(int i=0;i<nest;i++){
			str ~= add;
		}
		String add2=str; // indentation prefix, reused for the closing tag
		if(type==TEXT){
			return str~xmlEscape(value,1)~ln;
		}else
		if(type==CDATA){
			return str~"<![CDATA["~value~"]]>"~ln;
		}else
		if(type==COMMENT){
			return str~"<!--"~value~"-->"~ln;
		}else
		if(type==DOCTYPE){
			return str~"<!DOCTYPE"~value~">"~ln;
		}

		// Opening delimiter; the ROOT container itself produces no markup.
		if(type==ROOT){
		}else
		if(type==DEFXML){
			str = str~"<?";
		}else{
			str = str~"<";
		}
		str ~= value;
		foreach(String key,String data;attr){
			data = xmlEscape(data,0);
			str ~= " "~key~"=\""~data~"\"";
		}
		if(type==VAR){
			str ~= "/>"~ln;
		}else
		if(type==DEFXML){
			str ~= "?>"~ln;
		}else{
			// Children of ROOT are printed at ROOT's own depth.
			if(type!=ROOT)str ~= ">"~ln;else nest--;
			for(int i=0;i<array.length;i++){
				str ~= array[i].toString(nest+1,add,ln);
			}
			if(type!=ROOT)str ~= add2~"</"~value~">"~ln;
		}
		return str;
	}

	// Serializes the subtree with two-space indentation and "\n" line ends.
	String toStringln()
	{
		return toString(0,"  ","\n");
	}

	// Replaces raw characters by their entities.
	// mode 0 applies the whole table (used for attribute values);
	// mode 1 skips the last four pairs, i.e. encodes only & < > (used for text).
	static String xmlEscape(String str,int mode){
		int l = xmlEscapeString.length-mode*8;
		for(int i=0;i<l;i+=2){
			str=str.replace(xmlEscapeString[i],xmlEscapeString[i+1]);
		}
		return str;
	}

	// Prefix of every message thrown by parserError().
	private static String error = "parser error";

	// Returns true when str contains str2 starting exactly at index p.
	// Returns false (never reads out of range) when str is too short.
	static bool strcheck(String str,int p,String str2){
		int end=p+str2.length;
		if(str.length<end)return false;
		return str[p..end]==str2;
	}

	// Parses s and returns a ROOT node holding the top-level nodes.
	// Any syntax problem is reported through parserError(), which throws.
	// Attribute values must be double-quoted; several top-level elements
	// are accepted.
	static Dom parse(String s){

		Dom dom = new Dom(ROOT,""); // element currently receiving children
		Dom node = null;            // node currently being built
		String attrname="";
		String attrvalue="";
		char c;
		Dom[] stack;                // open ancestors of dom, innermost last
		byte tagtype=VAR;           // BLOCK, END or DEFXML while a tag is being read
		int p=0;
		int state=TEXTSTART;
		String end="";              // terminator of the current COMMENT/CDATA section
loop:
		while(p<s.length){
			c=s[p];
			switch(state){
			case TEXTSTART:
				node = new Dom(TEXT,"");
				state=TEXT;
				// falls through
			case TEXT:
				if(c=='<'){
					// Flush the text collected so far (empty text is dropped).
					if(node.value!=""){
						node.value=xmlUnescape(node.value);
						dom.add(node);
					}
					node = new Dom(BLOCK,"");
					tagtype=BLOCK;
					state=TAG_NAME;
					p++;
					// A '<' as the very last character cannot start a tag.
					if(p>=s.length){
						parserError("unexpected end of input",s,p);
					}
					if(s[p]=='/'){
						// Closing tag: the shared p++ below skips the '/'.
						tagtype=END;
						break;
					}else
					if(s[p]=='?'){
						// Declarations are accepted at any depth, not only at the root.
						tagtype=DEFXML;
						break;
					}else
					if(s[p]=='!'){
						p++;
						if(strcheck(s,p,"DOCTYPE")){
							p+=7;
							node = new Dom(DOCTYPE,"");
							// Everything up to the next '>' is kept verbatim.
							while(p<s.length){
								c=s[p];
								p++;
								if(c=='>'){
									dom.add(node);
									state=TEXTSTART;
									continue loop;
								}
								node.value~=c;
							}
							parserError("",s,p);
						}else
						if(strcheck(s,p,"[CDATA[")){
							end="]]>";
							node = new Dom(CDATA,"");
							state = CDATA;
							p+=7;
						}else
						if(strcheck(s,p,"--")){
							end="-->";
							node = new Dom(COMMENT,"");
							state=COMMENT;
							p+=2;
						}else{
							parserError("start comment error",s,p);
						}
					}
					// p already points at the next character to examine.
					continue;
				}
				node.value ~= c;
				break;
			case COMMENT:
			case CDATA:
				if(strcheck(s,p,end)){
					dom.add(node);
					state=TEXTSTART;
					p+=3; // both terminators are three characters long
					continue;
				}
				node.value~=c;
				break;
			case TAG_NAME:
				if(c=='?'){
					if(tagtype==DEFXML && strcheck(s,p+1,">")){
						// "?>" right after the name: handled in ATTR_SPACE0 below.
					}else{
						parserError("",s,p);
					}
				}else
				if(!isSpace(c) && c!='>' && c!='/'){
					node.value ~= c;
					break;
				}
				if(node.value==""){
					parserError("tagname",s,p);
				}
				state = ATTR_SPACE0;
				// falls through
			case ATTR_SPACE0:
				if(isSpace(c)){
					break;
				}
				// End of an opening or closing tag.
				if(c=='>'){
					if(tagtype==DEFXML){
						parserError("defxml error",s,p);
					}
					if(tagtype==END){
						if(node.value!=dom.value){
							parserError("attr_space0 endtag node.value="~node.value~" dom.value="~dom.value,s,p);
						}
						// The name check above cannot pass for ROOT (its value
						// is ""), so an ancestor is always available here.
						dom=stack[stack.length-1];
						stack.length=stack.length-1;
						state=TEXTSTART;
						break;
					}
					node.type=BLOCK;
					dom.add(node);
					stack ~= dom;
					dom=node;
					state=TEXTSTART;
					break;
				}
				// Self-closing tag "/>".
				if(c=='/'){
					if(tagtype!=BLOCK){ // e.g. </a/>
						parserError("",s,p);
					}
					if(strcheck(s,p+1,">")){
						node.type=VAR;
						dom.add(node);
						state=TEXTSTART;
						p+=2;
						continue;
					}
					parserError("",s,p);
				}
				if(tagtype==DEFXML && c=='?'){
					if(strcheck(s,p+1,">")){
						node.type=DEFXML;
						dom.add(node);
						state=TEXTSTART;
						p+=2;
						continue;
					}
					parserError("",s,p);
				}
				attrname="";
				attrvalue="";
				state = ATTR_NAME;
				// falls through
			case ATTR_NAME:
				if(c=='='){
					state = ATTR_SPACE1_SPACE2;
					break;
				}
				// '>' and '/' are not accepted inside an attribute name.
				if(c=='>' || c=='/'){
					parserError("",s,p);
				}
				if(!isSpace(c)){
					attrname ~= c;
					break;
				}
				state = ATTR_SPACE1_SPACE1;
				// falls through
			case ATTR_SPACE1_SPACE1:
				if(isSpace(c)){
					break;
				}
				if(c == '='){
					state = ATTR_SPACE1_SPACE2;
					break;
				}
				// Anything but white space or '=' after the name is an error.
				parserError("",s,p);
			case ATTR_SPACE1_SPACE2:
				if(isSpace(c)){
					break;
				}
				if(c=='"'){
					state=ATTR_VALUE;
					break;
				}
				// Anything but white space or '"' after '=' is an error.
				parserError("",s,p);
			case ATTR_VALUE:
				if(c=='"'){
					node.setAttr(attrname,xmlUnescape(attrvalue));
					state=ATTR_SPACE0;
					break;
				}
				attrvalue ~= c;
				break;
			}
			p++;
		}
		// Flush trailing text, unescaped like every other text node.
		if(state==TEXT && node.value!=""){
			node.value=xmlUnescape(node.value);
			dom.add(node);
			state=TEXTSTART;
		}
		// Unclosed elements or an unfinished tag/comment/CDATA section.
		if(stack.length!=0 || (state!=TEXTSTART) ){
			parserError("",s,p);
		}
		return dom;
	}

	// Returns this node unless it is a ROOT; for a ROOT returns its first
	// BLOCK or VAR child. Throws Exception("parser error") when a ROOT has
	// no such child.
	public Dom getDocument()
	{
		if(type!=ROOT)return this;
		for(int i=0;i<array.length;i++){
			Dom d = array[i];
			if(d.type==BLOCK||d.type==VAR)
				return d;
		}
		throw(new Exception(error));
	}

	// True for space, CR, LF and TAB.
	static private bool isSpace(char c){
		if(c==' '|| c=='\r' || c=='\n' || c=='\t'){
			return true;
		}
		return false;
	}

	// Replaces the entities of the escape table by their raw characters.
	// The table is walked backwards so that "&amp;" is decoded last;
	// decoding it first would turn "&amp;lt;" into "<" instead of "&lt;".
	static String xmlUnescape(String str){
		for(int i=xmlEscapeString.length-2;i>=0;i-=2){
			str=str.replace(xmlEscapeString[i+1],xmlEscapeString[i]);
		}
		return str;
	}

	// Always throws an Exception whose message starts with "parser error",
	// followed by the given text and the line number / column of index p.
	// When p lies inside str, the offending source line and a marker line
	// are appended.
	static void parserError(char[] error,char[] str,int p){
		int line=1;   // 1-based line number of p
		int start=0;  // index of the first character of the current line
		int pos=0;    // characters between start and p
		char c;
		int i;
		for(i=0;i<str.length;i++){
			c=str[i];
			if(p<=i){
				// At or past p: stop at the end of that line. i is left on
				// the line break so that str[start..i] is the whole line.
				if(c=='\n'||c=='\r'){
					break;
				}
			}else{
				pos++;
			}
			if(c=='\r'){
				// Treat "\r\n" as a single line break.
				if(str.length>i+1)
					if(str[i+1]=='\n')i++;
			}else if(c=='\n'){
			}else{
				continue;
			}
			start=i+1;
			pos=0;
			line++;
		}
		error = Dom.error~" "~error;
		error = error ~
			" line="~std.string.toString(line)~
			" pos="~std.string.toString(pos);
		if(p<str.length){
			error ~= "\n";
			error ~= str[start..i] ~ "\n";
			for(int j=0;j<pos-1;j++){
				error ~= "-";
			}
			error ~= "^";
		}
		throw(new Exception(error));
	}

}
// Minimal last-in-first-out container of Object references.
class Stack{
	private Object[] stack; // elements, top of the stack last

	// Puts s on top of the stack.
	void push(Object s){stack ~= s;}

	// Removes and returns the top element.
	// Throws Exception when the stack is empty, instead of indexing the
	// array with an underflowed position.
	Object pop(){
		if(stack.length==0){
			throw new Exception("Stack.pop: stack is empty");
		}
		Object top = stack[stack.length-1];
		stack.length=stack.length-1;
		return top;
	}

	// Number of elements currently on the stack.
	int length(){
		return stack.length;
	}

}
// Free-function shorthand for `new Dom(type,value)`.
Dom newDom(byte type,String value)
{
	Dom created = new Dom(type,value);
	return created;
}

// Self-test, compiled only with -version=DOM_TEST.
version(DOM_TEST){
	private import std.file;
	private import std.date;

	// Runs the parse/serialize table below, then parses two files from the
	// working directory ("tmpl/bbs/index.test.xml" and "xhtml.html") and
	// prints the elapsed getUTCtime() difference for ten parses of the latter.
	void main()
	{
		Dom dom;
		int mode=0; // number of table entries to run; 0 means the whole table
		// Pairs of (input, expectation). The expectation is either the
		// expected serialization, "ok" (serialization must equal the input)
		// or "error" (parsing must throw a parser error).
		static String[] s=[
			"<a/>","ok",
			"<a></a>","ok",
			"      <a>  </a>        ","ok",
			"<a b=\"d\">  </a>","ok",
			"<d a = \"\"/>","<d a=\"\"/>",
			"<d a =   \"\"   />","<d a=\"\"/>",
			"<d a         = \"c \"/>","<d a=\"c \"/>",
			"<a b=\"d\"><e></e></a>","ok",
			"<a b=\"d\"><e></e><d/></a>","ok",
			"<a b=\"d\"><e></e><d /></a>","<a b=\"d\"><e></e><d/></a>",
			"<a b=\"d\"><e></e><d a = \"c \"/></a>","<a b=\"d\"><e></e><d a=\"c \"/></a>",
			"<c b=\"d\"><e></e ><d a = \"c \"/></c>","<c b=\"d\"><e></e><d a=\"c \"/></c>",
			"<?xml?>","ok",
			"<?xml a=\"b\"?>","ok",
			"<?xml ?>","<?xml?>",
			"<?x ?>","<?x?>",
			"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">","ok",
			"<!DOCTYPE>","ok",
			" ","ok",
			"<a><!--abc--></a>","ok",
			"<a><!----></a>","ok",
			"<a><!--a--></a>","ok",
			"<a><![CDATA[a]]></a>","ok",
			"<? ?>","error",
			"<?a>?>","error",
			"<??>","error",
			"<!!>","error",
			"<?>","error",
			"<!DOCTYPE","error",
			"<!DOCTYP","error",
			"<![CDATA[","error",
			"<!--","error",
			"<!-- ","error",
			"<!-- -","error",
			"<!-- --","error",
			"</a>","error",
			"<a>\n</b></a>","error",
			"<a><!--aa-></a>","error",
			"<a><!aa--></a>","error",
			"<a>","error",
			"<a >","error",
			"<a/><a/>","ok",// several top-level nodes are accepted
		];
		if(mode==0)mode=s.length;
		for(int i=0;i<mode;i+=2){
			try{
				dom=Dom.parse(s[i]);
				if(s[i+1]=="error"){
					// Data is passed through %.*s: D arrays are not
					// NUL-terminated and may contain '%'.
					printf("1 no parser error:\"%.*s\",dom=\"%.*s\"\n",s[i],dom.toString());
					assert(false);
				}else{
					if(s[i+1]=="ok"){s[i+1]=s[i];}
					if(dom.toString()!=s[i+1]){
						printf("2 parser error:\n\"%.*s\"!=\n\"%.*s\"\n",dom.toString(),s[i+1]);
						assert(false);
					}
				}
			}catch(Exception e){
				String msg = e.toString();
				if(s[i+1]=="error"){
					// The failure must be a parser error; check the length
					// first so that a short message cannot break the slice.
					if(msg.length<Dom.error.length || Dom.error!=msg[0..Dom.error.length]){
						printf("3 parser error error:\"%.*s\"\n",s[i]);
						printf("3 parser Exception:\"%.*s\"\n",msg);
						assert(false);
					}
				}else{
					printf("4 error not error parser error:\"%.*s\"\n",s[i]);
					printf("4 parser Exception:\"%.*s\"\n",msg);
					assert(false);
				}
			}
		}
		assert(Dom.xmlUnescape("&lt;")=="<");
		Dom.parse(cast(char[])read("tmpl/bbs/index.test.xml")).toString();
		String s1 = cast(String)read("xhtml.html");
		long starttime=getUTCtime();
		for(int i=0;i<10;i++)Dom.parse(s1);
		long endtime=getUTCtime();
		printf("%dmsec\n",cast(int)(endtime-starttime));
		printf("ok\n");
	}
}
