﻿import dom;
import std.string;
import std.file;

/**
 * Serializes a Dom tree into a compact byte string.
 *
 * Layout of the produced data:
 *   1. a text table: every distinct string, each written as
 *      varint(length+1) followed by the bytes, terminated by END;
 *   2. the node stream: one type byte per node followed by the table
 *      index of its value (and of its attribute keys/values), where
 *      table indices start at TEXTSTART. A TEXT node has no type byte;
 *      its index (always >= TEXTSTART) doubles as the marker.
 * BinUnpack reads this format back.
 */
class BinPack{
	// Record markers of the node stream. Values below TEXTSTART are
	// reserved for markers; table indices are numbered from TEXTSTART.
	enum{
		END=0,
		ROOT=1,
		BLOCK=2,
		NONATTRBLOCK=3,
		VAR=4,
		NONATTRVAR=5,
		DEFXML=6,
		CDATA=7,
		COMMENT=8,
		DOCTYPE=9,
		TEXTSTART=10,
	}

	/**
	 * Encodes size as a variable-length integer: 7 bits per byte, least
	 * significant group first, bit 0x80 set on every byte except the last.
	 *
	 * Params:  size = the value to encode
	 * Returns: the encoded bytes (always at least one byte)
	 */
	static char[] sizeToCompArray(uint size){
		char[] v;
		v ~= cast(char)(size & 0x7f);
		while(size>=0x80){
			// more groups follow: flag the byte just written
			v[v.length-1]=v[v.length-1]|0x80;
			size=size>>7;
			v ~= cast(char)(size & 0x7f);
		}
		return v;
	}

	/**
	 * Encodes a string as varint(length+1) followed by its bytes.
	 * The +1 keeps the first byte non-zero even for an empty string, so it
	 * cannot be mistaken for the END marker that closes the text table.
	 */
	static char[] arrayToCompArray(char[] str)
	{
		return sizeToCompArray(str.length+1) ~ str;
	}

	/**
	 * Encodes the attributes of dom as (key index, value index) pairs
	 * followed by END. Every key and value must already have its table
	 * index stored in hash.
	 */
	private static char[] attrToCompArray(Dom dom,int[char[]] hash){
		char[] rc;
		foreach(char[] key,char[] value;dom.attr){
			rc = rc ~ sizeToCompArray(hash[key]);
			rc = rc ~ sizeToCompArray(hash[value]);
		}
		rc ~= END;
		return rc;
	}

	/**
	 * Packs a Dom tree.
	 *
	 * Params:  dom = the tree to pack; if it is not a ROOT node it is
	 *                first added to a freshly created ROOT node
	 * Returns: the packed bytes
	 * Throws:  Exception with the text "pack error: " followed by the
	 *          original message when an Exception occurs while packing
	 *          (for example a node of unknown type)
	 */
	static char[] pack(Dom dom){
	try{
		// Pass 1: walk every node and count how often each string occurs.
		int[char[]] hash;
		Dom[] stack;
		Dom root;
		if(dom.type==Dom.ROOT){
			root=dom;
		}else{
			root=new Dom(Dom.ROOT,"");
			root.add(dom);
		}
		dom=root;
		while(true){
			switch(dom.type){
			case Dom.ROOT:
				// children are pushed reversed so they pop in document order
				stack = stack ~ dom.array.dup.reverse;
				break;
			case Dom.BLOCK:
			case Dom.VAR:
			case Dom.DEFXML:
				// Only BLOCK nodes have their children visited here.
				if(dom.type==Dom.BLOCK){
					stack = stack ~ dom.array.dup.reverse;
				}
				// Count the name exactly once per node, so the ranking
				// below reflects real occurrence counts.
				hash[dom.value]++;
				foreach(char[] key,char[] value;dom.attr){
					hash[key]++;
					hash[value]++;
				}
				break;
			case Dom.TEXT:
			case Dom.CDATA:
			case Dom.COMMENT:
			case Dom.DOCTYPE:
				hash[dom.value]++;
				break;
			default:
				break;
			}
			if(stack.length==0)break;
			dom=stack[stack.length-1];stack.length=stack.length-1;
		}

		dom = root;
		// Assign table indices starting at TEXTSTART, most frequent strings
		// first, so that common strings get the shortest varints.
		char[][][char[]] rehash;	// count (as text) -> strings having that count
		int array[];
		foreach(char[] key,int value;hash){
			rehash[std.string.toString(value)]~=key;
			array ~= value;
		}
		array.sort.reverse;
		int num=int.max;
		int no=TEXTSTART;

		// Start producing output: first the text table.
		char[] rc;

		for(int i=0;i<array.length;i++){
			// array holds one entry per string, so equal counts repeat;
			// handle each distinct count only once.
			if(num>array[i]){
				char[][] carray = rehash[std.string.toString(array[i])];
				for(int j=0;j<carray.length;j++){
					// from here on hash maps string -> table index
					hash[carray[j]]=no++;
					rc ~= arrayToCompArray(carray[j]);
				}
				num=array[i];
			}
		}
		rc ~= END;
		// Pass 2: walk the dom again and write the node stream.
		// A null entry on the stack marks the end of a node's children.
loop:
		while(true){
			switch(dom.type){
			case Dom.ROOT:
				rc ~= ROOT;
				// ROOT values are not counted in pass 1, so the value may
				// have no table index; write 0 in that case instead of
				// relying on what a missing-key lookup returns.
				if(dom.value in hash){
					rc = rc ~ sizeToCompArray(hash[dom.value]);
				}else{
					rc = rc ~ sizeToCompArray(0);
				}
				stack ~= null;
				stack = stack ~ dom.array.dup.reverse;
				break;
			case Dom.BLOCK:
				if(dom.attr.length==0){
					rc ~= NONATTRBLOCK;
					rc = rc ~ sizeToCompArray(hash[dom.value]);
				}else{
					rc ~= BLOCK;
					rc = rc ~ sizeToCompArray(hash[dom.value]);
					rc = rc ~ attrToCompArray(dom,hash);
				}
				stack ~= null;
				stack = stack ~ dom.array.dup.reverse;
				break;
			case Dom.VAR:
				if(dom.attr.length==0){
					rc ~= NONATTRVAR;
					rc = rc ~ sizeToCompArray(hash[dom.value]);
				}else{
					rc ~= VAR;
					rc = rc ~ sizeToCompArray(hash[dom.value]);
					rc = rc ~ attrToCompArray(dom,hash);
				}
				break;
			case Dom.DEFXML:
				// DEFXML always carries an attribute list, even an empty one
				rc ~= DEFXML;
				rc = rc ~ sizeToCompArray(hash[dom.value]);
				rc = rc ~ attrToCompArray(dom,hash);
				break;
			case Dom.CDATA:
				rc ~= CDATA;
				rc = rc ~ sizeToCompArray(hash[dom.value]);
				break;
			case Dom.COMMENT:
				rc ~= COMMENT;
				rc = rc ~ sizeToCompArray(hash[dom.value]);
				break;
			case Dom.DOCTYPE:
				rc ~= DOCTYPE;
				rc = rc ~ sizeToCompArray(hash[dom.value]);
				break;
			case Dom.TEXT:
				// no marker byte: the index itself is >= TEXTSTART
				rc = rc ~ sizeToCompArray(hash[dom.value]);
				break;
			default:
				throw( new Exception("dom.type error type="~std.string.toString(dom.type)));
			}
			if(stack.length==0){
				break;
			}
			dom=stack[stack.length-1];stack.length=stack.length-1;
			// each null sentinel closes one ROOT/BLOCK node
			while(dom===null){
				rc ~= END;
				if(stack.length==0)break loop;
				dom=stack[stack.length-1];stack.length=stack.length-1;
			}
		}
		rc ~= END;
		return rc;
	}catch(Exception e){
		throw( new Exception("pack error: "~e.toString()));
	}
	}
}

//void dbg(char[] str){
//	append("dbg.txt",cast(void[])str);
//}

/**
 * Rebuilds a Dom tree from the byte string produced by BinPack.pack.
 *
 * The input starts with a text table (strings written as varint(length+1)
 * plus bytes, terminated by END) followed by the node stream, in which
 * every string is referenced by its table index (numbered from TEXTSTART).
 */
class BinUnpack{
	// Record markers of the node stream; must match BinPack's values.
	enum{
		END=0,
		ROOT=1,
		BLOCK=2,
		NONATTRBLOCK=3,
		VAR=4,
		NONATTRVAR=5,
		DEFXML=6,
		CDATA=7,
		COMMENT=8,
		DOCTYPE=9,
		TEXTSTART=10,
	}

	/**
	 * Reads a varint table index at pos and returns the referenced string.
	 *
	 * Params:
	 *   str  = packed data
	 *   pos  = read position, advanced past the index
	 *   hash = text table keyed by the decimal text of the index
	 * Returns: the table string; an empty (null) string for index 0
	 */
	static char[] compArrayToText(char[] str,inout int pos,char[][char[]] hash){
		long no=compArrayToSize(str,pos);
		// Index 0 is never stored in the table (entries start at
		// TEXTSTART). The packer can write it for a ROOT value that has no
		// table entry, so treat it as the empty string explicitly instead of
		// depending on what a lookup of a missing key does.
		if(no==0){
			return null;
		}
		return hash[std.string.toString(no)];
	}

	/**
	 * Decodes a variable-length integer written by BinPack.sizeToCompArray:
	 * 7 payload bits per byte, least significant group first, bit 0x80 set
	 * on every byte except the last.
	 *
	 * Params:
	 *   str = packed data
	 *   pos = read position, advanced past the encoded value
	 * Returns: the decoded value
	 */
	static long compArrayToSize(char[] str,inout int pos){
		// read into an int so the byte is not sign-extended
		int b=str[pos++];
		long size=b&0x7f;
		int shift=7;
		while((b&0x80)!=0){
			b=str[pos++];
			// drop the continuation bit and place the group 7 bits above
			// the previous one
			size|=cast(long)(b&0x7f)<<shift;
			shift+=7;
		}
		return size;
	}

	/**
	 * Reads one text-table entry: varint(length+1) followed by the bytes.
	 *
	 * Params:
	 *   str = packed data
	 *   pos = read position, advanced past the entry
	 * Returns: "" for an empty entry, otherwise a slice of str (not a copy)
	 */
	static char[] compArrayToArray(char[] str,inout int pos){
		int size=cast(int)(compArrayToSize(str,pos)-1);
		if(size==0)return "";
		char[] s=str[pos..(pos+size)];
		pos+=size;
		return s;
	}

	/**
	 * Reads (key index, value index) pairs up to and including the closing
	 * END byte and sets each pair as an attribute on d.
	 *
	 * Params:
	 *   d       = node receiving the attributes
	 *   str     = packed data
	 *   pos     = read position, advanced past the closing END
	 *   posback = position of the previous read step, updated here; used
	 *             only to detect a read position that failed to advance
	 *   hash    = text table
	 * Throws: Exception("pos error ...") if pos did not advance
	 */
	private static void readAttrs(Dom d,char[] str,inout int pos,inout int posback,char[][char[]] hash){
		while(true){
			if(pos<=posback){
				throw(new Exception("pos error posback="~std.string.toString(posback)~" pos="~std.string.toString(pos)));
			}
			posback=pos;
			if(str[pos]==END){
				pos++;
				break;
			}
			char[] key=compArrayToText(str,pos,hash);
			char[] value=compArrayToText(str,pos,hash);
			d.setAttr(key,value);
		}
	}

	/**
	 * Unpacks data produced by BinPack.pack.
	 *
	 * Params:  str = the packed bytes
	 * Returns: the first child of the internal working root, i.e. the
	 *          first top-level node read from the stream
	 * Throws:  Exception("pos error ...") if the read position stops
	 *          advancing, Exception("dom.type error ...") on an unknown
	 *          marker byte, and Exception("unpack error ...") wrapping any
	 *          Error raised while decoding
	 */
	static Dom unpack(char[] str){
	try{
		char[][char[]] hash;
		int no = TEXTSTART;
		// Read the text table.
		int pos = 0;// read position
		while(true){
			if(str[pos]==END){
				pos++;
				break;
			}
			char[] text = compArrayToArray(str,pos);
			hash[std.string.toString(no++)]=text;
		}
		// Build the dom.
		Dom root = newDom(Dom.ROOT,"");
		Dom d;
		Dom dom=root;	// node currently receiving children
		Dom[] stack;	// parents of dom, innermost last
		int posback=-1;
loop:
		while(true){
			// guard against a read position that does not advance
			if(pos<=posback){
				throw(new Exception("pos error posback="~std.string.toString(posback)~" pos="~std.string.toString(pos)));
			}
			posback=pos;
			char c = str[pos];
			if(c>=TEXTSTART){
				// no marker byte: this is the table index of a TEXT node
				dom.add(newDom(Dom.TEXT,compArrayToText(str,pos,hash)));
				continue;
			}
			pos++;
			switch(c){
			case END:
				// closes the current ROOT/BLOCK; the final END ends the data
				if(stack.length==0){
					break loop;
				}
				dom=stack[stack.length-1];stack.length=stack.length-1;
				break;
			case ROOT:
				d = newDom(Dom.ROOT,compArrayToText(str,pos,hash));
				dom.add(d);
				stack~=dom;dom=d;
				break;
			case BLOCK:
				d = newDom(Dom.BLOCK,compArrayToText(str,pos,hash));
				readAttrs(d,str,pos,posback,hash);
				dom.add(d);
				stack~=dom;dom=d;
				break;
			case NONATTRBLOCK:
				d = newDom(Dom.BLOCK,compArrayToText(str,pos,hash));
				dom.add(d);
				stack~=dom;dom=d;
				break;
			case VAR:
				d = newDom(Dom.VAR,compArrayToText(str,pos,hash));
				readAttrs(d,str,pos,posback,hash);
				dom.add(d);
				break;
			case NONATTRVAR:
				dom.add(newDom(Dom.VAR,compArrayToText(str,pos,hash)));
				break;
			case DEFXML:
				d = newDom(Dom.DEFXML,compArrayToText(str,pos,hash));
				readAttrs(d,str,pos,posback,hash);
				dom.add(d);
				break;
			case CDATA:
				dom.add(newDom(Dom.CDATA,compArrayToText(str,pos,hash)));
				break;
			case COMMENT:
				dom.add(newDom(Dom.COMMENT,compArrayToText(str,pos,hash)));
				break;
			case DOCTYPE:
				dom.add(newDom(Dom.DOCTYPE,compArrayToText(str,pos,hash)));
				break;
			default:
				throw( new Exception("dom.type error type="~std.string.toString(c)));
			}
		}
		return root.array[0];
	}catch(Error e1){
		throw(new Exception("unpack error "~e1.toString()));
	}
	}

}
version(BINLOAD_TEST){
import std.date;

/**
 * Self-test, compiled only with version BINLOAD_TEST.
 *
 * Builds a sample tree, round-trips it through BinPack/BinUnpack (via the
 * files binload1.xml, binload.bin and binload2.xml written to the current
 * directory), checks a few small documents and helper functions, then
 * packs xhtml.html (which must exist in the current directory) and times
 * 100 unpack runs.
 */
void main()
{
	Dom dom = newDom(Dom.BLOCK,"test")
		.setAttr("attr1","a")
		.setAttr("attr2","b")
		.setAttr("attr3","c")
		.add(newDom(Dom.TEXT,"text"))
		.add(newDom(Dom.VAR,"nonvar"))
		.add(newDom(Dom.VAR,"var")
			.setAttr("attr1","c")
			.setAttr("attr2","b")
			.setAttr("attr3","a")
		)
		.add(newDom(Dom.VAR,"var")
			.setAttr("attr1","dd")
			.setAttr("attr2","ddd")
			.setAttr("attr3","dddd")
		)
		.add(newDom(Dom.BLOCK,"block")
			.setAttr("attr1","e")
			.setAttr("attr2","ee")
			.setAttr("attr3","eee")
			.add(newDom(Dom.TEXT,"text"))
			.add(newDom(Dom.VAR,"nonvar"))
			.add(newDom(Dom.VAR,"var")
				.setAttr("attr1","c")
				.setAttr("attr2","b")
				.setAttr("attr3","a")
			)
			.add(newDom(Dom.VAR,"var")
				.setAttr("attr1","dd")
				.setAttr("attr2","ddd")
				.setAttr("attr3","dddd")
			)
		)
		.add(newDom(Dom.BLOCK,"nonattrblock")
			.add(newDom(Dom.TEXT,"text"))
			.add(newDom(Dom.VAR,"nonvar"))
			.add(newDom(Dom.VAR,"var")
				.setAttr("attr1","c")
				.setAttr("attr2","b")
				.setAttr("attr3","a")
			)
			.add(newDom(Dom.VAR,"var")
				.setAttr("attr1","dd")
				.setAttr("attr2","ddd")
				.setAttr("attr3","dddd")
			)
		)
	;
	// Round trip of the hand-built tree through files on disk.
	write("binload1.xml",dom.toString());
	write("binload.bin",BinPack.pack(dom));
	Dom d = BinUnpack.unpack(cast(char[])read("binload.bin"));
	write("binload2.xml",d.toString());
	assert(d.toString()==dom.toString());
	assert(d.toString()==Dom.parse(cast(char[])read("binload1.xml")).toString());

	// Empty elements, packed from the document node ...
	assert(BinUnpack.unpack(BinPack.pack(Dom.parse("<a/>").getDocument())).toString()=="<a/>");
	assert(BinUnpack.unpack(BinPack.pack(Dom.parse("<a></a>").getDocument())).toString()=="<a></a>");

	// ... and from the parse result itself, including empty input.
	assert(BinUnpack.unpack(BinPack.pack(Dom.parse("<a/>"))).toString()=="<a/>");
	assert(BinUnpack.unpack(BinPack.pack(Dom.parse("<a></a>"))).toString()=="<a></a>");
	assert(BinUnpack.unpack(BinPack.pack(Dom.parse(""))).toString()=="");

	// An empty string is stored as the single byte 1 (length+1).
	char[] s = BinPack.arrayToCompArray("");
	int pos=0;
	char[] s2 = BinUnpack.compArrayToArray(s,pos);
	assert(s=="\1");
	assert(s2=="");
	assert(pos==1);
	// Counting through an associative array, as pack() does, including
	// the empty string as a key.
	int[char[]] a;
	a[""]++;a[""]++;a["b"]+=4;
	assert(a[""]==2);
	assert(a["b"]==4);
	assert(BinUnpack.unpack(BinPack.pack(Dom.parse("<name a=\"\"/>"))).toString()=="<name a=\"\"/>");

	// Larger document read from disk.
	dom = Dom.parse(cast(char[])read("xhtml.html"));
	char[] data = BinPack.pack(dom);
	write("xhtml.bin",cast(void[])data);
	// pass the length itself; an array argument does not match %d
	printf("data.length=%d\n",cast(int)data.length);
	d = BinUnpack.unpack(data);
	assert(d.toString()==dom.toString());

	// Rough timing of 100 unpack runs.
	long starttime=getUTCtime();
	for(int i=0;i<100;i++)BinUnpack.unpack(data);
	long endtime=getUTCtime();
	printf("%dmsec\n",cast(int)(endtime-starttime));

	printf("ok\n");
}
}