/******************************************************************************
 *
 * Copyright (c) 1999	TOSHIYUKI ARAI. ALL RIGHTS RESERVED. 
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer.
 *  
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 *	HTMLParser.cpp
 *
 *****************************************************************************/

// 1999.08.12 Modified replaceEntity

#include "HTMLParser.h"


/**
 * Default constructor.
 *
 * Leaves the parser without a document and without a richText target
 * (both are null) and clears every state flag.
 */
HTMLParser::HTMLParser()
{
	// Nothing attached yet.
	document     = null;
	richText     = null;

	// All state flags start out cleared.
	preMode      = False;
	styleBlock   = False;
	startComment = False;
	startElement = False;
}

/**
 * Constructs a parser bound to the given document.
 *
 * @param doc  stored as-is in the `document` member; richText stays null
 *             and every state flag is cleared.
 */
HTMLParser::HTMLParser(HTMLDocument* doc)
{
	// Remember the supplied document; no richText target yet.
	document     = doc;
	richText     = null;

	// All state flags start out cleared.
	preMode      = False;
	styleBlock   = False;
	startComment = False;
	startElement = False;
}


/**
 * Replaces every carriage return (0x0D) and line feed (0x0A) in the
 * NUL-terminated string `buffer` with a single space, in place.
 *
 * The earlier implementation stopped scanning as soon as either of the two
 * characters was absent from the remainder of the string, so text that used
 * only LF (or only CR) line breaks had just its first break replaced.
 * This version visits each character exactly once and replaces all of them.
 *
 * @param buffer  writable NUL-terminated string; a null pointer is ignored.
 */
void HTMLParser::replaceNewLine(char* buffer)
{
	if (!buffer) {
		return;
	}

	for (char* p = buffer; *p; p++) {
		if (*p == 0xd || *p == 0xa) {
			*p = ' ';
		}
	}
}


/**
 * Decodes a small fixed set of HTML character entities in place.
 *
 * Recognised entities (case-sensitive, trailing ';' required):
 *   &lt; -> '<'   &gt; -> '>'   &amp; -> '&'
 *   &apos; -> '\''   &quot; -> '"'   &nbsp; -> ' '
 *
 * The string is processed in one left-to-right pass with a read cursor and
 * a write cursor. Because every replacement is shorter than the entity it
 * replaces, the write cursor never overtakes the read cursor and the result
 * always fits in the original buffer.
 *
 * Compared with the earlier implementation this fixes three problems:
 *  - the terminator was written using a loop index that, under standard
 *    C++ for-loop scoping, referred to the entity-table index rather than
 *    the shift loop's counter, which could truncate the string;
 *  - the scan position jumped forward to wherever an entity was found, so
 *    entities located earlier in the string could be left undecoded;
 *  - already-decoded output was rescanned, so "&amp;quot;" became '"'
 *    instead of the literal text "&quot;".
 *
 * @param buffer  writable NUL-terminated string; a null pointer is ignored.
 */
void HTMLParser::replaceEntity(char* buffer)
{
	static const char* const entities[] = {
		"&lt;", "&gt;", "&amp;", "&apos;", "&quot;", "&nbsp;"
	};
	static const char chara[] = {
		'<', '>', '&', '\'', '\"', ' ',
	};
	static const size_t entityCount = sizeof(entities) / sizeof(entities[0]);

	if (!buffer) {
		return;
	}

	const char* src = buffer;	// next character to read
	char*       dst = buffer;	// next position to write (dst <= src)

	while (*src) {
		size_t consumed = 0;

		// Only an '&' can start an entity, so skip the table otherwise.
		if (*src == '&') {
			for (size_t i = 0; i < entityCount; i++) {
				const size_t len = strlen(entities[i]);
				if (strncmp(src, entities[i], len) == 0) {
					*dst++ = chara[i];
					consumed = len;
					break;
				}
			}
		}

		if (consumed) {
			// Skip the whole entity; its replacement was already written.
			src += consumed;
		} else {
			// Ordinary character (or an unrecognised '&'): copy through.
			*dst++ = *src++;
		}
	}
	*dst = '\0';
}


/**
 * Parses one chunk of HTML text and appends its plain-text content to
 * richText (when richText is non-null).
 *
 * The parser is incremental: the startElement / startComment flags and the
 * partial tag text accumulated in `element` survive between calls, so a tag
 * or a comment may be split across several buffers.
 *
 * What this function does:
 *  - A CR or LF at the current scan position is emitted as one space.
 *  - Text up to the next '<' is passed through replaceNewLine() and
 *    replaceEntity() and then appended to richText.
 *  - "<!--" and "<style" open a skipped region; the region ends at the
 *    first "-->" or "/style>" found. If neither is present in the current
 *    buffer the function returns and keeps skipping on the next call.
 *  - A tag whose text is exactly "br", "hr", "p" or "/tr" (compared
 *    case-insensitively) appends "\r\n" to richText.
 *  - Every other tag is discarded.
 *
 * @param buffer  writable NUL-terminated text; a null pointer is ignored.
 *                The buffer is modified in place: the '<' that opens each
 *                tag is overwritten with a terminator (and not restored),
 *                and text segments are rewritten by replaceNewLine() and
 *                replaceEntity().
 */
void HTMLParser::parse(char* buffer)
{
	if (!buffer) {
		return;
	}

	char* ptr = buffer;
	//Printf("--[%s]\r\n", buffer);
	while (*ptr) {
		//Printf("==[%s]\r\n", ptr);

		// A line break at the scan position becomes a single space.
		// richText may be null, so guard it like every other append below.
		if (*ptr == 0xd || *ptr == 0xa) {
			if (richText) {
				richText -> append(" ");
			}
			ptr++;
			continue;
		}

		// --- State 1: outside of any tag -----------------------------------
		if (startElement == False) {
			char* p = strstr(ptr, "<");

			// No further tag in this buffer: the rest is plain text.
			if (p == null) {
				if (richText) {
					replaceNewLine(ptr);
					replaceEntity(ptr);
					richText -> append(ptr);
				}
				break;
			}

			if (p) {
				// Comments and style blocks are both skipped through the
				// same startComment flag.
				if (strncmp(p, "<!--", 4) == 0) {
					startComment = True;
				}

				if (strncmp(p, "<style", 6) == 0) {
					startComment = True;
				}
				// A stray closing style tag simply ends the skipped region.
				if (strncmp(p, "</style>", 8) == 0) {
					startComment = False;
					ptr = p +8;
					continue;
				}

				// Terminate the text that precedes the tag so it can be
				// emitted as a string. The '<' is not restored afterwards.
				*p = Zero;
				if (richText) {
					replaceNewLine(ptr);
					replaceEntity(ptr);
					richText -> append(ptr);
				}
				startElement = True;

				// p now points at the first character after '<'.
				p++;

				// Handle comment block.
				if (startComment) {
					char* endOfComment = strstr(p, "-->");
					// Found endOfComment
					if (endOfComment) {
						startComment = False;
						startElement = False;
						ptr = endOfComment+3;
						//ptr++;
						continue;

					}
					endOfComment = strstr(p, "/style>");
					if (endOfComment) {
						startComment = False;
						startElement = False;
						ptr = endOfComment+7;
						//ptr++;
						continue;

					} else {
						// Ignore comment block; startElement and
						// startComment stay set for the next call.
						return;
					}

				}

				char* e = strstr(p, ">");
			 
				if (e) {
					// Complete tag inside this buffer: temporarily
					// terminate it at '>' to read the tag text.
					startElement = False;
					char t = *e;
					*e = Zero;
					element.append(p);
					//Printf(".%s\r\n", element.getBuffer());
					if (element.getContentSize() == 2) {
						if (strcmpi(element.getBuffer(), "br") == 0 ||
							strcmpi(element.getBuffer(), "hr") == 0) {
						
							if (richText) {
								//Printf("<BR>\r\n");
								richText -> append("\r\n");	
							}
						}
					}
					if (element.getContentSize() == 3) {
						if (strcmpi(element.getBuffer(), "/tr") == 0) {
							if (richText) {
								//Printf("<BR>\r\n");
								richText -> append("\r\n");	
							}
						}
					}

					if (element.getContentSize() == 1) {
						if (strcmpi(element.getBuffer(), "p") == 0) {
							if (richText) {
								//Printf("<BR>\r\n");
								richText -> append("\r\n");	
							}
						}
					}

					//Printf("<%s>\r\n", element.getBuffer());
					element.clear();
					*e = t;
					// Leave ptr on '>'; the ptr++ below steps past it.
					ptr = e;
				} else {
					// Tag continues in the next buffer: keep what we have.
					// p already points just past '<', so append from p.
					// (Appending from ++p dropped the first tag character
					// and read past the terminator when the buffer ended
					// in '<'.)
					//Printf("##[%s]\r\n", p);
					element.append(p);
					break;
				}
			} 
			ptr++;
			continue;
		}

		// --- State 2: inside a comment/style region from an earlier call ---
		// We are in comment block.
		if (startComment ==True) {
			char* endOfComment = strstr(ptr, "-->");
			if (endOfComment) {
				startComment = False;
				startElement = False;
				ptr = endOfComment+3;
				//ptr++;
				continue;
			} 

			endOfComment = strstr(ptr, "/style>");
			if (endOfComment) {
				startComment = False;
				startElement = False;

				ptr = endOfComment+7;
				//ptr++;
				continue;
			} else {
				// Still no terminator: drop this buffer and keep waiting.
				return;
			}

		}

		// --- State 3: inside a tag that started in an earlier call ---------
		if (startElement == True) {
			//Printf("$$[%s]\r\n", ptr);

			char* e = strstr(ptr, ">");
			if (e && startComment ==False) {
				// Tag ends here. Its text is collected and then discarded;
				// no line-break handling is applied on this path.
				startElement = False;
				char t = *e;
				*e = Zero;
		
				element.append(ptr);
				//Printf("..%s\r\n", element.getBuffer());
				*e = t;
				ptr = e;
				//Printf("<%s>\r\n", element.getBuffer());
				element.clear();
			} else {
				// Tag still not closed: accumulate and wait for more input.
				//Printf("++[%s]\r\n", ptr);
				element.append(ptr);
				break;
			}
			ptr++;
			continue;
		}
	}
}
