/*******************************************************************************
 * Copyright (c) 2008 IGA Tosiki, NTT DATA BUSINESS BRAINS Corp.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    IGA Tosiki (NTT DATA BUSINESS BRAINS Corp.) - initial API and implementation
 *******************************************************************************/
/*
 * blanco Framework
 * Copyright (C) 2008 NTT DATA BUSINESS BRAINS CORPORATION
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */
package blanco.html.parser;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import blanco.commons.util.BlancoStringUtil;
import blanco.html.parser.helper.BlancoHtmlDecideEncodingContentHandler;
import blanco.html.parser.helper.BlancoHtmlNullContentHandler;
import blanco.html.parser.valueobject.BlancoHtmlAttribute;

/**
 * Hand-written, character-at-a-time HTML tokenizer.
 *
 * <p>
 * The input is read from a {@link BufferedReader} and reported to a
 * {@link BlancoHtmlContentHandler} as start/end document, start/end element,
 * character data and comment events. No validation is performed; malformed
 * markup is passed through on a best-effort basis.
 * </p>
 *
 * <p>
 * NOTE(review): inside a tag only the space character (<code>' '</code>) is
 * recognised as a separator. Tabs and line breaks are appended to the element
 * or attribute text as ordinary characters.
 * </p>
 */
class BlancoHtmlParserImpl implements BlancoHtmlParser {
    /** Encoding used when auto-detection yields no usable encoding. */
    private static final String DEFAULT_ENCODING = "Windows-31J";

    /**
     * Encoding used for the detection pass. Every byte maps to exactly one
     * character in it, so the markup can be scanned before the real encoding
     * is known.
     */
    private static final String PROBE_ENCODING = "ISO8859_1";

    /** Read-ahead limit passed to {@link BufferedReader#mark(int)} for look-ahead. */
    private static final int LOOKAHEAD_LIMIT = 100;

    /** Reader currently being parsed; assigned by {@link #parse(BufferedReader)}. */
    protected BufferedReader fReader;

    /** Handler that receives the parse events. */
    protected BlancoHtmlContentHandler fHandler;

    /** Character encoding used by {@link #parse(byte[])}. */
    protected String fEncoding;

    /**
     * Sets the handler that receives the parse events.
     *
     * @param handler
     *            the handler to notify.
     */
    public void setHandler(final BlancoHtmlContentHandler handler) {
        fHandler = handler;
    }

    /**
     * Returns the handler that receives the parse events.
     *
     * @return the current handler; may be <code>null</code> if none was set
     *         and no parse has run yet.
     */
    public BlancoHtmlContentHandler getHandler() {
        return fHandler;
    }

    /**
     * Returns the reader most recently passed to {@link #parse(BufferedReader)}.
     *
     * @return the reader, or <code>null</code> if no parse has been started.
     */
    public BufferedReader getReader() {
        return fReader;
    }

    /**
     * Sets the character encoding used when parsing a byte array.
     *
     * @param encoding
     *            the encoding name. A <code>null</code> or empty value makes
     *            {@link #parse(byte[])} detect the encoding itself.
     */
    public void setEncoding(String encoding) {
        fEncoding = encoding;
    }

    /**
     * Returns the character encoding used when parsing a byte array.
     *
     * @return the encoding name.
     */
    public String getEncoding() {
        return fEncoding;
    }

    /**
     * Parses an HTML document held in a byte array.
     *
     * <p>
     * If no encoding has been set, {@link #decideEncoding(byte[])} is run
     * first. The encoding chosen there is stored in this parser and therefore
     * stays in effect for later calls as well.
     * </p>
     *
     * @param argInputHtml
     *            the raw bytes of the document.
     * @throws IOException
     *             if reading fails, or if the configured encoding is not
     *             supported.
     */
    public void parse(final byte[] argInputHtml) throws IOException {
        if (BlancoStringUtil.null2Blank(getEncoding()).length() == 0) {
            // No encoding was given: work it out from the document itself.
            decideEncoding(argInputHtml);
        }

        // Parse for real with the encoding that is now fixed.
        final BufferedReader reader = new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(argInputHtml), getEncoding()));
        try {
            parse(reader);
        } finally {
            reader.close();
        }
    }

    /**
     * Parses an HTML document from the given reader.
     *
     * <p>
     * The reader must support <code>mark</code>/<code>reset</code> (which
     * {@link BufferedReader} does). It is not closed by this method.
     * </p>
     *
     * @param reader
     *            the source of the document.
     * @throws IOException
     *             if reading fails.
     */
    public void parse(final BufferedReader reader) throws IOException {
        fReader = reader;

        if (fHandler == null) {
            // Parsing without a handler would fail, so install a null handler
            // in its place.
            fHandler = new BlancoHtmlNullContentHandler();
        }

        processDocument();
    }

    /**
     * Tries to determine the character encoding of the document and stores the
     * result via {@link #setEncoding(String)}.
     *
     * <p>
     * The bytes are parsed once with a separate parser instance whose handler
     * is a {@link BlancoHtmlDecideEncodingContentHandler}. If that handler
     * reports no encoding, or reports one that this Java runtime does not
     * support, the default encoding <code>Windows-31J</code> is used instead.
     * </p>
     *
     * @param argInputHtml
     *            the raw bytes of the document.
     * @throws IOException
     *             if reading fails.
     */
    protected void decideEncoding(final byte[] argInputHtml) throws IOException {
        final BlancoHtmlParser encodingParser = new BlancoHtmlParserImpl();
        final BlancoHtmlDecideEncodingContentHandler encodingHandler = new BlancoHtmlDecideEncodingContentHandler();
        encodingParser.setHandler(encodingHandler);

        final BufferedReader probeReader = new BufferedReader(
                new InputStreamReader(new ByteArrayInputStream(argInputHtml),
                        PROBE_ENCODING));
        try {
            encodingParser.parse(probeReader);
        } finally {
            probeReader.close();
        }

        final String detected = encodingHandler.getEncoding();
        if (detected != null && isSupportedEncoding(detected)) {
            setEncoding(detected);
        } else {
            // Nothing usable was found, so fall back to a fixed default. This
            // also covers a document that declares an encoding the runtime
            // cannot decode, which would otherwise abort the real parse.
            setEncoding(DEFAULT_ENCODING);
        }
    }

    /**
     * Tells whether the runtime can decode the named encoding.
     *
     * @param encoding
     *            the encoding name to check.
     * @return <code>true</code> if the encoding is supported;
     *         <code>false</code> if it is unsupported or the name is not a
     *         legal charset name.
     */
    private static boolean isSupportedEncoding(final String encoding) {
        try {
            return java.nio.charset.Charset.isSupported(encoding);
        } catch (IllegalArgumentException e) {
            // The name itself is syntactically illegal.
            return false;
        }
    }

    /**
     * Reads the whole document from {@link #fReader}: text outside tags is
     * collected and reported as character data, and every <code>'&lt;'</code>
     * hands over to {@link #processElementOrComment()}.
     *
     * @throws IOException
     *             if reading fails.
     */
    protected void processDocument() throws IOException {
        fHandler.startDocument();

        final StringBuilder characters = new StringBuilder();
        for (;;) {
            final int iRead = fReader.read();
            if (iRead < 0) {
                break;
            }

            final char cRead = (char) iRead;
            if (cRead == '<') {
                // Report the text collected so far before entering the tag.
                flushCharacters(characters);
                processElementOrComment();
            } else {
                characters.append(cRead);
            }
        }

        // Trailing text after the last tag.
        flushCharacters(characters);

        fHandler.endDocument();
    }

    /**
     * Reports pending character data to the handler, if there is any, and
     * empties the buffer.
     *
     * @param characters
     *            the buffer holding the pending text.
     * @throws IOException
     *             if the handler fails.
     */
    private void flushCharacters(final StringBuilder characters)
            throws IOException {
        if (characters.length() > 0) {
            fHandler.characters(characters.toString());
            characters.setLength(0);
        }
    }

    /**
     * Processes a tag or a comment. Called after the opening
     * <code>'&lt;'</code> has been consumed.
     *
     * <p>
     * Recognised forms: start tag, end tag (<code>&lt;/name&gt;</code>),
     * self-closing tag (<code>&lt;name/&gt;</code>, reported as start plus
     * end), <code>&lt;!-- --&gt;</code> comments (type 0) and comments opened
     * with <code>&lt;%--</code> (type 1).
     * </p>
     *
     * <p>
     * NOTE(review): if the input ends before <code>'&gt;'</code> is seen, the
     * partially read tag is dropped without any handler call.
     * </p>
     *
     * @throws IOException
     *             if reading fails.
     */
    protected void processElementOrComment() throws IOException {
        boolean isStartElement = true;
        final StringBuilder bufElement = new StringBuilder();
        final List<BlancoHtmlAttribute> attributeList = new ArrayList<BlancoHtmlAttribute>();
        for (;;) {
            final int iRead = fReader.read();
            if (iRead < 0) {
                // Input ended inside the tag.
                break;
            }

            final char cRead = (char) iRead;
            if (cRead == ' ') {
                // A space: an attribute follows.
                final BlancoHtmlAttribute attr = processAttribute();
                if (attr != null) {
                    attributeList.add(attr);
                }

                // The space is only a separator, so it is not kept.
                continue;
            } else if (cRead == '!') {
                // '!' right after '<' is not an element name; it may open a
                // comment.
                if (bufElement.length() == 0 && consumeIfDoubleHyphen()) {
                    processComment(0);
                    break;
                }
                // TODO DOCTYPE is not handled specially. "<!DOCTYPE ...>"
                // falls through and is reported as an element whose name
                // starts with '!'.
            } else if (cRead == '%') {
                // '%' right after '<' may open a comment as well.
                if (bufElement.length() == 0 && consumeIfDoubleHyphen()) {
                    processComment(1);
                    break;
                }
            } else if (cRead == '/') {
                if (bufElement.length() == 0) {
                    // '/' right after '<', i.e. "</": this is an end tag.
                    isStartElement = false;
                    continue;
                }

                // One more character is needed to decide.
                fReader.mark(LOOKAHEAD_LIMIT);
                if (fReader.read() == '>') {
                    // "/>": the element starts and ends right here.
                    fHandler.startElement(bufElement.toString(),
                            attributeList);
                    fHandler.endElement(bufElement.toString());
                    break;
                }

                // Not that form (malformed HTML?). Keep the '/' as part of
                // the name.
                fReader.reset();
            } else if (cRead == '>') {
                if (isStartElement) {
                    fHandler.startElement(bufElement.toString(), attributeList);
                } else {
                    // NOTE(review): an empty characters event is sent before
                    // every end tag. The reason is not visible in this file.
                    fHandler.characters("");
                    fHandler.endElement(bufElement.toString());
                }
                break;
            }

            bufElement.append(cRead);
        }
    }

    /**
     * Looks ahead for <code>"--"</code>. When found, both characters stay
     * consumed; otherwise the reader is put back where it was.
     *
     * @return <code>true</code> if the next two characters were
     *         <code>"--"</code>.
     * @throws IOException
     *             if reading fails.
     */
    private boolean consumeIfDoubleHyphen() throws IOException {
        fReader.mark(LOOKAHEAD_LIMIT);
        if (fReader.read() == '-' && fReader.read() == '-') {
            return true;
        }
        fReader.reset();
        return false;
    }

    /**
     * Reads a comment body up to the terminating <code>"--&gt;"</code> and
     * reports it to the handler. Called after the opening delimiter has been
     * consumed.
     *
     * <p>
     * NOTE(review): the terminator is <code>"--&gt;"</code> for both comment
     * types, so a comment opened with <code>&lt;%--</code> is not closed by
     * <code>"--%&gt;"</code>. If the input ends before a terminator is found,
     * the comment is dropped without a handler call. Confirm both against the
     * handlers before changing them.
     * </p>
     *
     * @param argType
     *            comment type passed through to the handler: 0 when opened
     *            with <code>&lt;!--</code>, 1 when opened with
     *            <code>&lt;%--</code>.
     * @throws IOException
     *             if reading fails.
     */
    protected void processComment(final int argType) throws IOException {
        final StringBuilder bufComment = new StringBuilder();
        for (;;) {
            final int iRead = fReader.read();
            if (iRead < 0) {
                // Input ended inside the comment.
                break;
            }

            final char cRead = (char) iRead;
            if (cRead == '-') {
                fReader.mark(LOOKAHEAD_LIMIT);
                if (fReader.read() == '-' && fReader.read() == '>') {
                    // End of comment.
                    fHandler.comments(bufComment.toString(), argType);
                    break;
                }

                // Just a hyphen inside the comment text.
                fReader.reset();
            }

            bufComment.append(cRead);
        }
    }

    /**
     * Reads one attribute. Called after the space that precedes it has been
     * consumed.
     *
     * <p>
     * The quote property of the result is 0 for an unquoted value, 1 for a
     * value in single quotes and 2 for a value in double quotes.
     * </p>
     *
     * @return the attribute; it has only a name when no <code>'='</code>
     *         follows the name. Returns <code>null</code> when a space or
     *         <code>'&gt;'</code> is met before any name character.
     * @throws IOException
     *             if reading fails.
     */
    protected BlancoHtmlAttribute processAttribute() throws IOException {
        final StringBuilder bufName = new StringBuilder();
        final StringBuilder bufValue = new StringBuilder();

        final BlancoHtmlAttribute attrib = new BlancoHtmlAttribute();
        attrib.setQuote(0);

        // Name part: everything up to '='.
        for (;;) {
            fReader.mark(1);
            final int iRead = fReader.read();
            if (iRead < 0) {
                // Input ended inside the name.
                break;
            }

            final char cRead = (char) iRead;
            if (cRead == '=') {
                break;
            }
            if (cRead == ' ' || cRead == '>') {
                // The name ended without a value. Push the delimiter back so
                // that the caller sees it.
                fReader.reset();

                if (bufName.length() == 0) {
                    return null;
                }
                attrib.setName(bufName.toString());
                return attrib;
            }

            bufName.append(cRead);
        }

        // Value part.
        for (;;) {
            fReader.mark(1);
            final int iRead = fReader.read();
            if (iRead < 0) {
                // Input ended: the value ends here.
                break;
            }

            final char cRead = (char) iRead;
            if (cRead == '>') {
                // Leave '>' for the caller, which closes the tag with it.
                fReader.reset();
                break;
            }
            if (cRead == ' ') {
                // This space separates attributes, so it is consumed and the
                // reader is not reset.
                break;
            }
            if (cRead == '"') {
                attrib.setQuote(2);
                readQuotedValue('"', bufValue);
                break;
            }
            if (cRead == '\'') {
                attrib.setQuote(1);
                readQuotedValue('\'', bufValue);
                break;
            }

            // Characters are taken in even when the value is not quoted.
            bufValue.append(cRead);
        }

        attrib.setName(bufName.toString());
        attrib.setValue(bufValue.toString());
        return attrib;
    }

    /**
     * Appends characters to <code>bufValue</code> until the closing quote or
     * the end of input. The closing quote is consumed but not appended.
     *
     * @param quote
     *            the quote character that ends the value.
     * @param bufValue
     *            the buffer receiving the value text.
     * @throws IOException
     *             if reading fails.
     */
    private void readQuotedValue(final char quote, final StringBuilder bufValue)
            throws IOException {
        for (;;) {
            final int iRead = fReader.read();
            if (iRead < 0 || (char) iRead == quote) {
                return;
            }
            bufValue.append((char) iRead);
        }
    }
}
