/*******************************************************************************
 * Copyright (c) 2008 IGA Tosiki, NTT DATA BUSINESS BRAINS Corp.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    IGA Tosiki (NTT DATA BUSINESS BRAINS Corp.) - initial API and implementation
 *******************************************************************************/
/*
 * blanco Framework
 * Copyright (C) 2008 NTT DATA BUSINESS BRAINS CORPORATION
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */
package blanco.html.normalizer.parser.helper;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.List;

import blanco.html.parser.BlancoHtmlContentHandler;
import blanco.html.parser.valueobject.BlancoHtmlAttribute;

/**
 * Content handler that decodes numeric character references (for example
 * {@code &#65;} or {@code &#x41;}) found in character data and attribute
 * values, and then forwards every event to a chained handler.
 * 
 * <p>
 * A reference is replaced by the character it denotes only when that
 * character can be represented in the configured encoding. Otherwise it is
 * forwarded as a reference again ({@code &nbsp;} for code 160, a decimal
 * numeric reference for everything else).
 * </p>
 * 
 * @author IGA Tosiki
 */
public class BlancoHtmlDecodeNumericCharacterReferenceContentHandler implements
        BlancoHtmlContentHandler {
    /**
     * Code point that is written as the named entity {@code &nbsp;} when it
     * cannot be represented in the configured encoding.
     */
    private static final int NO_BREAK_SPACE = 160;

    /**
     * Name of the encoding used to decide whether a decoded character may be
     * emitted literally.
     */
    protected String fEncoding = "Windows-31J";

    /**
     * Encoder used to check whether a character is representable in the
     * configured encoding. Created by {@link #startDocument()}, or lazily on
     * the first decode if that method has not been called.
     */
    protected CharsetEncoder fValidateCharsetEncode = null;

    /**
     * The chained content handler that receives the processed events.
     */
    protected BlancoHtmlContentHandler fHandler = null;

    /**
     * Sets the chained handler that receives the processed events.
     * 
     * @param handler
     *            the downstream handler.
     */
    public void setHandler(final BlancoHtmlContentHandler handler) {
        fHandler = handler;
    }

    /**
     * Returns the chained handler.
     * 
     * @return the downstream handler, or {@code null} if none has been set.
     */
    public BlancoHtmlContentHandler getHandler() {
        return fHandler;
    }

    /**
     * Sets the name of the encoding used for the representability check. The
     * encoder itself is (re)created by {@link #startDocument()}.
     * 
     * @param encoding
     *            charset name.
     */
    public void setEncoding(final String encoding) {
        fEncoding = encoding;
    }

    /**
     * Returns the name of the encoding used for the representability check.
     * 
     * @return charset name.
     */
    public String getEncoding() {
        return fEncoding;
    }

    /**
     * Starts the document: verifies that a chained handler is present,
     * creates the encoder for the configured encoding and forwards the event.
     * 
     * @throws IOException
     *             if the chained handler fails.
     * @throws IllegalArgumentException
     *             if no chained handler has been set.
     */
    public void startDocument() throws IOException {
        if (fHandler == null) {
            // NOTE(review): this literal looks mis-encoded in this copy of the
            // file. It is runtime text, so it is deliberately left untouched.
            throw new IllegalArgumentException(
                    "nh null Ȃ܂ startDocument Ăяo܂B");
        }

        fValidateCharsetEncode = Charset.forName(getEncoding()).newEncoder();

        fHandler.startDocument();
    }

    /**
     * Forwards the end of the document to the chained handler.
     * 
     * @throws IOException
     *             if the chained handler fails.
     */
    public void endDocument() throws IOException {
        fHandler.endDocument();
    }

    /**
     * Decodes the numeric character references in every non-null attribute
     * value (updating the attribute objects in place) and forwards the
     * element start to the chained handler.
     * 
     * @param name
     *            element name, forwarded unchanged.
     * @param attrs
     *            attributes of the element; values may be rewritten.
     * @throws IOException
     *             if the chained handler fails.
     */
    public void startElement(String name, List<BlancoHtmlAttribute> attrs)
            throws IOException {
        for (BlancoHtmlAttribute attr : attrs) {
            final String value = attr.getValue();
            if (value == null) {
                continue;
            }
            final String decoded = decode(value);
            // Only write back when decoding actually changed something.
            if (!value.equals(decoded)) {
                attr.setValue(decoded);
            }
        }

        fHandler.startElement(name, attrs);
    }

    /**
     * Forwards the end of an element to the chained handler.
     * 
     * @param name
     *            element name, forwarded unchanged.
     * @throws IOException
     *             if the chained handler fails.
     */
    public void endElement(String name) throws IOException {
        fHandler.endElement(name);
    }

    /**
     * Receives character data in which references have not been resolved
     * yet, decodes its numeric character references and forwards the result
     * to the chained handler.
     * 
     * @param argCharacters
     *            raw character data; must not be {@code null}.
     * @throws IOException
     *             if the chained handler fails.
     */
    public void characters(final String argCharacters) throws IOException {
        fHandler.characters(decode(argCharacters));
    }

    /**
     * Forwards a comment to the chained handler without modification.
     * 
     * @param argComments
     *            comment text.
     * @param argType
     *            comment type, passed through as is.
     * @throws IOException
     *             if the chained handler fails.
     */
    public void comments(String argComments, int argType) throws IOException {
        fHandler.comments(argComments, argType);
    }

    /**
     * Replaces the numeric character references in the given text.
     * 
     * <p>
     * Recognised forms are {@code &#} + decimal digits + {@code ;} and
     * {@code &#x} (or {@code &#X}) + hexadecimal digits + {@code ;}. Anything
     * else that starts with {@code &} (named entities, unterminated or
     * malformed references, values beyond the Unicode range) is copied to
     * the result unchanged.
     * </p>
     * 
     * @param argCharacters
     *            text to process; must not be {@code null}.
     * @return the text with the recognised references resolved.
     * @throws IOException
     *             never thrown by this implementation; kept in the signature
     *             for compatibility with existing callers and subclasses.
     */
    protected String decode(final String argCharacters) throws IOException {
        final int length = argCharacters.length();
        final StringBuilder result = new StringBuilder(length);

        int pos = 0;
        while (pos < length) {
            final char current = argCharacters.charAt(pos);
            if (current == '&' && pos + 1 < length
                    && argCharacters.charAt(pos + 1) == '#') {
                final int terminator = findReferenceEnd(argCharacters, pos + 2);
                if (terminator >= 0) {
                    final int codePoint = parseCodePoint(argCharacters, pos + 2,
                            terminator);
                    if (codePoint >= 0) {
                        appendReference(result, codePoint);
                        // Continue right after the terminating ';'.
                        pos = terminator + 1;
                        continue;
                    }
                }
            }
            // Not (part of) a well-formed numeric reference: copy verbatim.
            result.append(current);
            pos++;
        }
        return result.toString();
    }

    /**
     * Appends the character for a decoded reference, or a reference that
     * denotes it when the configured encoding cannot represent it.
     * 
     * @param result
     *            buffer receiving the output.
     * @param codePoint
     *            a valid Unicode code point.
     */
    private void appendReference(final StringBuilder result, final int codePoint) {
        // toChars yields a surrogate pair for supplementary code points, so
        // nothing is truncated to a single 16-bit char.
        final String decoded = new String(Character.toChars(codePoint));
        if (validatingEncoder().canEncode(decoded)) {
            result.append(decoded);
        } else if (codePoint == NO_BREAK_SPACE) {
            // TODO other characters may deserve a named entity as well.
            result.append("&nbsp;");
        } else {
            // Nothing better to do: emit it as a decimal reference again.
            result.append("&#").append(codePoint).append(';');
        }
    }

    /**
     * Returns the encoder used for the representability check, creating it
     * from the configured encoding when {@link #startDocument()} has not
     * done so yet.
     * 
     * @return the encoder; never {@code null}.
     */
    private CharsetEncoder validatingEncoder() {
        if (fValidateCharsetEncode == null) {
            fValidateCharsetEncode = Charset.forName(getEncoding()).newEncoder();
        }
        return fValidateCharsetEncode;
    }

    /**
     * Locates the {@code ;} that terminates a numeric reference body.
     * 
     * @param text
     *            text being scanned.
     * @param start
     *            index of the first character after {@code &#}.
     * @return index of the terminating {@code ;}, or -1 if the characters at
     *         {@code start} are not an optional {@code x}/{@code X} followed
     *         by at least one digit and a {@code ;}.
     */
    private static int findReferenceEnd(final String text, final int start) {
        final int length = text.length();
        int pos = start;
        final boolean hex = pos < length && isHexMarker(text.charAt(pos));
        if (hex) {
            pos++;
        }
        final int digitsStart = pos;
        while (pos < length && isReferenceDigit(text.charAt(pos), hex)) {
            pos++;
        }
        if (pos == digitsStart || pos >= length || text.charAt(pos) != ';') {
            return -1;
        }
        return pos;
    }

    /**
     * Converts a reference body that was accepted by
     * {@link #findReferenceEnd(String, int)} into a code point.
     * 
     * @param text
     *            text being scanned.
     * @param start
     *            index of the first character after {@code &#}.
     * @param end
     *            index of the terminating {@code ;} (exclusive bound).
     * @return the code point, or -1 if the value exceeds the Unicode range.
     */
    private static int parseCodePoint(final String text, final int start,
            final int end) {
        final boolean hex = isHexMarker(text.charAt(start));
        final int radix = hex ? 16 : 10;
        int value = 0;
        for (int i = hex ? start + 1 : start; i < end; i++) {
            value = value * radix + Character.digit(text.charAt(i), radix);
            // Checking on every step keeps the accumulator far from int
            // overflow, however many digits the reference contains.
            if (value > Character.MAX_CODE_POINT) {
                return -1;
            }
        }
        return value;
    }

    /**
     * Tells whether the character introduces a hexadecimal reference body.
     */
    private static boolean isHexMarker(final char c) {
        return c == 'x' || c == 'X';
    }

    /**
     * Tells whether the character is an ASCII digit of a reference body;
     * the letters a-f / A-F are accepted only for hexadecimal bodies.
     */
    private static boolean isReferenceDigit(final char c, final boolean hex) {
        if (c >= '0' && c <= '9') {
            return true;
        }
        return hex && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
    }
}
