/*******************************************************************************
 * Copyright (c) 2008 IGA Tosiki, NTT DATA BUSINESS BRAINS Corp.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    IGA Tosiki (NTT DATA BUSINESS BRAINS Corp.) - initial API and implementation
 *******************************************************************************/
/*
 * blanco Framework
 * Copyright (C) 2008 NTT DATA BUSINESS BRAINS CORPORATION
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */
package blanco.html.normalizer.util;

import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;

import junit.framework.TestCase;
import blanco.html.normalizer.parser.BlancoHtmlEventListSerializer;
import blanco.html.normalizer.parser.helper.BlancoHtmlEventParserContentHandler;
import blanco.html.normalizer.parser.valueobject.BlancoHtmlEvent;
import blanco.html.normalizer.parser.valueobject.BlancoHtmlEventEndDocument;
import blanco.html.normalizer.parser.valueobject.BlancoHtmlEventEndElement;
import blanco.html.normalizer.parser.valueobject.BlancoHtmlEventStartDocument;
import blanco.html.normalizer.parser.valueobject.BlancoHtmlEventStartElement;
import blanco.html.parser.BlancoHtmlParser;
import blanco.html.parser.BlancoHtmlParserFactory;

/**
 * Tests for {@code BlancoHtmlNormalizerUtil.normalize(byte[])}.
 *
 * <p>
 * Each test encodes a small HTML document as Windows-31J bytes, normalizes it,
 * parses the normalized result into a list of {@link BlancoHtmlEvent}s and
 * (except for {@link #test001()}) compares the serialized event list with an
 * expected string.
 * </p>
 */
public class BlancoHtmlNormalizerUtilTest extends TestCase {
    /**
     * Switch for the optional diagnostic output (dump files / console output).
     * It is a compile-time constant, so the guarded code is never executed
     * while it is {@code false}.
     */
    private static final boolean DEBUG = false;

    /**
     * Basic form: a simple document is normalized and parsed. This test only
     * checks that the whole pipeline runs without throwing; it makes no
     * assertions about the resulting events.
     */
    public void test001() throws Exception {
        final String inputData = "<html>\n"
                + "<head>\n"
                + "<title>eXgf[^E^Cg</title>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">\n"
                + "</head>\n" + "<body>\n"
                + "<a href=http://sourceforge.jp/projects/blancofw/></a>\n"
                + "</body>\n" + "</html>";
        final byte[] inputDataBytes = inputData.getBytes("Windows-31J");

        // Obtain the encoding of the HTML by running a first parser over the raw input.
        final BlancoHtmlParser encodingParser = BlancoHtmlParserFactory
                .getInstance();
        encodingParser.parse(inputDataBytes);

        // Parse the normalized input with a second parser using that encoding.
        final BlancoHtmlParser parser = BlancoHtmlParserFactory.getInstance();
        parser.setEncoding(encodingParser.getEncoding());

        final List<BlancoHtmlEvent> eventList = parseNormalized(parser,
                inputDataBytes);

        if (DEBUG) {
            // Diagnostic only: write the serialized events to ./tmp/test001.html.
            new File("./tmp").mkdirs();
            final BufferedWriter writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(
                            "./tmp/test001.html"), encodingParser
                            .getEncoding()));
            try {
                new BlancoHtmlEventListSerializer().serialize(eventList,
                        writer);
            } finally {
                // Always release the file handle, even if serialization fails.
                writer.close();
            }
        }
    }

    /**
     * An end tag without any corresponding start tag (here {@code </font>}).
     * According to the original comment such a tag is commented out; the
     * expected serialized output contains no trace of it.
     */
    public void test002() throws Exception {
        final String inputData = "<html>\n"
                + "<head>\n"
                + "<title>eXgf[^E^Cg</title>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">\n"
                + "</head>\n"
                + "<body>\n"
                + "<a href=http://sourceforge.jp/projects/blancofw/></a></font>\n"
                + "</body>\n" + "</html>";
        final String expectedData = "<!DOCTYPE PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"\">\n<html>"
                + "<head>"
                + "<title>eXgf[^E^Cg</title>"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">"
                + "</head>"
                + "<body>\n"
                + "<a href=\"http://sourceforge.jp/projects/blancofw/\"></a>\n"
                + "</body>" + "</html>\n\n";

        final List<BlancoHtmlEvent> eventList = normalizeAndParse(inputData);

        assertEquals("ЂÂȂI^O̓RgAEg܂B", expectedData,
                eventList2String(eventList));
        assertDocumentDepths(eventList);
    }

    /**
     * When a {@code p} tag appears after an unclosed {@code p} tag, the
     * earlier one is expected to be closed automatically.
     */
    public void test003() throws Exception {
        final String inputData = "<html>\n"
                + "<head>\n"
                + "<title>eXgf[^E^Cg</title>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">\n"
                + "</head>\n"
                + "<body>\n"
                + "<a href=http://sourceforge.jp/projects/blancofw/></a><p><p>\n"
                + "</body>\n" + "</html>";
        final String expectedData = "<!DOCTYPE PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"\">\n<html>"
                + "<head>"
                + "<title>eXgf[^E^Cg</title>"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">"
                + "</head>"
                + "<body>\n"
                + "<a href=\"http://sourceforge.jp/projects/blancofw/\"></a><p></p><p>"
                + "\n</p></body>" + "</html>\n\n";

        final List<BlancoHtmlEvent> eventList = normalizeAndParse(inputData);

        if (DEBUG) {
            System.out.println(eventList2String(eventList));
        }
        assertEquals("Ȃ p ^ÔɁA p ^Oꂽۂɂ͎IɕB",
                expectedData, eventList2String(eventList));
        assertDocumentDepths(eventList);
    }

    /**
     * A {@code meta} tag, which cannot have an end tag, is followed by an
     * explicit {@code </meta>}; the end tag is expected to be removed.
     */
    public void test004() throws Exception {
        final String inputData = "<html>\n"
                + "<head>\n"
                + "<title>eXgf[^E^Cg</title>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\"></meta>\n"
                + "</head>\n" + "<body>\n"
                + "<a href=http://sourceforge.jp/projects/blancofw/></a>\n"
                + "</body>\n" + "</html>";
        final String expectedData = "<!DOCTYPE PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"\">\n<html>"
                + "<head>"
                + "<title>eXgf[^E^Cg</title>"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">"
                + "</head>"
                + "<body>\n"
                + "<a href=\"http://sourceforge.jp/projects/blancofw/\"></a>\n"
                + "</body>" + "</html>\n\n";

        final List<BlancoHtmlEvent> eventList = normalizeAndParse(inputData);

        assertEquals("邱ƂłȂ meta ^Oꂽꍇɂ͏B", expectedData,
                eventList2String(eventList));
        assertDocumentDepths(eventList);
    }

    /**
     * A {@code meta} tag, which cannot have an end tag, is written in the
     * self-closing short form ({@code <meta ... />}).
     */
    public void test005() throws Exception {
        final String inputData = "<html>\n"
                + "<head>\n"
                + "<title>eXgf[^E^Cg</title>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\"/>\n"
                + "</head>\n" + "<body>\n"
                + "<a href=http://sourceforge.jp/projects/blancofw/></a>\n"
                + "</body>\n" + "</html>";
        final String expectedData = "<!DOCTYPE PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"\">\n<html>"
                + "<head>"
                + "<title>eXgf[^E^Cg</title>"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">"
                + "</head>"
                + "<body>\n"
                + "<a href=\"http://sourceforge.jp/projects/blancofw/\"></a>\n"
                + "</body>" + "</html>\n\n";

        final List<BlancoHtmlEvent> eventList = normalizeAndParse(inputData);

        assertEquals("邱ƂłȂ meta ^OȏIĂꍇB", expectedData,
                eventList2String(eventList));
        assertDocumentDepths(eventList);
    }

    /**
     * An {@code a} tag that should be closed is left unclosed; the expected
     * output closes it before {@code </body>}.
     */
    public void test006() throws Exception {
        final String inputData = "<html>\n"
                + "<head>\n"
                + "<title>eXgf[^E^Cg</title>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">\n"
                + "</head>\n" + "<body>\n"
                + "<a href=http://sourceforge.jp/projects/blancofw/>\n"
                + "</body>\n" + "</html>";
        final String expectedData = "<!DOCTYPE PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"\">\n<html>"
                + "<head>"
                + "<title>eXgf[^E^Cg</title>"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">"
                + "</head>"
                + "<body>\n"
                + "<a href=\"http://sourceforge.jp/projects/blancofw/\">\n</a>"
                + "</body>" + "</html>\n\n";

        final List<BlancoHtmlEvent> eventList = normalizeAndParse(inputData);

        if (DEBUG) {
            System.out.println(eventList2String(eventList));
            dumpEventDepths(eventList);
        }

        assertEquals("ׂ a ^OĂȂꍇB", expectedData,
                eventList2String(eventList));
        assertDocumentDepths(eventList);
    }

    /**
     * Japanese text inside an attribute value is expected to be carried
     * through to the output.
     */
    public void test007() throws Exception {
        final String inputData = "<html>\n"
                + "<head>\n"
                + "<title>eXgf[^E^Cg</title>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">\n"
                + "<meta name=\"description\" content=\"{ꑮ̃eXgB\">"
                + "</head>\n" + "<body>\n"
                + "<a href=http://sourceforge.jp/projects/blancofw/>\n"
                + "</body>\n" + "</html>";
        final String expectedData = "<!DOCTYPE PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"\">\n<html>"
                + "<head>"
                + "<title>eXgf[^E^Cg</title>"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\"><meta name=\"description\" content=\"{ꑮ̃eXgB\">"
                + "</head>"
                + "<body>\n"
                + "<a href=\"http://sourceforge.jp/projects/blancofw/\">\n</a>"
                + "</body>" + "</html>\n\n";

        final List<BlancoHtmlEvent> eventList = normalizeAndParse(inputData);

        assertEquals("̓{ꂪ`ȂꍇB", expectedData,
                eventList2String(eventList));
    }

    /**
     * Checks that an HTML comment is carried through to the output.
     */
    public void test008() throws Exception {
        final String inputData = "<html>\n"
                + "<head>\n"
                + "<title>eXgf[^E^Cg</title>\n"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">\n"
                + "<!-- Rg -->" + "</head>\n" + "<body>\n"
                + "<a href=http://sourceforge.jp/projects/blancofw/>\n"
                + "</body>\n" + "</html>";
        final String expectedData = "<!DOCTYPE PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"\">\n<html>"
                + "<head>"
                + "<title>eXgf[^E^Cg</title>"
                + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=Windows-31J\">"
                + "<!-- Rg -->"
                + "</head>"
                + "<body>\n"
                + "<a href=\"http://sourceforge.jp/projects/blancofw/\">\n</a>"
                + "</body>" + "</html>\n\n";

        final List<BlancoHtmlEvent> eventList = normalizeAndParse(inputData);

        // NOTE(review): this failure message is identical to the one used in
        // test007 (attribute text) although this test is about comments -
        // looks like a copy/paste leftover; confirm and reword if desired.
        assertEquals("̓{ꂪ`ȂꍇB", expectedData,
                eventList2String(eventList));
    }

    /**
     * Serializes an event list to a string, going through Windows-31J bytes.
     *
     * @param eventList
     *            events to serialize.
     * @return the serialized text, decoded from Windows-31J.
     * @throws IOException
     *             if serializing or flushing the output fails.
     */
    public static String eventList2String(final List<BlancoHtmlEvent> eventList)
            throws IOException {
        final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        final BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(outStream, "Windows-31J"));
        try {
            new BlancoHtmlEventListSerializer().serialize(eventList, writer);
        } finally {
            // Closing flushes anything still buffered so that the byte array
            // below is complete regardless of whether the serializer flushed.
            // Closing a writer that is already closed has no effect.
            writer.close();
        }
        return new String(outStream.toByteArray(), "Windows-31J");
    }

    /**
     * Encodes the given HTML as Windows-31J, normalizes it and parses the
     * result with a fresh parser.
     *
     * @param inputData
     *            HTML source text.
     * @return the events collected while parsing the normalized input.
     */
    private static List<BlancoHtmlEvent> normalizeAndParse(
            final String inputData) throws Exception {
        final byte[] inputDataBytes = inputData.getBytes("Windows-31J");
        final BlancoHtmlParser parser = BlancoHtmlParserFactory.getInstance();
        return parseNormalized(parser, inputDataBytes);
    }

    /**
     * Installs an event-collecting handler on the given parser, feeds it the
     * normalized form of the input bytes and returns the collected events.
     *
     * @param parser
     *            parser to use; any encoding must already be configured.
     * @param inputDataBytes
     *            raw (not yet normalized) HTML bytes.
     * @return the events collected by the handler.
     */
    private static List<BlancoHtmlEvent> parseNormalized(
            final BlancoHtmlParser parser, final byte[] inputDataBytes)
            throws Exception {
        final BlancoHtmlEventParserContentHandler handler = new BlancoHtmlEventParserContentHandler();
        parser.setHandler(handler);
        parser.parse(BlancoHtmlNormalizerUtil.normalize(inputDataBytes));
        return handler.getEventList();
    }

    /**
     * Asserts that the first and the last event of the list both report a
     * depth of -1 (the tests expect these to be the startDocument and
     * endDocument events).
     *
     * @param eventList
     *            events produced by parsing; must not be empty.
     */
    private static void assertDocumentDepths(
            final List<BlancoHtmlEvent> eventList) {
        assertEquals("ŏ startDocument Cxg -1 ̐[B", -1, eventList.get(0)
                .getDeapth());
        assertEquals("Ō endDocument Cxg  -1 ̐[B", -1, eventList.get(
                eventList.size() - 1).getDeapth());
    }

    /**
     * Diagnostic helper: prints the depth of every document and element
     * start/end event to standard output. Other event types are skipped.
     *
     * @param eventList
     *            events to dump.
     */
    private static void dumpEventDepths(final List<BlancoHtmlEvent> eventList) {
        for (BlancoHtmlEvent event : eventList) {
            if (event instanceof BlancoHtmlEventStartDocument) {
                System.out.println("startDocument:" + event.getDeapth());
            } else if (event instanceof BlancoHtmlEventEndDocument) {
                System.out.println("endDocument:" + event.getDeapth());
            } else if (event instanceof BlancoHtmlEventStartElement) {
                System.out.println(((BlancoHtmlEventStartElement) event)
                        .getName()
                        + ":" + event.getDeapth());
            } else if (event instanceof BlancoHtmlEventEndElement) {
                System.out.println(((BlancoHtmlEventEndElement) event)
                        .getName()
                        + ":" + event.getDeapth());
            }
        }
    }
}
