/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.html;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.html.XHTMLDowngradeHandler;
import org.apache.tika.sax.TeeContentHandler;
import org.apache.tika.sax.TextContentHandler;
import org.apache.tika.sax.WriteOutContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.cyberneko.html.parsers.SAXParser;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * {@link Parser} implementation for HTML documents.
 *
 * <p>The input is tokenized by the CyberNeko {@link SAXParser}; the resulting
 * SAX events are routed, by path matchers, to three handlers:
 * <ul>
 *   <li>a body handler that forwards text plus a whitelist of structural
 *       elements (and {@code A} links) to an {@link XHTMLContentHandler},</li>
 *   <li>a title handler that records the {@code TITLE} text under the
 *       {@code "title"} metadata key, and</li>
 *   <li>a meta handler that copies {@code META} {@code http-equiv}/{@code name}
 *       entries into the metadata.</li>
 * </ul>
 *
 * <p>Element names are compared in upper case throughout this class (see the
 * matcher expressions and the lookup tables below).
 */
public class HtmlParser implements Parser {

    /**
     * Upper-case source element name to the lower-case XHTML element name that
     * is emitted for it. Elements not listed here (other than {@code A}) are
     * dropped by the body handler, although their text is still forwarded.
     */
    private static final Map<String, String> SAFE_ELEMENTS = createSafeElements();

    /** Upper-case names of elements whose entire content (including nested elements) is ignored. */
    private static final Set<String> DISCARD_ELEMENTS = createDiscardElements();

    /** Builds the read-only element whitelist used by the body handler. */
    private static Map<String, String> createSafeElements() {
        Map<String, String> safe = new HashMap<String, String>();
        safe.put("P", "p");
        safe.put("H1", "h1");
        safe.put("H2", "h2");
        safe.put("H3", "h3");
        safe.put("H4", "h4");
        safe.put("H5", "h5");
        safe.put("H6", "h6");
        safe.put("UL", "ul");
        safe.put("OL", "ol");
        safe.put("LI", "li");
        // MENU is intentionally emitted as an unordered list.
        safe.put("MENU", "ul");
        safe.put("DL", "dl");
        safe.put("DT", "dt");
        safe.put("DD", "dd");
        safe.put("PRE", "pre");
        safe.put("BLOCKQUOTE", "blockquote");
        safe.put("TABLE", "table");
        safe.put("THEAD", "thead");
        safe.put("TBODY", "tbody");
        safe.put("TR", "tr");
        safe.put("TH", "th");
        safe.put("TD", "td");
        return Collections.unmodifiableMap(safe);
    }

    /** Builds the read-only set of elements whose content is discarded. */
    private static Set<String> createDiscardElements() {
        Set<String> discard = new HashSet<String>();
        discard.add("STYLE");
        discard.add("SCRIPT");
        return Collections.unmodifiableSet(discard);
    }

    /**
     * Parses an HTML document and emits XHTML SAX events to {@code handler}.
     *
     * @param stream   the HTML input; it is wrapped in a
     *                 {@link CloseShieldInputStream} before being handed to the
     *                 HTML parser (NOTE(review): presumably so that the parser
     *                 cannot close the caller's stream - confirm)
     * @param handler  receiver of the generated XHTML events
     * @param metadata source of the optional {@code "Content-Encoding"} hint and
     *                 destination for the title and meta values found
     * @param context  parse context; not used by this implementation
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if a handler or the underlying parser fails
     * @throws TikaException declared by the {@link Parser} contract
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, Map<String, Object> context) throws IOException, SAXException, TikaException {
        InputSource source = new InputSource(new CloseShieldInputStream(stream));

        // Only override the encoding when the caller supplied a hint.
        String encoding = metadata.get("Content-Encoding");
        if (encoding != null) {
            source.setEncoding(encoding);
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);

        // Route the relevant parts of the document to dedicated handlers.
        XPathParser xpath = new XPathParser(null, "");
        Matcher body = xpath.parse("/HTML/BODY//node()");
        Matcher title = xpath.parse("/HTML/HEAD/TITLE//node()");
        Matcher meta = xpath.parse("/HTML/HEAD/META//node()");
        ContentHandler dispatcher = new TeeContentHandler(new ContentHandler[]{
            new MatchingContentHandler(this.getBodyHandler(xhtml), body),
            new MatchingContentHandler(this.getTitleHandler(metadata), title),
            new MatchingContentHandler(this.getMetaHandler(metadata), meta)
        });

        SAXParser parser = new SAXParser();
        parser.setContentHandler(new XHTMLDowngradeHandler(dispatcher));
        parser.parse(source);
    }

    /**
     * Convenience overload that parses with an empty context map.
     *
     * @param stream   the HTML input
     * @param handler  receiver of the generated XHTML events
     * @param metadata encoding hint source and destination for extracted values
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if a handler or the underlying parser fails
     * @throws TikaException declared by the {@link Parser} contract
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata) throws IOException, SAXException, TikaException {
        Map<String, Object> context = Collections.emptyMap();
        this.parse(stream, handler, metadata, context);
    }

    /**
     * Creates the handler for the document title. On every end-element event
     * it receives, it stores its own {@code toString()} value (presumably the
     * text written to it so far - confirm against {@link WriteOutContentHandler})
     * under the {@code "title"} metadata key.
     */
    private ContentHandler getTitleHandler(final Metadata metadata) {
        return new WriteOutContentHandler() {

            public void endElement(String u, String l, String n) {
                metadata.set("title", this.toString());
            }
        };
    }

    /**
     * Creates the handler for {@code META} elements. For each start-element
     * event the {@code content} attribute is stored under the value of the
     * {@code http-equiv} attribute and/or the {@code name} attribute, whichever
     * are present. Elements without a {@code content} attribute are skipped so
     * that a {@code null} value is never written into the metadata.
     */
    private ContentHandler getMetaHandler(final Metadata metadata) {
        return new WriteOutContentHandler() {

            public void startElement(String uri, String local, String name, Attributes atts) throws SAXException {
                String content = atts.getValue("content");
                if (content == null) {
                    // Nothing meaningful to record for this tag.
                    return;
                }
                String httpEquiv = atts.getValue("http-equiv");
                if (httpEquiv != null) {
                    metadata.set(httpEquiv, content);
                }
                String metaName = atts.getValue("name");
                if (metaName != null) {
                    metadata.set(metaName, content);
                }
            }
        };
    }

    /**
     * Creates the handler for the document body. Whitelisted elements are
     * re-emitted on {@code xhtml} under their mapped names, {@code A} elements
     * are re-emitted as {@code a} with an {@code href} attribute (empty when
     * the source has none), and everything inside a discarded element is
     * suppressed, including character data and ignorable whitespace.
     */
    private ContentHandler getBodyHandler(final XHTMLContentHandler xhtml) {
        return new TextContentHandler(xhtml) {
            /**
             * Nesting depth inside a discarded element: 0 when not discarding,
             * 1 on the discarded element itself, +1 for each nested element.
             */
            private int discardLevel = 0;

            public void startElement(String uri, String local, String name, Attributes atts) throws SAXException {
                if (this.discardLevel != 0) {
                    // Already discarding: just track the nesting depth.
                    ++this.discardLevel;
                } else if (DISCARD_ELEMENTS.contains(name)) {
                    this.discardLevel = 1;
                } else if (SAFE_ELEMENTS.containsKey(name)) {
                    xhtml.startElement(SAFE_ELEMENTS.get(name));
                } else if ("A".equals(name)) {
                    String href = atts.getValue("href");
                    if (href == null) {
                        href = "";
                    }
                    xhtml.startElement("a", "href", href);
                }
            }

            public void endElement(String uri, String local, String name) throws SAXException {
                if (this.discardLevel != 0) {
                    // Leaving a nested (or the outermost) discarded element.
                    --this.discardLevel;
                } else if (SAFE_ELEMENTS.containsKey(name)) {
                    xhtml.endElement(SAFE_ELEMENTS.get(name));
                } else if ("A".equals(name)) {
                    xhtml.endElement("a");
                }
            }

            public void characters(char[] ch, int start, int length) throws SAXException {
                if (this.discardLevel == 0) {
                    super.characters(ch, start, length);
                }
            }

            public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
                if (this.discardLevel == 0) {
                    super.ignorableWhitespace(ch, start, length);
                }
            }
        };
    }
}

