/*
 * Copyright (C) 2014 kgto.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301  USA
 */
/*
 * $Id: HtmlParserCallback.java 80 2014-09-19 09:34:53Z tuna_p $
 */

package WebScraping;

import java.util.ArrayList;
import java.util.HashMap;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;

/**
 * HTML parser callback that collects the text found inside elements matching
 * a search key (tag name plus ID and/or CLASS).
 *
 * <p>While the parser is inside a matching element, every text fragment
 * reported through {@link #handleText(char[], int)} is appended to a buffer,
 * fragments being separated by a tab. When the matching element is closed the
 * buffered text is stored as one entry of the list returned by
 * {@link #getrtnData()}.
 *
 * @author kgto
 */
class HtmlParserCallback extends HTMLEditorKit.ParserCallback {

    /** Separator inserted between text fragments collected for one match. */
    private static final String SPLIT_CHAR = "\t";

    // Current nesting depth per tag (incremented on start, decremented on end).
    HashMap<HTML.Tag,Integer> tagMap = new HashMap<>();

    // Search key information.
    String keytag;
    String keyid;
    String keyclass;

    // State saved when the search key matched: depth and tag of the matched
    // element. bufCount == 0 means "not currently inside a matched element".
    int bufCount = 0;
    HTML.Tag bufTag = null;
    // Work buffer accumulating the text of the currently matched element.
    StringBuilder bufText;

    // Collected text, one entry per matched element.
    ArrayList<String> sData;

    // Attributes of the tags seen inside the currently matched element.
    AttributeData attrdata;

    /**
     * Creates a callback that searches for the given key.
     *
     * @param skey search key supplying the tag name, ID and CLASS to look for
     */
    protected HtmlParserCallback(SearchData skey) {

        // Unpack the key information.
        keytag   = skey.getHtmltag();
        keyid    = skey.getHtmlid();
        keyclass = skey.getHtmlclass();

        sData    = new ArrayList<>();
    }

    /**
     * Returns the collected text entries.
     *
     * @return the internal list itself (not a copy), one entry per matched
     *         element
     */
    ArrayList<String> getrtnData() {
        return this.sData;
    }

    /**
     * Tracks the nesting depth of {@code tag} and, when not already inside a
     * matched element, starts buffering if the tag matches the search key.
     *
     * @param tag  the start tag reported by the parser
     * @param attr the attributes of the tag
     * @param pos  position in the document (unused)
     */
    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
        // Keep the nesting depth per tag.
        Integer previous = tagMap.get(tag);
        int count = (previous == null) ? 1 : previous + 1;
        tagMap.put(tag, count);

        // Analyse the attributes of this tag.
        AttributeData handleStartattrdata = new AttributeData();
        handleStartattrdata.add(tag, attr);

        DebugProcess.htmlinfo(tag, attr, "handleStartTag", count);

        if(bufCount == 0
                && tag.toString().equals(keytag)
                && serachAttribute(tag, handleStartattrdata)) {
            // Remember at which depth of which tag the match started so that
            // handleEndTag can recognise the corresponding end tag.
            bufCount = count;
            bufTag   = tag;
            attrdata = new AttributeData();
            bufText  = new StringBuilder();
        }
        if(bufCount > 0) {
            attrdata.add(tag, attr);
        }
    }

    /**
     * Closes the current match when the end tag corresponds to the element
     * that started it, then decrements the nesting depth of {@code tag}.
     *
     * @param tag the end tag reported by the parser
     * @param pos position in the document (unused)
     */
    @Override
    public void handleEndTag(HTML.Tag tag, int pos){
        // Fetch the current nesting depth of this tag (0 if never opened).
        Integer depth = tagMap.get(tag);
        int count = (depth == null) ? 0 : depth;

        DebugProcess.htmlinfo(tag, null, "handleEndTag", count);

        if(tag.equals(bufTag) && count <= bufCount) {

            // Store the accumulated text of the match in the result list.
            sData.add(bufText.toString());

            // Clear the saved match state.
            bufCount = 0;
            bufTag   = null;
            bufText  = null;
        }

        // Decrement the depth, but never below zero: an end tag without a
        // matching start tag must not drive the counter negative, otherwise
        // a later start tag would be recorded at depth 0 and a match on it
        // would not be buffered.
        if(count > 0) {
            tagMap.put(tag, count - 1);
        }
    }

    /**
     * Appends the text to the current match buffer, if a match is active.
     * Control characters (up to 0x1f and 0x7f) and the non-breaking space
     * (0xa0, i.e. {@code &nbsp;}) are dropped.
     *
     * @param data the text reported by the parser
     * @param pos  position in the document (unused)
     */
    @Override
    public void handleText(char[] data, int pos){

        DebugProcess.htmlinfo(data, "handleText");

        if(bufCount <= 0) {
            // Not inside a matched element: nothing to collect.
            return;
        }
        if(bufText.length() > 0) {
            bufText.append(SPLIT_CHAR);
        }
        for(char c : data) {
            if(c > 0x1f && c != 0x7f && c != 0xa0) {
                bufText.append(c);
            }
        }
    }

    /**
     * Records the attributes of a simple tag when a match is active.
     *
     * @param tag  the simple tag reported by the parser
     * @param attr the attributes of the tag
     * @param pos  position in the document (unused)
     */
    @Override
    public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
        if(bufCount > 0) {
            attrdata.add(tag, attr);
        }
        DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0);
    }

    /**
     * Compares the ID/CLASS values of a page element with the search key.
     *
     * @param attr the element's MutableAttributeSet
     * @return boolean true when a non-empty ID key equals the element's ID or
     *         a non-empty CLASS key equals the element's CLASS
     */
    boolean serachAttribute(MutableAttributeSet attr) {
        // Compared as Object so that a non-String attribute value simply does
        // not match instead of raising a ClassCastException.
        Object currentID    = attr.getAttribute(HTML.Attribute.ID);
        Object currentClass = attr.getAttribute(HTML.Attribute.CLASS);

        if(hasKey(keyid) && keyid.equals(currentID)) {
            return true;
        }
        return hasKey(keyclass) && keyclass.equals(currentClass);
    }

    /**
     * Compares the ID/CLASS values of a page element with the search key.
     *
     * @param tag      the tag whose attributes are checked
     * @param attrdata the analysed attributes of that tag
     * @return boolean true when the element matches the search key
     */
    boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) {
        // Both an ID key and a CLASS key were given.
        if(hasKey(keyid) && hasKey(keyclass)) {
            if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) {
                return true;
            }
        }
        // NOTE(review): when both keys are given and the combined check above
        // fails, the ID check below alone still decides the result, so the
        // CLASS key never affects the outcome in that case. Kept as is;
        // confirm whether "both must match" was intended.
        // ID key check.
        if(hasKey(keyid)) {
            return attrdata.searchId(tag, keyid);
        }
        // CLASS key check.
        if(hasKey(keyclass)) {
            return attrdata.searchClass(tag, keyclass);
        }
        return false;
    }

    /** Returns true when the key is neither null nor empty. */
    private static boolean hasKey(String key) {
        return key != null && !key.isEmpty();
    }
}
