package com.limegroup.gnutella.downloader;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.Vector;

import org.xml.sax.SAXException;

import com.limegroup.gnutella.FileManager;
import com.limegroup.gnutella.MediaType;
import com.limegroup.gnutella.RemoteFileDesc;
import com.limegroup.gnutella.ResponseVerifier;
import com.limegroup.gnutella.util.ApproximateMatcher;
import com.limegroup.gnutella.xml.LimeXMLDocument;
import com.limegroup.gnutella.xml.SchemaNotFoundException;

/**
 * Encapsulates important details about an auto download: the plain and rich
 * query that started it, the GUID and media type filter of the search, the
 * files accepted for download so far and the words appearing in their names.
 * Serializable for downloads.dat file; be careful when modifying!
 *
 * The public mutators and expired() are synchronized on this instance; the
 * shared static matcher is additionally guarded by its own monitor.
 */
public class AutoDownloadDetails implements Serializable {
    static final long serialVersionUID = 3400666689236195243L;

    // the query associated with this search
    private String query = null;
    // the rich query associated with this search
    private String richQuery = null;
    // the LimeXMLDocument of this rich query ...
    // initialized lazily by getXmlDoc(); stays null if there is no rich
    // query or if building the document failed.
    private transient LimeXMLDocument xmlDoc = null;
    // flag of whether or not we've tried to create the doc.
    private transient boolean xmlCreated = false;
    // the 'filter' associated with this search.  transient: it is restored
    // from mediaDesc in readObject().
    private transient MediaType type = null;
    // the GUID associated with this search (stored only; not read by this
    // class).
    private byte[] guid = null;
    // the list of RFDs accepted for download so far.
    private List /* of RemoteFileDesc */ dlList = null;

    /**
     * The description of the media type, as returned by
     * MediaType.getMimeType() at construction time (null if no type was
     * given).  This is the serialized stand-in for the transient 'type'.
     */
    private String mediaDesc;

    /** the size of the approx matcher 2d buffer...
     */
    private static final int MATCHER_BUF_SIZE = 120;
    /** this is used for matching of filenames.  kind of big so we only want
     *  one.  It is shared by all instances, so every use is wrapped in
     *  synchronized (matcher).
     */
    private static final ApproximateMatcher matcher =
        new ApproximateMatcher(MATCHER_BUF_SIZE);

    // NOTE: the three tuning values below are declared as non-static,
    // non-transient instance fields, which makes them part of the default
    // serialized form of this class.  They are deliberately left that way so
    // the layout written to disk does not change.

    /** the precision that the matcher uses for comparing candidates to RFDs
     *  that have already been accepted for download.  a candidate whose
     *  (diffs / length of the shorter processed name) is below this value
     *  is treated as a conflict.
     */
    private float MATCH_PRECISION_DL = .30f;

    /** the percentage of matching that invalidates a new file from being
     *  downloaded.  in other words, if a file matches on more than ~51% of
     *  words, then don't download it.
     */
    private float WORD_INCIDENCE_RATE = .509999f;

    /** what is considered to be a low score, compared to the return value of
     *  the score method...
     */
    private int LOW_SCORE = 95;


    /** the set of (lower-cased) words taken from the names of the files that
     *  are already being downloaded.  this is used as a heuristic when
     *  determining what to download.
     */
    private Set wordSet = null;

    static {
        matcher.setIgnoreCase(true);
        matcher.setIgnoreWhitespace(true);
        matcher.setCompareBackwards(true);
    }


    // once this many downloads have been committed, expired() returns true
    // and no further files are accepted.
    public static final int MAX_DOWNLOADS = 1;

    // keeps track of committed downloads....
    private int committedDLs = 0;

    /**
     * @param inQuery the standard query string associated with this query.
     * @param inRichQuery the rich query associated with this string.
     * @param inGuid the GUID associated with this search.
     * @param inType the mediatype associated with this string; may be null,
     *  in which case no file type filtering is applied by addDownload.
     */
    public AutoDownloadDetails(String inQuery, String inRichQuery,
                               byte[] inGuid, MediaType inType) {
        query = inQuery;
        richQuery = inRichQuery;
        type = inType;
        if (type != null)
            mediaDesc = type.getMimeType();
        else
            mediaDesc = null;
        guid = inGuid;
        dlList = new Vector();
        wordSet = new HashSet();
    }

    /**
     * Extended to set the media type, which is transient and therefore has
     * to be rebuilt from the serialized description.  Falls back to the
     * "any type" media type when there is no description or when no media
     * type can be found for it.
     *
     * NOTE(review): the description is written from getMimeType() but looked
     * up with getMediaTypeForSchema() -- confirm that the two agree.
     */
    private void readObject(ObjectInputStream stream) throws IOException,
                                                    ClassNotFoundException {
        stream.defaultReadObject();

        // default deserialization leaves a field null when the stream does
        // not carry it; make sure the collections the other methods rely on
        // always exist.
        if (dlList == null)
            dlList = new Vector();
        if (wordSet == null)
            wordSet = new HashSet();

        if (mediaDesc != null)
            type = MediaType.getMediaTypeForSchema(mediaDesc);
        if (type == null)
            type = MediaType.getAnyTypeMediaType();
    }

    /** @return the standard query string given at construction. */
    public String getQuery() {
        return query;
    }

    /** @return the rich query string given at construction. */
    public String getRichQuery() {
        return richQuery;
    }

    /** @return the media type filter of this search. */
    public MediaType getMediaType() {
        return type;
    }

    /**
     * Tries to accept the given RFD for download.  The RFD is rejected if
     * this auto download has expired, if its file name does not match the
     * media type, if its score is too low, if its name shares too many words
     * with the files already accepted, or if its name is too close to the
     * name of a file already accepted.
     *
     * @param toAdd The RFD you are TRYING to add.
     * @return Whether or not the add was successful.
     */
    public synchronized boolean addDownload(RemoteFileDesc toAdd) {
        debug("ADD.addDownload(): *-----------");
        debug("ADD.addDownload(): entered.");

        // the checks run in order of increasing cost and stop at the first
        // failure.
        boolean retVal = !expired() && isAcceptable(toAdd);

        // ok, all processing passed, add this...
        if (retVal) {
            // used by the approx. matcher...
            dlList.add(toAdd);
            // used by the hashset comparator....
            addWords(toAdd.getFileName());
            debug("ADD.addDownload(): wordSet = " + wordSet);
        }

        debug("ADD.addDownload(): returning " + retVal);
        debug("ADD.addDownload(): -----------*");
        return retVal;
    }

    /** Runs every acceptance check on the candidate, stopping at the first
     *  one that fails.
     */
    private boolean isAcceptable(RemoteFileDesc candidate) {
        final String inputFileName = candidate.getFileName();

        // make sure the file ext is legit....
        if ((type != null) && !type.matches(inputFileName)) {
            debug("ADD.addDownload(): file " +
                  inputFileName + " isn't the right type.");
            return false;
        }

        // make sure the score for this file isn't too low....
        int score = ResponseVerifier.score(query, getXmlDoc(), candidate);
        if (score < LOW_SCORE) {
            debug("ADD.addDownload(): file " +
                  inputFileName + " has low score of " + score);
            return false;
        }

        // check to see there is a high incidence of words here in stuff we
        // are already downloading....
        if (sharesTooManyWords(inputFileName))
            return false;

        // see if it compares to any other file already being DLed....
        return !conflictsWithAcceptedDownload(inputFileName);
    }

    /** Returns the XML document of the rich query, building it on first use.
     *  Only one attempt is ever made; the result is null when there is no
     *  rich query or when the document could not be built.
     */
    private LimeXMLDocument getXmlDoc() {
        if (!xmlCreated) {
            xmlCreated = true;
            if (richQuery != null && !richQuery.equals("")) {
                // a failure here is not fatal: scoring simply proceeds
                // without an XML document.
                try {
                    xmlDoc = new LimeXMLDocument(richQuery);
                } catch (SchemaNotFoundException e) {
                    debug(e);
                } catch (SAXException e) {
                    debug(e);
                } catch (IOException e) {
                    debug(e);
                }
            }
        }
        return xmlDoc;
    }

    /** Tells whether the words of the given file name overlap too much with
     *  the words of the files already accepted.  Always false while no words
     *  have been recorded.
     */
    private boolean sharesTooManyWords(String inputFileName) {
        if (wordSet.isEmpty())
            return false;

        StringTokenizer st =
            new StringTokenizer(ripExtension(inputFileName),
                                FileManager.DELIMITERS);
        final int numTokens = st.countTokens();
        // number of tokens that are NOT yet in the word set.
        int additions = 0;
        while (st.hasMoreTokens()) {
            String currToken = st.nextToken().toLowerCase();
            debug("ADD.addDownload(): currToken = " +
                  currToken);
            if (!wordSet.contains(currToken))
                additions++;
        }

        // known tokens relative to the number of recorded words; the
        // divisor is non-zero thanks to the isEmpty() guard above.
        float matchRate =
            ((float)(numTokens - additions)) / ((float)wordSet.size());
        if ((additions == 0) || (matchRate > WORD_INCIDENCE_RATE)) {
            debug("ADD.addDownload(): file " +
                  inputFileName + " has many elements similar to" +
                  " other files. matchRate = " + matchRate +
                  ", additions = " + additions);
            return true;
        }
        return false;
    }

    /** Tells whether the given file name is, according to the approximate
     *  matcher, too close to the name of a file already accepted.  Stops at
     *  the first conflict since the comparison is costly.
     */
    private boolean conflictsWithAcceptedDownload(String inputFileName) {
        if (dlList.isEmpty())
            return false;

        String processedFileName;
        synchronized (matcher) {
            processedFileName = matcher.process(inputFileName);
        }
        for (int i = 0; i < dlList.size(); i++) {
            RemoteFileDesc currRFD = (RemoteFileDesc) dlList.get(i);
            String currFileName = currRFD.getFileName();
            String currProcessedFileName;
            int diffs;
            synchronized (matcher) {
                currProcessedFileName = matcher.process(currFileName);
                diffs = matcher.match(processedFileName,
                                      currProcessedFileName);
            }
            int smaller = Math.min(processedFileName.length(),
                                   currProcessedFileName.length());
            // float division: a zero 'smaller' yields NaN or Infinity, and
            // neither compares as less than the precision.
            if (((float)diffs) / ((float)smaller) < MATCH_PRECISION_DL) {
                debug("ADD.addDownload(): conflict for file " +
                      inputFileName + " and " + currFileName);
                return true;
            }
        }
        return false;
    }

    /** Adds the lower-cased words of the given file name (extension
     *  stripped) to the word set.
     */
    private void addWords(String fileName) {
        StringTokenizer st =
            new StringTokenizer(ripExtension(fileName),
                                FileManager.DELIMITERS);
        while (st.hasMoreTokens())
            wordSet.add(st.nextToken().toLowerCase());
    }

    /** Removes the input RFD from the list.  Use this if the DL failed and
     *  you want to back it out....
     *  Does nothing if the RFD is not in the list.  The committed download
     *  count is not affected.
     */
    public synchronized void removeDownload(RemoteFileDesc toRemove) {
        // used by the approx. matcher...
        if (!dlList.remove(toRemove))
            return; // never accepted, so its words are not ours to drop.

        // used by the hashset comparator....
        // rebuild the word set from the downloads that are left rather than
        // just deleting the removed file's words: a word that is shared with
        // another accepted file has to stay in the set.
        wordSet.clear();
        for (int i = 0; i < dlList.size(); i++)
            addWords(((RemoteFileDesc) dlList.get(i)).getFileName());
    }

    /** Call this when the DL was 'successful'.  Only RFDs that are currently
     *  in the list of accepted downloads are counted.
     */
    public synchronized void commitDownload(RemoteFileDesc toCommit) {
        if (dlList.contains(toCommit))
            committedDLs++;
    }

    /** @return true when the AutoDownload process is complete, i.e. when
     *  MAX_DOWNLOADS downloads have been committed.
     */
    public synchronized boolean expired() {
        return committedDLs >= MAX_DOWNLOADS;
    }


    // take the extension off the filename: everything from the last '.' on
    // is dropped; a name without a '.' is returned unchanged.
    private String ripExtension(String fileName) {
        int extStart = fileName.lastIndexOf('.');
        return (extStart == -1) ? fileName : fileName.substring(0, extStart);
    }

    // debugging output is compiled out unless this is flipped to true.
    private static final boolean debugOn = false;
    private static void debug(String out) {
        if (debugOn)
            System.out.println(out);
    }
    private static void debug(Exception e) {
        if (debugOn)
            e.printStackTrace();
    }

}


