package com.limegroup.gnutella.spam;

import java.util.Locale;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.limegroup.gnutella.RemoteFileDesc;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.settings.SearchSettings;

/**
 * Singleton entry point of the spam filter. It rates search results
 * ({@link RemoteFileDesc}s) using a few fixed heuristics combined with the
 * rating returned by the {@link RatingTable}, and forwards the user's
 * spam / not-spam decisions and started queries to the {@link RatingTable}.
 */
public class SpamManager {
	private static final Log LOG = LogFactory.getLog(SpamManager.class);

	/**
	 * If an RFD's spam rating is greater than MAX_THRESHOLD, the rating is not
	 * remembered for the Tokens of that RFD. Otherwise a spammer who very
	 * frequently sends a bad UrnToken with varying keywords, sizes and
	 * addresses might be able to pollute the filter data.
	 */
	public static final float MAX_THRESHOLD = 0.995f;

	/**
	 * inverse rating (1 - probability) for an RFD without SHA1 urn.
	 */
	private static final float NO_SHA1_URN_RATING = 0.5f;

	/**
	 * inverse rating (1 - probability) for an RFD whose file name looks like
	 * an incomplete file... save the user the trouble of downloading it, if he
	 * has his spam filter enabled
	 */
	private static final float INCOMPLETE_FILE_RATING = 0.2f;

	private static final SpamManager INSTANCE = new SpamManager();

	/**
	 * @return the single shared SpamManager instance
	 */
	public static SpamManager instance() {
		return INSTANCE;
	}

	private SpamManager() {
	}

	/**
	 * informs the SpamManager of any query that was started and clears bad
	 * ratings for the keywords in the query. Does nothing if the spam filter
	 * is disabled.
	 * 
	 * @param qr
	 *            the QueryRequest for the query.
	 */
	public void startedQuery(QueryRequest qr) {
		if (SearchSettings.ENABLE_SPAM_FILTER.getValue()) {
			RatingTable.instance().mark(qr, Token.RATING_CLEARED);
		}
	}

	/**
	 * This method will rate a given rfd and return whether or not the
	 * SpamManager believes this is spam. The individual ratings are combined
	 * as <code>1 - (1 - a) * (1 - b)</code>, so every additional indicator can
	 * only raise the overall rating. If the spam filter is disabled, the rfd
	 * is left untouched and false is returned.
	 * 
	 * @param rfd
	 *            the RemoteFileDesc to rate
	 * @modifies rfd (the computed rating is stored via setSpamRating)
	 * @return true if the SpamManager internally rated it as spam and false if
	 *         the SpamManager did not rate it as spam
	 */
	public boolean isSpam(RemoteFileDesc rfd) {
		if (!SearchSettings.ENABLE_SPAM_FILTER.getValue()) {
			return false;
		}

		// rate simple spam...
		float rating = 0.f;

		// no SHA1 urn, but an XML document with an empty action
		if (rfd.getSHA1Urn() == null
				&& rfd.getXMLDocument() != null
				&& rfd.getXMLDocument().getAction().length() == 0) {
			rating = 1 - (1 - rating) * NO_SHA1_URN_RATING;
		}

		// file name looks like a partial download shared by another client
		if (isIncompleteFile(rfd.getFileName().toLowerCase(Locale.US))) {
			rating = 1 - (1 - rating) * INCOMPLETE_FILE_RATING;
		}

		// apply bayesian filter
		rating = 1 - (1 - rating) * (1 - RatingTable.instance().getRating(rfd));
		rfd.setSpamRating(rating);

		// spam only if the rating reaches the higher of the two settings
		float cutoff = Math.max(SearchSettings.FILTER_SPAM_RESULTS.getValue(),
				SearchSettings.QUERY_SPAM_CUTOFF.getValue());
		return rating >= cutoff;
	}

	/**
	 * this method is called if the user marked some RFDs as being spam
	 * 
	 * @param rfds
	 *            an array of RemoteFileDesc that should be marked as spam
	 */
	public void handleUserMarkedSpam(RemoteFileDesc[] rfds) {
		for (int i = 0; i < rfds.length; i++) {
			rfds[i].setSpamRating(1.f);
		}

		RatingTable.instance().mark(rfds, Token.RATING_USER_MARKED_SPAM);
	}

	/**
	 * this method is called if the user marked some RFDs as not being spam
	 * 
	 * @param rfds
	 *            an array of RemoteFileDesc that should be marked as good
	 */
	public void handleUserMarkedGood(RemoteFileDesc[] rfds) {
		for (int i = 0; i < rfds.length; i++) {
			rfds[i].setSpamRating(0.f);
		}

		RatingTable.instance().mark(rfds, Token.RATING_USER_MARKED_GOOD);
	}

	/**
	 * clears all collected filter data
	 */
	public void clearFilterData() {
		RatingTable.instance().clear();
	}

	/**
	 * Checks whether a file name starts with one of the prefixes used for
	 * incomplete files. The name is expected to be lower-cased already; the
	 * patterns looked for are
	 * <ul>
	 * <li>__INCOMPLETE</li>
	 * <li>___ARESTRA</li>
	 * <li>___INCOMPLETED</li>
	 * <li>PREVIEW-T-</li>
	 * <li>CORRUPT-(number)-</li>
	 * <li>T-(number)-</li>
	 * </ul>
	 * 
	 * @param name
	 *            the lower-cased name of the file from a search result
	 * @return true if we think that this is an incomplete file
	 */
	private boolean isIncompleteFile(String name) {
		if (name.startsWith("__incomplete")
				|| name.startsWith("___incompleted")
				|| name.startsWith("___arestra")
				|| name.startsWith("preview-t-")) {
			return true;
		}
		return hasNumberedPrefix(name, "t-")
				|| hasNumberedPrefix(name, "corrupt-");
	}

	/**
	 * Tests whether name has the form <code>prefix(number)-...</code>, i.e.
	 * the prefix is followed by at least one digit and then a '-'.
	 * 
	 * @param name
	 *            the file name to inspect
	 * @param prefix
	 *            the literal prefix that must precede the number
	 * @return true if name starts with prefix, one or more digits and a '-'
	 */
	private static boolean hasNumberedPrefix(String name, String prefix) {
		if (!name.startsWith(prefix)) {
			return false;
		}
		int start = prefix.length();
		int pos = start;
		while (pos < name.length() && Character.isDigit(name.charAt(pos))) {
			pos++;
		}
		// require a non-empty number so that e.g. "t--foo" is not flagged
		return pos > start && pos < name.length() && name.charAt(pos) == '-';
	}
}
