/*
 * Copyright 2009-2013 the Fess Project and the Others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

package jp.sf.fess.robot;

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import jp.sf.fess.Constants;
import jp.sf.fess.db.exentity.CrawlingConfig;
import jp.sf.fess.helper.CrawlingConfigHelper;
import jp.sf.fess.helper.CrawlingSessionHelper;

import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.codelibs.core.util.DynamicProperties;
import org.codelibs.solr.lib.SolrGroup;
import org.codelibs.solr.lib.SolrGroupManager;
import org.codelibs.solr.lib.policy.QueryType;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.util.StringUtil;
import org.seasar.robot.S2RobotThread;
import org.seasar.robot.client.S2RobotClient;
import org.seasar.robot.entity.ResponseData;
import org.seasar.robot.entity.UrlQueue;
import org.seasar.robot.log.LogType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FessS2RobotThread extends S2RobotThread {
    private static final Logger logger = LoggerFactory
            .getLogger(FessS2RobotThread.class);

    public int maxSolrQueryRetryCount = 5;

    public int childUrlSize = 10000;

    @Override
    protected boolean isContentUpdated(final S2RobotClient client,
            final UrlQueue urlQueue) {
        final DynamicProperties crawlerProperties = SingletonS2Container
                .getComponent("crawlerProperties");
        if (crawlerProperties.getProperty(Constants.DIFF_CRAWLING_PROPERTY,
                Constants.TRUE).equals(Constants.TRUE)) {

            log(logHelper, LogType.CHECK_LAST_MODIFIED, robotContext, urlQueue);
            final long startTime = System.currentTimeMillis();

            final CrawlingConfigHelper crawlingConfigHelper = SingletonS2Container
                    .getComponent(CrawlingConfigHelper.class);
            final CrawlingSessionHelper crawlingSessionHelper = SingletonS2Container
                    .getComponent(CrawlingSessionHelper.class);

            final CrawlingConfig crawlingConfig = crawlingConfigHelper
                    .get(robotContext.getSessionId());
            final Map<String, Object> dataMap = new HashMap<String, Object>();
            dataMap.put("url", urlQueue.getUrl());
            final List<String> browserTypeList = new ArrayList<String>();
            for (final String browserType : crawlingConfig
                    .getBrowserTypeValues()) {
                browserTypeList.add(browserType);
            }
            dataMap.put("type", browserTypeList);
            final List<String> roleTypeList = new ArrayList<String>();
            for (final String roleType : crawlingConfig.getRoleTypeValues()) {
                roleTypeList.add(roleType);
            }
            dataMap.put("role", roleTypeList);
            final String id = crawlingSessionHelper.generateId(dataMap);

            final SolrDocument solrDocument = getSolrDocument(id);
            if (solrDocument == null) {
                final Set<String> childUrlSet = getChildUrlSet(id);
                if (childUrlSet != null) {
                    synchronized (robotContext.getAccessCountLock()) {
                        //  add an url
                        storeChildUrls(
                                childUrlSet,
                                urlQueue.getUrl(),
                                urlQueue.getDepth() != null ? urlQueue
                                        .getDepth() + 1 : 1);
                    }
                }

                return true;
            }

            final String sessionId = (String) solrDocument.get("segment");
            if (StringUtil.isNotBlank(sessionId)
                    && crawlingSessionHelper.expired(sessionId)) {
                return true;
            }

            final Date lastModified = (Date) solrDocument.get("lastModified");
            if (lastModified == null) {
                return true;
            }

            ResponseData responseData = null;
            try {
                //  head method
                responseData = client.doHead(urlQueue.getUrl());
                if (responseData == null) {
                    return true;
                }

                final int httpStatusCode = responseData.getHttpStatusCode();
                if (httpStatusCode == 404) {
                    deleteSolrDocument(id);
                    final Set<String> childUrlSet = getAnchorSet(solrDocument
                            .get("anchor"));
                    if (childUrlSet != null) {
                        synchronized (robotContext.getAccessCountLock()) {
                            //  add an url
                            storeChildUrls(
                                    childUrlSet,
                                    urlQueue.getUrl(),
                                    urlQueue.getDepth() != null ? urlQueue
                                            .getDepth() + 1 : 1);
                        }
                    }

                    return false;
                } else if (responseData.getLastModified() == null) {
                    return true;
                } else if (responseData.getLastModified().getTime() <= lastModified
                        .getTime() && httpStatusCode == 200) {
                    log(logHelper, LogType.NOT_MODIFIED, robotContext, urlQueue);

                    responseData.setExecutionTime(System.currentTimeMillis()
                            - startTime);
                    responseData.setParentUrl(urlQueue.getParentUrl());
                    responseData.setSessionId(robotContext.getSessionId());
                    responseData
                            .setStatus(org.seasar.robot.Constants.NOT_MODIFIED_STATUS);
                    processResponse(urlQueue, responseData);

                    final Set<String> childUrlSet = getAnchorSet(solrDocument
                            .get("anchor"));
                    if (childUrlSet != null) {
                        synchronized (robotContext.getAccessCountLock()) {
                            //  add an url
                            storeChildUrls(
                                    childUrlSet,
                                    urlQueue.getUrl(),
                                    urlQueue.getDepth() != null ? urlQueue
                                            .getDepth() + 1 : 1);
                        }
                    }

                    return false;
                }
            } finally {
                if (responseData != null) {
                    IOUtils.closeQuietly(responseData.getResponseBody());
                }
            }
        }
        return true;
    }

    protected Set<String> getAnchorSet(final Object obj) {
        List<String> anchorList;
        if (obj instanceof String) {
            anchorList = new ArrayList<String>();
            anchorList.add(obj.toString());
        } else if (obj instanceof List<?>) {
            anchorList = (List<String>) obj;
        } else {
            return null;
        }

        if (anchorList.isEmpty()) {
            return null;
        }

        final Set<String> childUrlSet = new LinkedHashSet<String>();
        for (final String anchor : anchorList) {
            childUrlSet.add(anchor);
        }
        return childUrlSet;
    }

    protected SolrDocument getSolrDocument(final String id) {
        final SolrGroupManager solrGroupManager = SingletonS2Container
                .getComponent(SolrGroupManager.class);
        final SolrGroup solrGroup = solrGroupManager
                .getSolrGroup(QueryType.ADD);
        final SolrQuery solrQuery = new SolrQuery();
        solrQuery.setQuery("{!raw f=id v=\"" + id + "\"}");
        solrQuery.setFields("id", "lastModified", "anchor", "segment");
        for (int i = 0; i < maxSolrQueryRetryCount; i++) {
            try {
                final QueryResponse response = solrGroup.query(solrQuery);
                final SolrDocumentList docList = response.getResults();
                if (docList.isEmpty()) {
                    return null;
                }
                if (logger.isDebugEnabled()) {
                    logger.debug("Found solr documents: " + docList);
                }
                return docList.get(0);
            } catch (final Exception e) {
                logger.info("Could not get a response from Solr."
                        + " It might be busy. " + "Retrying.. id:" + id
                        + ", cause: " + e.getMessage());
            }
            try {
                Thread.sleep(500);
            } catch (final InterruptedException e) {
            }
        }
        return null;
    }

    protected Set<String> getChildUrlSet(final String id) {
        final SolrGroupManager solrGroupManager = SingletonS2Container
                .getComponent(SolrGroupManager.class);
        final SolrGroup solrGroup = solrGroupManager
                .getSolrGroup(QueryType.ADD);
        final SolrQuery solrQuery = new SolrQuery();
        solrQuery.setQuery("{!raw f=parentId v=\"" + id + "\"}");
        solrQuery.setFields("url");
        solrQuery.setRows(childUrlSize);
        for (int i = 0; i < maxSolrQueryRetryCount; i++) {
            try {
                final QueryResponse response = solrGroup.query(solrQuery);
                final SolrDocumentList docList = response.getResults();
                if (docList.isEmpty()) {
                    return null;
                }
                if (logger.isDebugEnabled()) {
                    logger.debug("Found solr documents: " + docList);
                }
                final Set<String> urlSet = new HashSet<String>(docList.size());
                for (final SolrDocument doc : docList) {
                    final Object obj = doc.get("url");
                    if (obj != null) {
                        urlSet.add(obj.toString());
                    }
                }
                return urlSet;
            } catch (final Exception e) {
                logger.info("Could not get a response from Solr."
                        + " It might be busy. " + "Retrying.. id:" + id
                        + ", cause: " + e.getMessage());
            }
            try {
                Thread.sleep(500);
            } catch (final InterruptedException e) {
            }
        }
        return null;
    }

    protected void deleteSolrDocument(final String id) {
        final SolrGroupManager solrGroupManager = SingletonS2Container
                .getComponent(SolrGroupManager.class);
        final SolrGroup solrGroup = solrGroupManager
                .getSolrGroup(QueryType.DELETE);
        for (int i = 0; i < maxSolrQueryRetryCount; i++) {
            boolean done = true;
            try {
                for (final UpdateResponse response : solrGroup.deleteById(id)) {
                    if (response.getStatus() != 200) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Failed to delete: " + response);
                        }
                        done = false;
                    }
                }
            } catch (final Exception e) {
                logger.info("Could not delete a document from Solr."
                        + " It might be busy. " + "Retrying.. id:" + id
                        + ", cause: " + e.getMessage());
                done = false;
            }
            if (done) {
                logger.info("Deleted from Solr: " + id);
                break;
            }
            try {
                Thread.sleep(500);
            } catch (final InterruptedException e) {
            }
        }
    }
}
