/*
 * Copyright 2009-2009 the Fess Project and the Others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package jp.sf.fess.helper;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.annotation.Resource;

import jp.sf.fess.Constants;
import jp.sf.fess.db.exentity.FileCrawlingConfig;
import jp.sf.fess.interval.FessIntervalController;
import jp.sf.fess.service.FileCrawlingConfigService;
import jp.sf.fess.solr.IndexUpdater;
import jp.sf.fess.solr.SolrServerGroup;
import jp.sf.fess.util.FessProperties;

import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.util.StringUtil;
import org.seasar.robot.S2Robot;
import org.seasar.robot.S2RobotContext;
import org.seasar.robot.db.exbhv.AccessResultBhv;
import org.seasar.robot.service.DataService;
import org.seasar.robot.service.UrlQueueService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Runs file system crawling for every registered {@link FileCrawlingConfig}
 * and pushes the crawled data to Solr through an {@link IndexUpdater}.
 * <p>
 * One {@link S2Robot} is prepared per configuration. The robots are started
 * in the background, at most "crawling thread count" (a crawler property) of
 * them at a time, while a single index updater thread consumes the results of
 * all crawling sessions.
 */
public class FileSystemIndexHelper implements Serializable {

    private static final long serialVersionUID = 1L;

    private static final Logger logger = LoggerFactory
            .getLogger(FileSystemIndexHelper.class);

    /** Pattern used to split multi-line path/filter settings into entries. */
    private static final String LINE_SEPARATOR_PATTERN = "[\r\n]";

    @Resource
    protected FessProperties solrServerProperties;

    @Resource
    protected FessProperties crawlerProperties;

    @Resource
    protected DataService dataService;

    @Resource
    protected UrlQueueService urlQueueService;

    @Resource
    protected FileCrawlingConfigService fileCrawlingConfigService;

    @Resource
    protected CrawlingConfigHelper crawlingConfigHelper;

    @Resource
    protected AccessResultBhv accessResultBhv;

    /** Access limit applied to a robot whose config does not define one. */
    public long maxAccessCount = 100000;

    /**
     * Milliseconds to sleep after starting a crawler and between two polls of
     * the crawler status.
     */
    public long crawlingExecutionInterval = Constants.DEFAULT_CRAWLING_EXECUTION_INTERVAL;

    /**
     * Crawls all configured file system targets and waits until both the
     * crawlers and the index updater have finished.
     * <p>
     * Each configuration gets its own crawling session whose id is
     * {@code sessionId + "-" + n}, where {@code n} is the 1-based position of
     * the configuration in the list returned by the config service. A
     * configuration without target paths is skipped with a warning; the
     * remaining configurations are still processed.
     * <p>
     * When done, the execution time and index size are stored under
     * {@code sessionId} in the crawling session helper, the per-session
     * crawling configs are removed, and the URL queue and crawled data are
     * deleted.
     *
     * @param sessionId base session id of this crawling run
     * @param solrServerGroup Solr server group handed to the index updater
     */
    public void crawl(String sessionId, SolrServerGroup solrServerGroup) {
        List<FileCrawlingConfig> configList = fileCrawlingConfigService
                .getAllFileCrawlingConfigList();

        if (configList.isEmpty()) {
            // nothing
            if (logger.isInfoEnabled()) {
                logger.info("No crawling target urls.");
            }
            return;
        }

        int multiprocessCrawlingCount = 5;
        String value = crawlerProperties.getProperty(
                Constants.CRAWLING_THREAD_COUNT_PROPERTY, "5");
        try {
            multiprocessCrawlingCount = Integer.parseInt(value);
        } catch (NumberFormatException e) {
            logger.warn("Invalid crawling thread count: " + value
                    + ". Using " + multiprocessCrawlingCount + ".");
        }
        if (multiprocessCrawlingCount < 1) {
            // With a limit below 1 the start loop could never launch a
            // crawler and would never terminate.
            logger.warn("Crawling thread count must be positive but was "
                    + multiprocessCrawlingCount + ". Using 1.");
            multiprocessCrawlingCount = 1;
        }

        long commitPerCount = Constants.DEFAULT_COMMIT_PER_COUNT;
        value = crawlerProperties.getProperty(
                Constants.COMMIT_PER_COUNT_PROPERTY, Long
                        .toString(Constants.DEFAULT_COMMIT_PER_COUNT));
        try {
            commitPerCount = Long.parseLong(value);
        } catch (NumberFormatException e) {
            logger.warn("Invalid commit per count: " + value + ". Using "
                    + commitPerCount + ".");
        }

        long startTime = System.currentTimeMillis();

        int count = 0;
        List<String> sessionIdList = new ArrayList<String>();
        List<S2Robot> s2RobotList = new ArrayList<S2Robot>();
        for (FileCrawlingConfig fileCrawlingConfig : configList) {
            // The counter advances for skipped configs too, so a session id
            // always reflects the position of its config.
            count++;
            String sid = sessionId + "-" + count;

            String pathsStr = fileCrawlingConfig.getPaths();
            if (StringUtil.isBlank(pathsStr)) {
                // Skip only this config; nothing has been registered for it
                // yet, so there is nothing to clean up.
                logger.warn("No target uris. Skipped");
                continue;
            }

            crawlingConfigHelper.setCrawlingConfig(sid, fileCrawlingConfig);

            // create s2robot
            S2Robot s2Robot = SingletonS2Container.getComponent(S2Robot.class);
            s2Robot.setSessionId(sid);
            sessionIdList.add(sid);

            int intervalTime = fileCrawlingConfig.getIntervalTime() != null ? fileCrawlingConfig
                    .getIntervalTime()
                    : Constants.DEFAULT_INTERVAL_TIME_FOR_FS;
            ((FessIntervalController) s2Robot.getIntervalController())
                    .setDelayMillisForWaitingNewUrl(intervalTime);

            S2RobotContext robotContext = s2Robot.getRobotContext();
            int numOfThread = fileCrawlingConfig.getNumOfThread() != null ? fileCrawlingConfig
                    .getNumOfThread()
                    : Constants.DEFAULT_NUM_OF_THREAD_FOR_FS;
            robotContext.setNumOfThread(numOfThread);

            int depth = fileCrawlingConfig.getDepth() != null ? fileCrawlingConfig
                    .getDepth()
                    : -1;
            robotContext.setMaxDepth(depth);

            long maxCount = fileCrawlingConfig.getMaxAccessCount() != null ? fileCrawlingConfig
                    .getMaxAccessCount()
                    : maxAccessCount;
            robotContext.setMaxAccessCount(maxCount);

            // set paths
            for (String u : splitLines(pathsStr)) {
                if (StringUtil.isNotBlank(u)) {
                    s2Robot.addUrl(toFileUrl(u.trim()));
                }
            }

            // set included paths (the setting may be absent)
            for (String u : splitLines(fileCrawlingConfig.getIncludedPaths())) {
                if (StringUtil.isNotBlank(u)) {
                    s2Robot.addIncludeFilter(u.trim());
                }
            }

            // set excluded paths (the setting may be absent)
            for (String u : splitLines(fileCrawlingConfig.getExcludedPaths())) {
                if (StringUtil.isNotBlank(u)) {
                    s2Robot.addExcludeFilter(u.trim());
                }
            }

            if (logger.isDebugEnabled()) {
                logger.debug("Crawling " + pathsStr);
            }

            s2Robot.setBackground(true);

            s2RobotList.add(s2Robot);

        }

        // run index update
        IndexUpdater indexUpdater = SingletonS2Container
                .getComponent("indexUpdater");
        indexUpdater.setSessionIdList(sessionIdList);
        indexUpdater.setSolrServerGroup(solrServerGroup);
        indexUpdater.setDaemon(true);
        indexUpdater.setCommitPerCount(commitPerCount);
        indexUpdater.start();

        int startedCrawlerNum = 0;
        int activeCrawlerNum = 0;
        while (startedCrawlerNum < s2RobotList.size()) {
            if (activeCrawlerNum < multiprocessCrawlingCount) {
                // start crawling
                s2RobotList.get(startedCrawlerNum).execute();
                startedCrawlerNum++;
                activeCrawlerNum++;
                sleepQuietly(crawlingExecutionInterval);
                continue;
            }

            // check status: recount the running crawlers from scratch so a
            // crawler that finished earlier is not subtracted again on every
            // poll.
            int runningCrawlerNum = 0;
            for (int i = 0; i < startedCrawlerNum; i++) {
                if (s2RobotList.get(i).getRobotContext().isRunning()) {
                    runningCrawlerNum++;
                }
            }
            activeCrawlerNum = runningCrawlerNum;
            sleepQuietly(crawlingExecutionInterval);
        }

        for (S2Robot s2Robot : s2RobotList) {
            s2Robot.awaitTermination();
        }

        Map<String, String> infoMap = new HashMap<String, String>();

        long execTime = System.currentTimeMillis() - startTime;
        infoMap.put(Constants.FS_CRAWLING_EXEC_TIME, Long.toString(execTime));
        if (logger.isInfoEnabled()) {
            logger.info("[EXEC TIME] crawling time: " + execTime + "ms");
        }

        indexUpdater.setFinishCrawling(true);
        try {
            indexUpdater.join();
        } catch (InterruptedException e) {
            logger.warn("Interrupted index update.", e);
        }

        infoMap.put(Constants.FS_INDEX_EXEC_TIME, Long.toString(indexUpdater
                .getExecuteTime()));
        infoMap.put(Constants.FS_INDEX_SIZE, Long.toString(indexUpdater
                .getDocumentSize()));

        // store info map
        CrawlingSessionHelper crawlingSessionHelper = SingletonS2Container
                .getComponent("crawlingSessionHelper");
        crawlingSessionHelper.put(sessionId, infoMap);

        for (String sid : sessionIdList) {
            // remove config
            crawlingConfigHelper.setCrawlingConfig(sid, null);
        }

        // clear queue
        urlQueueService.deleteAll();

        // clear
        dataService.deleteAll();

    }

    /**
     * Splits a multi-line setting into its lines.
     *
     * @param text setting value, may be {@code null}
     * @return the pieces between CR/LF characters (possibly blank), or an
     *         empty array when {@code text} is {@code null}
     */
    private static String[] splitLines(String text) {
        if (text == null) {
            return new String[0];
        }
        return text.split(LINE_SEPARATOR_PATTERN);
    }

    /**
     * Prefixes a path with the {@code file:} scheme unless it already starts
     * with it.
     *
     * @param path trimmed, non-blank path or URL
     * @return {@code path} unchanged if it starts with {@code file:},
     *         {@code "file:" + path} if it starts with {@code /}, otherwise
     *         {@code "file:/" + path}
     */
    private static String toFileUrl(String path) {
        if (path.startsWith("file:")) {
            return path;
        }
        if (path.startsWith("/")) {
            return "file:" + path;
        }
        return "file:/" + path;
    }

    /**
     * Sleeps for the given time, returning early if the thread is
     * interrupted.
     * <p>
     * The interruption is deliberately ignored and the interrupt status is
     * not restored: the crawl is expected to carry on, and a set interrupt
     * flag would make every following sleep and join in {@link #crawl} fail
     * immediately.
     *
     * @param millis time to sleep in milliseconds
     */
    private static void sleepQuietly(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException ignored) {
            // NOP (see method comment)
        }
    }

}
