/*
 * Copyright 2009-2011 the Fess Project and the Others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

package jp.sf.fess.exec;

import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.annotation.Resource;
import javax.servlet.ServletContext;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import jp.sf.fess.Constants;
import jp.sf.fess.db.allcommon.CDef;
import jp.sf.fess.helper.CrawlingSessionHelper;
import jp.sf.fess.helper.DataIndexHelper;
import jp.sf.fess.helper.DatabaseHelper;
import jp.sf.fess.helper.FileSystemIndexHelper;
import jp.sf.fess.helper.MailHelper;
import jp.sf.fess.helper.OverlappingHostHelper;
import jp.sf.fess.helper.PathMappingHelper;
import jp.sf.fess.helper.SystemHelper;
import jp.sf.fess.helper.WebIndexHelper;
import jp.sf.fess.service.CrawlingSessionService;
import jp.sf.fess.service.PathMappingService;
import jp.sf.fess.solr.SolrServerGroup;
import jp.sf.fess.solr.SolrServerManager;
import jp.sf.fess.util.FessProperties;

import org.apache.commons.lang.time.DateUtils;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.mobylet.core.launcher.MobyletLauncher;
import org.seasar.framework.container.ExternalContext;
import org.seasar.framework.container.S2Container;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.container.factory.SingletonS2ContainerFactory;
import org.seasar.framework.container.servlet.SingletonS2ContainerInitializer;
import org.seasar.framework.mock.servlet.MockHttpServletRequestImpl;
import org.seasar.framework.mock.servlet.MockHttpServletResponseImpl;
import org.seasar.framework.mock.servlet.MockServletContextImpl;
import org.seasar.framework.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class Crawler implements Serializable {

    private static final long serialVersionUID = 1L;

    private static final Logger logger = LoggerFactory.getLogger(Crawler.class);

    @Resource
    protected SolrServerManager solrServerManager;

    @Resource
    protected WebIndexHelper webIndexHelper;

    @Resource
    protected FileSystemIndexHelper fileSystemIndexHelper;

    @Resource
    protected DataIndexHelper dataIndexHelper;

    @Resource
    protected PathMappingService pathMappingService;

    @Resource
    protected CrawlingSessionService crawlingSessionService;

    @Resource
    protected FessProperties crawlerProperties;

    @Resource
    protected MailHelper mailHelper;

    public String notificationSubject = "Completed Fess Crawler";

    /**
     * Command-line options of the crawler, populated by args4j.
     *
     * <p>
     * Each of the {@code *ConfigIds} options is a comma-separated list of
     * numeric IDs. The matching {@code get*ConfigIdList()} accessor returns
     * {@code null} when the option was not given (or is blank), and
     * otherwise a list holding the positive IDs found in the option value.
     * </p>
     */
    protected static class Options {
        @Option(name = "-s", aliases = "--sessionId", metaVar = "sessionId", usage = "Session ID")
        protected String sessionId;

        @Option(name = "-w", aliases = "--webConfigIds", metaVar = "webConfigIds", usage = "Web Config IDs")
        protected String webConfigIds;

        @Option(name = "-f", aliases = "--fileConfigIds", metaVar = "fileConfigIds", usage = "File Config IDs")
        protected String fileConfigIds;

        @Option(name = "-d", aliases = "--dataConfigIds", metaVar = "dataConfigIds", usage = "Data Config IDs")
        protected String dataConfigIds;

        protected Options() {
        }

        /**
         * @return the IDs given with {@code -w}, or {@code null} when the
         *         option is absent or blank
         * @throws NumberFormatException if a non-blank entry is not a number
         */
        protected List<Long> getWebConfigIdList() {
            return parseConfigIds(webConfigIds);
        }

        /**
         * @return the IDs given with {@code -f}, or {@code null} when the
         *         option is absent or blank
         * @throws NumberFormatException if a non-blank entry is not a number
         */
        protected List<Long> getFileConfigIdList() {
            return parseConfigIds(fileConfigIds);
        }

        /**
         * @return the IDs given with {@code -d}, or {@code null} when the
         *         option is absent or blank
         * @throws NumberFormatException if a non-blank entry is not a number
         */
        protected List<Long> getDataConfigIdList() {
            return parseConfigIds(dataConfigIds);
        }

        /**
         * Splits a comma-separated option value into an ID list.
         *
         * @param ids raw option value, may be {@code null}
         * @return {@code null} for a blank value, otherwise the parsed IDs
         */
        private static List<Long> parseConfigIds(final String ids) {
            if (StringUtil.isBlank(ids)) {
                // null (rather than an empty list) is what the accessors
                // have always returned for "option not specified".
                return null;
            }
            return createConfigIdList(ids.split(","));
        }

        /**
         * Converts the given tokens to IDs, keeping only positive values.
         * Surrounding whitespace is ignored and blank tokens are skipped,
         * so values such as {@code "1, 2"} or {@code "1,,2"} are accepted.
         *
         * @param values tokens of a comma-separated ID list
         * @return the positive IDs in input order (possibly empty)
         * @throws NumberFormatException if a non-blank token is not a number
         */
        private static List<Long> createConfigIdList(final String[] values) {
            final List<Long> idList = new ArrayList<Long>(values.length);
            for (final String value : values) {
                final String token = value.trim();
                if (token.length() == 0) {
                    continue;
                }
                final long id = Long.parseLong(token);
                if (id > 0) {
                    idList.add(Long.valueOf(id));
                }
            }
            return idList;
        }
    }

    /**
     * Command-line entry point of the crawler.
     *
     * <p>
     * Parses the arguments into {@link Options}, initializes the S2Container
     * from {@code app.dicon} with mock servlet objects, runs
     * {@link #doCrawl(Options)}, stores the returned crawling information
     * under the session ID, and finally tries to send the notification mail.
     * When the arguments cannot be parsed, the usage is printed to
     * {@code System.err} and the method returns without crawling.
     * </p>
     *
     * @param args command-line arguments (see {@link Options})
     */
    public static void main(final String[] args) {
        final Options options = new Options();

        final CmdLineParser parser = new CmdLineParser(options);
        try {
            parser.parseArgument(args);
        } catch (final CmdLineException e) {
            System.err.println(e.getMessage());
            System.err.println("java " + Crawler.class.getCanonicalName()
                    + " [options...] arguments...");
            parser.printUsage(System.err);
            return;
        }

        final ServletContext servletContext = new MockServletContextImpl(
                "/fess");
        final HttpServletRequest request = new MockHttpServletRequestImpl(
                servletContext, "/crawler");
        final HttpServletResponse response = new MockHttpServletResponseImpl(
                request);
        final SingletonS2ContainerInitializer initializer = new SingletonS2ContainerInitializer();
        initializer.setConfigPath("app.dicon");
        initializer.setApplication(servletContext);
        initializer.initialize();

        // From here on the container exists, so it must be destroyed on
        // every exit path, including failures before/after the crawl.
        try {
            final S2Container container = SingletonS2ContainerFactory
                    .getContainer();
            final ExternalContext externalContext = container
                    .getExternalContext();
            externalContext.setRequest(request);
            externalContext.setResponse(response);

            // initialize mobylet
            MobyletLauncher.launch();

            final Crawler crawler = SingletonS2Container
                    .getComponent(Crawler.class);

            final DatabaseHelper databaseHelper = SingletonS2Container
                    .getComponent("databaseHelper");
            databaseHelper.optimize();

            if (StringUtil.isBlank(options.sessionId)) {
                // use a default session id
                final SimpleDateFormat sdf = new SimpleDateFormat(
                        "yyyyMMddHHmmss");
                options.sessionId = sdf.format(new Date());
            }

            Map<String, String> infoMap = null;

            try {
                infoMap = crawler.doCrawl(options);
            } finally {
                if (infoMap != null && !infoMap.isEmpty()) {
                    try {
                        final CrawlingSessionHelper crawlingSessionHelper = SingletonS2Container
                                .getComponent("crawlingSessionHelper");
                        crawlingSessionHelper.put(options.sessionId, infoMap);
                    } catch (final Exception e) {
                        logger.warn("Failed to store crawling information.", e);
                    }
                }
                databaseHelper.optimize();
            }

            // notification (best effort: a mail failure must not fail the run)
            try {
                crawler.sendMail(infoMap);
            } catch (final Exception e) {
                logger.warn("Failed to send a mail.", e);
            }
        } finally {
            SingletonS2ContainerFactory.destroy();
        }
    }

    /**
     * Sends the crawling information as a notification mail.
     *
     * <p>
     * The recipients are read from the
     * {@code Constants.NOTIFICATION_TO_PROPERTY} crawler property as a
     * comma-separated list; whitespace around an address is removed and
     * empty entries are ignored. Nothing is sent when no recipient is
     * configured. The mail body contains one {@code key=value} line per
     * entry of {@code infoMap}.
     * </p>
     *
     * @param infoMap crawling information to report; {@code null} is
     *            treated like an empty map
     */
    public void sendMail(final Map<String, String> infoMap) {
        final String toStrs = (String) crawlerProperties
                .get(Constants.NOTIFICATION_TO_PROPERTY);
        if (StringUtil.isBlank(toStrs)) {
            return;
        }

        // "a@example.com, b@example.com" must not yield " b@example.com".
        final List<String> toList = new ArrayList<String>();
        for (final String address : toStrs.split(",")) {
            final String trimmed = address.trim();
            if (trimmed.length() > 0) {
                toList.add(trimmed);
            }
        }
        if (toList.isEmpty()) {
            return;
        }

        final StringBuilder buf = new StringBuilder();
        if (infoMap != null) {
            for (final Map.Entry<String, String> entry : infoMap.entrySet()) {
                buf.append(entry.getKey()).append('=')
                        .append(entry.getValue()).append('\n');
            }
        }

        mailHelper.send(toList.toArray(new String[toList.size()]),
                notificationSubject, buf.toString());
    }

    /**
     * Runs one crawl for the session given in {@code options}.
     *
     * <p>
     * The steps are executed in this order: register the path mappings for
     * the session, initialize the overlapping host helper (failures are only
     * logged), crawl web, file system and data configurations, delete index
     * segments of old sessions, optimize or commit the index, and apply the
     * new server group. After each crawl step the force-stop flag of the
     * system helper is checked and the method returns early when it is set.
     * Any {@link Throwable} raised by a step is logged and ends the run.
     * </p>
     *
     * <p>
     * In all cases the path mappings of the session are removed again and
     * the status, end time and execution time are recorded.
     * </p>
     *
     * @param options command-line options holding the session ID and the
     *            optional config ID lists
     * @return start/end/execution times of the executed steps and the
     *         overall status ({@code Constants.T} only when every step
     *         completed, otherwise {@code Constants.F})
     */
    public Map<String, String> doCrawl(final Options options) {
        if (logger.isInfoEnabled()) {
            logger.info("Starting Crawler..");
        }

        final PathMappingHelper pathMappingHelper = SingletonS2Container
                .getComponent("pathMappingHelper");

        final SimpleDateFormat dateFormat = new SimpleDateFormat(
                "yyyy-MM-dd'T'HH:mm:ss.SSSZ");

        final long totalTime = System.currentTimeMillis();
        final Map<String, String> infoMap = new HashMap<String, String>();

        final SystemHelper systemHelper = SingletonS2Container
                .getComponent("systemHelper");

        boolean completed = false;
        try {
            infoMap.put(Constants.CRAWLER_START_TIME,
                    dateFormat.format(new Date()));

            final SolrServerGroup solrServerGroup = solrServerManager
                    .getUpdateSolrServerGroup();

            // setup path mapping
            final List<CDef.ProcessType> ptList = new ArrayList<CDef.ProcessType>();
            ptList.add(CDef.ProcessType.Crawling);
            ptList.add(CDef.ProcessType.Both);
            pathMappingHelper.setPathMappingList(options.sessionId,
                    pathMappingService.getPathMappingList(ptList));

            // overlapping host (optional: a failure here does not stop the crawl)
            try {
                final OverlappingHostHelper overlappingHostHelper = SingletonS2Container
                        .getComponent("overlappingHostHelper");
                overlappingHostHelper.init();
            } catch (final Exception e) {
                logger.warn("Could not initialize overlappingHostHelper.", e);
            }

            // crawl web
            infoMap.put(Constants.WEB_CRAWLER_START_TIME,
                    dateFormat.format(new Date()));
            webIndexHelper.crawl(options.sessionId,
                    options.getWebConfigIdList(), solrServerGroup);
            infoMap.put(Constants.WEB_CRAWLER_END_TIME,
                    dateFormat.format(new Date()));

            if (isForceStopped(systemHelper, options.sessionId)) {
                return infoMap;
            }

            // crawl file system
            infoMap.put(Constants.FS_CRAWLER_START_TIME,
                    dateFormat.format(new Date()));
            fileSystemIndexHelper.crawl(options.sessionId,
                    options.getFileConfigIdList(), solrServerGroup);
            infoMap.put(Constants.FS_CRAWLER_END_TIME,
                    dateFormat.format(new Date()));

            if (isForceStopped(systemHelper, options.sessionId)) {
                return infoMap;
            }

            // crawl data store
            infoMap.put(Constants.DATA_CRAWLER_START_TIME,
                    dateFormat.format(new Date()));
            dataIndexHelper.crawl(options.sessionId,
                    options.getDataConfigIdList(), solrServerGroup);
            infoMap.put(Constants.DATA_CRAWLER_END_TIME,
                    dateFormat.format(new Date()));

            if (isForceStopped(systemHelper, options.sessionId)) {
                return infoMap;
            }

            // clean up: drop the segments of sessions older than N days
            final int dayForCleanup = resolveDayForCleanup();
            if (dayForCleanup >= 0) {
                final Date date = DateUtils.addDays(new Date(), -1
                        * dayForCleanup);
                final String[] sessionIds = crawlingSessionService
                        .getSessionIdsBefore(date);
                final SolrServerGroup selectSolrServerGroup = solrServerManager
                        .getSelectSolrServerGroup();
                for (final String sid : sessionIds) {
                    // delete from the update group, and from the select
                    // group too when it is a different instance
                    solrServerGroup.deleteByQuery("segment:" + sid);
                    if (selectSolrServerGroup != solrServerGroup) {
                        selectSolrServerGroup.deleteByQuery("segment:" + sid);
                    }
                }
            }

            // optimize (takes precedence), otherwise commit, otherwise nothing
            final String optimizeStr = crawlerProperties.getProperty(
                    Constants.OPTIMIZE_PROPERTY, Constants.TRUE);
            final String commitStr = crawlerProperties.getProperty(
                    Constants.COMMIT_PROPERTY, Constants.TRUE);
            if (Constants.TRUE.equalsIgnoreCase(optimizeStr)) {
                infoMap.put(Constants.OPTIMIZE_START_TIME,
                        dateFormat.format(new Date()));
                final long startTime = System.currentTimeMillis();
                solrServerGroup.optimize();
                final long execTime = System.currentTimeMillis() - startTime;
                infoMap.put(Constants.OPTIMIZE_END_TIME,
                        dateFormat.format(new Date()));
                infoMap.put(Constants.OPTIMIZE_EXEC_TIME,
                        Long.toString(execTime));
                if (logger.isInfoEnabled()) {
                    logger.info("[EXEC TIME] index optimize time: " + execTime
                            + "ms");
                }
            } else if (Constants.TRUE.equalsIgnoreCase(commitStr)) {
                infoMap.put(Constants.COMMIT_START_TIME,
                        dateFormat.format(new Date()));
                final long startTime = System.currentTimeMillis();
                solrServerGroup.commit();
                final long execTime = System.currentTimeMillis() - startTime;
                infoMap.put(Constants.COMMIT_END_TIME,
                        dateFormat.format(new Date()));
                infoMap.put(Constants.COMMIT_EXEC_TIME,
                        Long.toString(execTime));
                if (logger.isInfoEnabled()) {
                    logger.info("[EXEC TIME] index commit time: " + execTime
                            + "ms");
                }
            } else {
                if (logger.isInfoEnabled()) {
                    logger.info("No index commit.");
                }
            }

            final String serverRotationStr = crawlerProperties.getProperty(
                    Constants.SERVER_ROTATION_PROPERTY, Constants.TRUE);
            if (Constants.TRUE.equalsIgnoreCase(serverRotationStr)) {
                // apply
                solrServerManager.applyNewServerGroup();
            }

            if (logger.isInfoEnabled()) {
                logger.info("Finished Crawler");
            }
            completed = true;
        } catch (final Throwable t) {
            if (logger.isWarnEnabled()) {
                logger.warn("Interrupted a crawl task.", t);
            }
        } finally {
            // Runs for normal completion, early return on force stop, and
            // failure alike.
            pathMappingHelper.removePathMappingList(options.sessionId);
            infoMap.put(Constants.CRAWLER_STATUS, completed ? Constants.T
                    : Constants.F);
            infoMap.put(Constants.CRAWLER_END_TIME,
                    dateFormat.format(new Date()));
            infoMap.put(Constants.CRAWLER_EXEC_TIME,
                    Long.toString(System.currentTimeMillis() - totalTime));

        }
        return infoMap;
    }

    /**
     * Checks the force-stop flag and logs the interruption when it is set.
     *
     * @param systemHelper helper providing the force-stop flag
     * @param sessionId session ID used in the log message
     * @return {@code true} when the crawling process must stop
     */
    private boolean isForceStopped(final SystemHelper systemHelper,
            final String sessionId) {
        if (!systemHelper.isForceStop()) {
            return false;
        }
        if (logger.isInfoEnabled()) {
            logger.info("Interrupted this crawling process: " + sessionId);
        }
        return true;
    }

    /**
     * Reads the {@code Constants.DAY_FOR_CLEANUP_PROPERTY} crawler property
     * (default {@code "1"}).
     *
     * @return the configured number of days, or {@code -1} when the value
     *         is not an integer, which disables the cleanup step
     */
    private int resolveDayForCleanup() {
        final String dayForCleanupStr = crawlerProperties.getProperty(
                Constants.DAY_FOR_CLEANUP_PROPERTY, "1");
        try {
            return Integer.parseInt(dayForCleanupStr);
        } catch (final NumberFormatException e) {
            // Keep the crawl running, but make the misconfiguration visible
            // instead of silently skipping the cleanup.
            logger.warn("Invalid value for "
                    + Constants.DAY_FOR_CLEANUP_PROPERTY + ": "
                    + dayForCleanupStr
                    + ". Old sessions are not cleaned up.", e);
            return -1;
        }
    }
}
