/*
 * Copyright 2009-2012 the Fess Project and the Others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

package jp.sf.fess.robot;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import jp.sf.fess.Constants;
import jp.sf.fess.db.exentity.CrawlingConfig;
import jp.sf.fess.helper.CrawlingConfigHelper;
import jp.sf.fess.helper.CrawlingSessionHelper;
import jp.sf.fess.solr.SolrServerGroup;
import jp.sf.fess.solr.SolrServerManager;
import jp.sf.fess.util.FessProperties;

import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.robot.S2RobotThread;
import org.seasar.robot.client.S2RobotClient;
import org.seasar.robot.entity.ResponseData;
import org.seasar.robot.entity.UrlQueue;
import org.seasar.robot.log.LogType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FessS2RobotThread extends S2RobotThread {
    private static final Logger logger = LoggerFactory
            .getLogger(FessS2RobotThread.class);

    public int maxSolrQueryRetryCount = 5;

    @Override
    protected boolean isContentUpdated(final S2RobotClient client,
            final UrlQueue urlQueue) {
        final FessProperties crawlerProperties = SingletonS2Container
                .getComponent("crawlerProperties");
        if (crawlerProperties.getProperty(Constants.DIFF_CRAWLING_PROPERTY,
                Constants.TRUE).equals(Constants.TRUE)) {
            log(logHelper, LogType.CHECK_LAST_MODIFIED, robotContext, urlQueue);
            final long startTime = System.currentTimeMillis();

            final CrawlingConfigHelper crawlingConfigHelper = SingletonS2Container
                    .getComponent(CrawlingConfigHelper.class);
            final CrawlingSessionHelper crawlingSessionHelper = SingletonS2Container
                    .getComponent(CrawlingSessionHelper.class);

            final CrawlingConfig crawlingConfig = crawlingConfigHelper
                    .getCrawlingConfig(robotContext.getSessionId());
            final Map<String, Object> dataMap = new HashMap<String, Object>();
            dataMap.put("url", urlQueue.getUrl());
            final List<String> browserTypeList = new ArrayList<String>();
            for (final String browserType : crawlingConfig
                    .getBrowserTypeValues()) {
                browserTypeList.add(browserType);
            }
            dataMap.put("type", browserTypeList);
            final List<String> roleTypeList = new ArrayList<String>();
            for (final String roleType : crawlingConfig.getRoleTypeValues()) {
                roleTypeList.add(roleType);
            }
            dataMap.put("role", roleTypeList);
            final String id = crawlingSessionHelper.generateId(dataMap);

            final SolrDocument solrDocument = getSolrDocument(id);
            if (solrDocument == null) {
                return true;
            }

            final Long lastModified = (Long) solrDocument.get("lastModified");
            if (lastModified == null) {
                return true;
            }

            ResponseData responseData = null;
            try {
                //  head method
                responseData = client.doHead(urlQueue.getUrl());
                if (responseData == null
                        || responseData.getLastModified() == null) {
                    return true;
                }
                if (responseData.getLastModified().getTime() <= lastModified
                        .longValue() && responseData.getHttpStatusCode() == 200) {
                    log(logHelper, LogType.NOT_MODIFIED, robotContext, urlQueue);

                    responseData.setExecutionTime(System.currentTimeMillis()
                            - startTime);
                    responseData.setParentUrl(urlQueue.getParentUrl());
                    responseData.setSessionId(robotContext.getSessionId());
                    responseData
                            .setStatus(org.seasar.robot.Constants.NOT_MODIFIED_STATUS);
                    processResponse(urlQueue, responseData);

                    final Set<String> childUrlSet = getAnchorSet(solrDocument
                            .get("anchor"));
                    if (childUrlSet != null) {
                        synchronized (robotContext.getAccessCountLock()) {
                            //  add an url
                            storeChildUrls(
                                    childUrlSet,
                                    urlQueue.getUrl(),
                                    urlQueue.getDepth() != null ? urlQueue
                                            .getDepth() + 1 : 1);
                        }
                    }

                    return false;
                }
            } finally {
                if (responseData != null) {
                    IOUtils.closeQuietly(responseData.getResponseBody());
                }
            }
        }
        return true;
    }

    protected Set<String> getAnchorSet(final Object obj) {
        List<String> anchorList;
        if (obj instanceof String) {
            anchorList = new ArrayList<String>();
            anchorList.add(obj.toString());
        } else if (obj instanceof List<?>) {
            anchorList = (List<String>) obj;
        } else {
            return null;
        }

        if (anchorList.isEmpty()) {
            return null;
        }

        final Set<String> childUrlSet = new LinkedHashSet<String>();
        for (final String anchor : anchorList) {
            childUrlSet.add(anchor);
        }
        return childUrlSet;
    }

    protected SolrDocument getSolrDocument(final String id) {
        final SolrServerManager solrServerManager = SingletonS2Container
                .getComponent(SolrServerManager.class);
        final SolrServerGroup solrServerGroup = solrServerManager
                .getSelectSolrServerGroup();
        final SolrQuery solrQuery = new SolrQuery();
        solrQuery.setQuery("{!raw f=id v=\"" + id + "\"}");
        solrQuery.setFields("id", "lastModified", "anchor");
        for (int i = 0; i < maxSolrQueryRetryCount; i++) {
            try {
                final QueryResponse response = solrServerGroup.query(solrQuery);
                final SolrDocumentList docList = response.getResults();
                if (docList.isEmpty()) {
                    return null;
                }
                if (logger.isDebugEnabled()) {
                    logger.debug("Found solr documents: " + docList);
                }
                return docList.get(0);
            } catch (final Exception e) {
                logger.info("Could not get a response from Solr."
                        + " It might be busy. " + "Retrying.. id:" + id
                        + ", cause: " + e.getMessage());
            }
            try {
                Thread.sleep(500);
            } catch (final InterruptedException e) {
            }
        }
        return null;
    }

}
