/* 
 * Copyright (C) 2005  Network Applied Communication Laboratory Co., Ltd.
 *
 * This file is part of Rast.
 * See the file COPYING for redistribution information.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <apr_strings.h>
#include "rast/merger.h"

rast_error_t *
rast_merger_open(rast_db_t **base,
                 rast_db_t **merge_dbs, int num_merge_dbs, apr_pool_t *pool)
{
    const static rast_db_t default_base = {
        NULL,
        NULL,
        rast_merger_close,
        NULL,
        NULL,
        rast_merger_search,
        NULL,
        NULL,
        NULL,
        NULL,
        rast_merger_encoding,
        rast_merger_properties,
        NULL,
    };
    const char *merger_encoding, *encoding;
    int i;

    if (num_merge_dbs <= 0) {
        return rast_error(RAST_ERROR_INVALID_ARGUMENT,
                          "empty indices: %d", num_merge_dbs);
    }

    merger_encoding = rast_db_encoding(merge_dbs[0]);
    for (i = 1; i < num_merge_dbs; i++) {
        encoding = rast_db_encoding(merge_dbs[i]);
        if (strcmp(encoding, merger_encoding) != 0) {
            return rast_error(RAST_ERROR_GENERAL,
                              "merged indices must be same encoding: %s != %s",
                              merger_encoding, encoding);
        }
    }

    rast_merger_t *db = (rast_merger_t *)
        apr_palloc(pool, sizeof(rast_merger_t));
    *base = (rast_db_t *) db;
    db->base = default_base;
    db->base.pool = pool;
    db->encoding = merger_encoding;
    db->num_merged_dbs = num_merge_dbs;
    db->merged_dbs = merge_dbs;
    db->properties = NULL;
    db->num_properties = 0;
    return RAST_OK;
}

rast_error_t *
rast_merger_close(rast_db_t *base)
{
    return RAST_OK;
}

typedef struct {
    rast_result_item_t *item;
    rast_value_t *sort_property;
} merger_sort_data_t;

static int
score_ascending_sort_compare_func(const void *v1, const void *v2)
{
    rast_result_item_t *item1 = *(rast_result_item_t **) v1;
    rast_result_item_t *item2 = *(rast_result_item_t **) v2;

    return item1->score - item2->score;
}

static int
score_descending_sort_compare_func(const void *v1, const void *v2)
{
    return score_ascending_sort_compare_func(v2, v1);
}

static int
property_string_ascending_sort_compare_func(const void *v1, const void *v2)
{
    merger_sort_data_t *data1 = *(merger_sort_data_t **) v1;
    merger_sort_data_t *data2 = *(merger_sort_data_t **) v2;

    if (data1->sort_property->type == RAST_TYPE_UINT) {
        rast_uint_t n1 = rast_value_uint(data1->sort_property);
        rast_uint_t n2 = rast_value_uint(data2->sort_property);
        if (n1 < n2) {
            return -1;
        }
        else if (n1 == n2) {
            return 0;
        }
        else {
            return 1;
        }
    }
    else {
        return strcmp(rast_value_string(data1->sort_property),
                      rast_value_string(data2->sort_property));
    }
}

static int
property_string_descending_sort_compare_func(const void *v1, const void *v2)
{
    return property_string_ascending_sort_compare_func(v2, v1);
}

rast_error_t *
sort_result(rast_search_option_t *options, int sort_property_number,
            rast_result_t *result, apr_pool_t *pool)
{
    merger_sort_data_t **sort_data;
    int (*sort_func)(const void *, const void *);

    if (options->sort_method == RAST_SORT_METHOD_SCORE) {
        if (options->sort_order == RAST_SORT_ORDER_ASCENDING) {
            sort_func = score_ascending_sort_compare_func;
        }
        else {
            sort_func = score_descending_sort_compare_func;
        }
        qsort(result->items, result->num_items,
              sizeof(merger_sort_data_t *), sort_func);
    }
    else {
        int i;

        if (options->sort_order == RAST_SORT_ORDER_DESCENDING) {
            sort_func = property_string_descending_sort_compare_func;
        }
        else {
            sort_func = property_string_ascending_sort_compare_func;
        }
        sort_data = (merger_sort_data_t **)
            apr_palloc(pool, sizeof(merger_sort_data_t *) * result->num_items);

        for (i = 0; i < result->num_items; i++) {
            sort_data[i] = (merger_sort_data_t *)
                apr_palloc(pool, sizeof(merger_sort_data_t));
            sort_data[i]->item = result->items[i];
            sort_data[i]->sort_property =
                &result->items[i]->properties[sort_property_number];
        }
        qsort(sort_data, result->num_items,
              sizeof(merger_sort_data_t *), sort_func);
        for (i = 0; i < result->num_items; i++) {
            result->items[i] = sort_data[i]->item;
        }
    }
    return RAST_OK;
}

static rast_search_option_t *
get_zero_item_options(rast_search_option_t *options, apr_pool_t *pool)
{
    rast_search_option_t *result;

    result = (rast_search_option_t *)
        apr_pmemdup(pool, options, sizeof(rast_search_option_t));
    result->num_items = 0;
    return result;
}

static rast_search_option_t *
get_child_options(rast_search_option_t *options, int *sort_property_number,
                  apr_pool_t *pool)
{
    int i;
    rast_search_option_t *result;

    result = (rast_search_option_t *)
        apr_pmemdup(pool, options, sizeof(rast_search_option_t));
    result->properties = (const char **)
        apr_palloc(pool, sizeof(const char *) * (options->num_properties + 1));
    memcpy(result->properties, options->properties,
           sizeof(char *) * options->num_properties);
    result->start_no = 0;
    result->num_items = options->start_no + options->num_items;

    if (options->sort_method == RAST_SORT_METHOD_PROPERTY) {
        for (i = 0; i < options->num_properties; i++) {
            if (strcmp(options->properties[i], options->sort_property) == 0) {
                *sort_property_number = i;
                return result;
            }
        }

        result->properties[options->num_properties] = options->sort_property;
        result->num_properties++;
        *sort_property_number = options->num_properties;
    }
    return result;
}

static rast_error_t *
set_scoring_options(rast_merger_t *db, const char *query,
                    rast_search_option_t *original_options,
                    rast_search_option_t *options, apr_pool_t *pool)
{
    int i, j;
    rast_search_option_t *zero_item_options;
    rast_result_t *child_results;
    rast_error_t *error;
    apr_pool_t *sub_pool;

    apr_pool_create(&sub_pool, pool);

    zero_item_options = get_zero_item_options(original_options, sub_pool);
    options->all_num_docs = 0;

    for (i = 0; i < db->num_merged_dbs; i++) {
        error = rast_db_search(db->merged_dbs[i], query, zero_item_options,
                               &child_results, sub_pool);
        if (error != RAST_OK) {
            apr_pool_destroy(sub_pool);
            return error;
        }
        options->all_num_docs += child_results->num_docs;
        if (child_results->hit_count > 0) {
            options->num_terms = child_results->num_terms;
            options->terms = (int *)
                apr_palloc(pool, sizeof(int) * options->num_terms);
            for (j = 0; j < child_results->num_terms; j++) {
                options->terms[j] = child_results->terms[j].doc_count;
            }
            i++;
            break;
        }
    }

    for (; i < db->num_merged_dbs; i++) {
        error = rast_db_search(db->merged_dbs[i], query, zero_item_options,
                               &child_results, sub_pool);
        if (error != RAST_OK) {
            apr_pool_destroy(sub_pool);
            return error;
        }
        options->all_num_docs += child_results->num_docs;
        if (child_results->hit_count > 0) {
            for (j = 0; j < options->num_terms; j++) {
                options->terms[j] += child_results->terms[j].doc_count;
            }
        }
    }

    apr_pool_destroy(sub_pool);
    return RAST_OK;
}

static void
merge_child_result(rast_result_t *child_result, rast_result_t *result)
{
    int i;

    result->num_docs += child_result->num_docs;
    if (child_result->hit_count > 0) {
        for (i = 0; i < result->num_terms; i++) {
            result->terms[i].doc_count += child_result->terms[i].doc_count;
        }
        memcpy(result->items + result->num_items,
               child_result->items,
               sizeof(rast_result_item_t *) * child_result->num_items);
        result->num_items += child_result->num_items;
    }
}

static void
set_result_range(rast_search_option_t *options, rast_result_t *result)
{
    if (result->num_items < options->start_no) {
        result->num_items = 0;
    }
    else {
        result->items += options->start_no;
        result->num_items -= options->start_no;
        if (options->num_items != RAST_RESULT_ALL_ITEMS &&
            result->num_items > options->num_items) {
            result->num_items = options->num_items;
        }
    }
}

rast_error_t *
rast_merger_search(rast_db_t *base,
                   const char *query, rast_search_option_t *options,
                   rast_result_t **results, apr_pool_t *pool)
{
    rast_merger_t *db = (rast_merger_t *) base;
    int i, j, all_hit_count, doc_id_prefix;
    int sort_property_number;
    rast_result_t **child_results;
    rast_error_t *error = RAST_OK;
    apr_pool_t *sub_pool;
    rast_search_option_t *child_options;

    apr_pool_create(&sub_pool, pool);

    all_hit_count = 0;
    child_results = (rast_result_t **)
        apr_palloc(sub_pool, sizeof(rast_result_t *) * db->num_merged_dbs);
    doc_id_prefix = 0;
    child_options = get_child_options(options, &sort_property_number,
                                      sub_pool);

    if (options->score_method == RAST_SCORE_METHOD_TFIDF) {
        error = set_scoring_options(db, query, options, child_options,
                                    sub_pool);
        if (error != RAST_OK) {
            apr_pool_destroy(sub_pool);
            return error;
        }
    }

    for (i = 0; i < db->num_merged_dbs; i++) {
        error = rast_db_search(db->merged_dbs[i], query, child_options,
                               child_results + i, pool);
        if (error != RAST_OK) {
            apr_pool_destroy(sub_pool);
            return error;
        }
        all_hit_count += child_results[i]->hit_count;
        for (j = 0; j < child_results[i]->num_items; j++) {
            child_results[i]->items[j]->doc_id += doc_id_prefix << 24;
            child_results[i]->items[j]->db_index = i;
        }
        doc_id_prefix += child_results[i]->num_indices;
    }

    *results = (rast_result_t *) apr_palloc(pool, sizeof(rast_result_t));
    (*results)->num_indices = doc_id_prefix;
    (*results)->hit_count = all_hit_count;
    (*results)->terms = (rast_result_term_t *)
        apr_palloc(pool,
                   sizeof(rast_result_term_t) * child_results[0]->num_terms);
    (*results)->items = (rast_result_item_t **)
        apr_palloc(pool, sizeof(rast_result_item_t *) * all_hit_count);

    (*results)->num_docs = child_results[0]->num_docs;
    memcpy((*results)->terms, child_results[0]->terms,
           sizeof(rast_result_term_t) * child_results[0]->num_terms);

    (*results)->num_terms = child_results[0]->num_terms;
    memcpy((*results)->items, child_results[0]->items,
           sizeof(rast_result_item_t *) * child_results[0]->num_items);
    (*results)->num_items = child_results[0]->num_items;

    for (i = 1; i < db->num_merged_dbs; i++) {
        merge_child_result(child_results[i], *results);
    }

    sort_result(child_options, sort_property_number, *results, sub_pool);
    set_result_range(options, *results);

    apr_pool_destroy(sub_pool);
    return RAST_OK;
}

const char *
rast_merger_encoding(rast_db_t *base)
{
    rast_merger_t *db = (rast_merger_t *) base;

    return db->encoding;
}

const rast_property_t *
rast_merger_properties(rast_db_t *base, int *num_properties)
{
    rast_merger_t *db = (rast_merger_t *) base;
    typedef struct property_list_entry_t {
        APR_RING_ENTRY(property_list_entry_t) link;
        const rast_property_t *property;
    } property_list_entry_t;
    typedef APR_RING_HEAD(property_list_head_t, property_list_entry_t)
        property_list_head_t;
    property_list_head_t *property_list;

    if (db->properties == NULL) {
        int i, j;
        const rast_property_t *child_properties;
        int child_num_properties;
        apr_pool_t *pool;
        property_list_entry_t *entry, *next_entry;

        apr_pool_create(&pool, db->base.pool);
        property_list =
            (property_list_head_t *) apr_palloc(pool,
                                                sizeof(property_list_head_t));
        APR_RING_INIT(property_list, property_list_entry_t, link);

        child_properties = rast_db_properties(db->merged_dbs[0],
                                              &db->num_properties);
        for (i = 0; i < db->num_properties; i++) {
            entry = (property_list_entry_t *)
                apr_palloc(pool, sizeof(property_list_entry_t));
            entry->property = &child_properties[i];
            APR_RING_INSERT_TAIL(property_list, entry,
                                 property_list_entry_t, link);
        }
        for (i = 1; i < db->num_merged_dbs; i++) {
            child_properties = rast_db_properties(db->merged_dbs[i],
                                                  &child_num_properties);
            entry = APR_RING_FIRST(property_list);
            while (entry != APR_RING_SENTINEL(property_list,
                                              property_list_entry_t, link)) {
                for (j = 0; j < child_num_properties; j++) {
                    if (strcmp(child_properties[j].name,
                               entry->property->name) == 0) {
                        break;
                    }
                }

                next_entry = APR_RING_NEXT(entry, link);
                if (j == child_num_properties) {
                    APR_RING_REMOVE(entry, link);
                    db->num_properties--;
                }
                entry = next_entry;
            }
        }

        db->properties = (const rast_property_t *)
            apr_palloc(db->base.pool, sizeof(const rast_property_t) *
                       db->num_properties);
        i = 0;
        for (entry = APR_RING_FIRST(property_list);
             entry != APR_RING_SENTINEL(property_list, property_list_entry_t,
                                        link);
             entry = APR_RING_NEXT(entry, link)) {
            rast_property_t *property = (rast_property_t *) &db->properties[i];
            property->name = entry->property->name;
            property->type = entry->property->type;
            property->flags = entry->property->flags;
            i++;
        }
        apr_pool_destroy(pool);
    }

    *num_properties = db->num_properties;
    return db->properties;
}
