/*
 * Reordering of conversion candidates based on relations between segments
 * Copyright (C) 2006 Higashiyama Masahiko (thanks google summer of code program)
 * Copyright (C) 2002-2007 TABATA Yusuke
 *
 * anthy_reorder_candidates_by_relation()
 *
 */
/*
  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */

#include <arpa/inet.h>
#include <stdlib.h>

#include <anthy/segclass.h>
#include <anthy/segment.h>
#include <anthy/ordering.h>
#include <anthy/dic.h>
#include <anthy/diclib.h>
#include <anthy/feature_set.h>
#include "sorter.h"

/* Bits kept from a word id when it is used as a lookup key
 * (applied in find_first_from_corpus()). */
#define KEY_MASK 0x3fffffff
/* Flag bit tested on corpus array entries by find_left_border() and
 * find_right_border(); presumably "beginning of sentence" -- TODO confirm
 * against the code that builds the corpus section. */
#define BOS_MASK 0x40000000
/* Flag bit tested on corpus array entries to recognise a word border. */
#define BORDER_MASK 0x80000000
/* Number of consecutive buckets probed by find_first_pos(). */
#define MAX_COLLISION 4
/* Step budget handed to find_first_from_corpus() by reorder_by_corpus(). */
#define SEARCH_LIMIT 10


/* File-local description of the corpus sections; filled in by
 * anthy_relation_init() and read by the find_*() helpers. */
static struct corpus_ {
  /* raw section pointers as returned by anthy_file_dic_get_section()
   * (they point at the section header) */
  void *corpus_bucket;
  void *corpus_array;
  /* payload of each section: the address 16 ints past the section start */
  int *bucket;
  int *array;
  /* sizes read from int #1 of each section (converted with ntohl()) */
  int bucket_size;
  int array_size;
} corpus_info;

/* Cursor used to walk the corpus array entries that belong to one key. */
struct iterator {
  /* key being searched, already masked with KEY_MASK */
  int key;
  /* current position in corpus_info.array, or -1 when there is none */
  int idx;
  /* remaining step budget; find_next_from_corpus() gives up below 1 */
  int limit;
};

/** Boost the candidates of @seg that are related to the word @from_word_id.
 *
 *  The part of speech of the first candidate's core element is the
 *  reference.  Every candidate that has a core element, whose core word is
 *  related to @from_word_id according to anthy_dic_check_word_relation(),
 *  and whose core element has that same part of speech gets CEF_USEDICT
 *  set and its score multiplied by 10.
 *  Nothing is done when the first candidate has no core element.
 */
static void
reorder_candidate(int from_word_id, struct seg_ent *seg)
{
  struct cand_ent *head = seg->cands[0];
  int ref_pos;
  int n;

  if (head->core_elm_index == -1) {
    return ;
  }
  /* part of speech of the top candidate */
  ref_pos = anthy_wtype_get_pos(head->elm[head->core_elm_index].wt);

  for (n = 0; n < seg->nr_cands; n++) {
    struct cand_ent *cand = seg->cands[n];
    int core = cand->core_elm_index;

    if (core == -1) {
      continue;
    }
    if (!anthy_dic_check_word_relation(from_word_id, cand->elm[core].id)) {
      continue;
    }
    if (anthy_wtype_get_pos(cand->elm[core].wt) != ref_pos) {
      continue;
    }
    /* related word with the same part of speech: raise its score */
    cand->flag |= CEF_USEDICT;
    cand->score *= 10;
  }
}

/* Return the id of the core element of the @nth candidate of @seg,
 * or -1 when that candidate has no core element. */
static int
get_indep_word_id(struct seg_ent *seg, int nth)
{
  struct cand_ent *cand = seg->cands[nth];
  int core = cand->core_elm_index;

  if (core != -1) {
    return cand->elm[core].id;
  }
  /* this candidate has no core element to take an id from */
  return -1;
}

/* Use the core word of the top candidate of segment @nth to boost related
 * candidates in the neighbouring segments.
 *
 * The neighbours visited are the indices nth-2, nth-1 and nth+1 that fall
 * inside [0, sl->nr_segments); segment @nth itself is skipped. */
static void
reorder_by_use_dict(struct segment_list *sl, int nth)
{
  struct seg_ent *origin = anthy_get_nth_segment(sl, nth);
  int origin_word = get_indep_word_id(origin, 0);
  int neighbor;

  if (origin_word == -1) {
    /* top candidate has no core word to relate to */
    return ;
  }

  neighbor = nth - 2;
  while (neighbor < nth + 2 && neighbor < sl->nr_segments) {
    if (neighbor >= 0 && neighbor != nth) {
      /* boost candidates of the neighbouring segment */
      reorder_candidate(origin_word, anthy_get_nth_segment(sl, neighbor));
    }
    neighbor++;
  }
}

/* Look @key up in the bucket table and return the array position stored
 * with it, or -1 when the key is not found.
 *
 * Each bucket is a pair of ints in network byte order: the key followed by
 * a position.  Probing starts at key % bucket_size and tries MAX_COLLISION
 * consecutive buckets (wrapping around).
 *
 * Returns -1 without touching the table when the corpus has not been
 * loaded (anthy_relation_init() leaves bucket NULL / bucket_size 0 when a
 * section is missing); this avoids a modulo by zero and a NULL
 * dereference.  A negative key is rejected as well because it would
 * produce a negative bucket index. */
static int
find_first_pos(int key)
{
  int probe;

  if (!corpus_info.bucket || corpus_info.bucket_size < 1 || key < 0) {
    return -1;
  }
  for (probe = 0; probe < MAX_COLLISION; probe++) {
    int bkt = (key + probe) % corpus_info.bucket_size;
    if (ntohl(corpus_info.bucket[bkt * 2]) == (uint32_t)key) {
      return ntohl(corpus_info.bucket[bkt * 2 + 1]);
    }
  }
  return -1;
}

/* Initialise @it for a search of @key (masked with KEY_MASK) with a step
 * budget of @limit, and position it on the first entry reported by
 * find_first_pos().  Returns that position (-1 when there is none). */
static int
find_first_from_corpus(int key, struct iterator *it, int limit)
{
  int masked = key & KEY_MASK;
  int first = find_first_pos(masked);

  it->key = masked;
  it->limit = limit;
  it->idx = first;
  return first;
}

/* Advance @it to the next entry by following the link stored in the second
 * int of the current array entry.
 *
 * The step budget is decremented first; once it drops below 1 the
 * iterator is invalidated.  A link that is negative, not smaller than
 * array_size, or that points before the current position also
 * invalidates the iterator.  Returns the new position or -1. */
static int
find_next_from_corpus(struct iterator *it)
{
  int prev = it->idx;
  int next;

  it->limit -= 1;
  if (it->limit < 1) {
    it->idx = -1;
    return -1;
  }

  next = ntohl(corpus_info.array[prev * 2 + 1]);
  if (next >= 0 && next < corpus_info.array_size && next >= prev) {
    it->idx = next;
  } else {
    it->idx = -1;
  }
  return it->idx;
}

/* Starting at @idx and walking towards index 0, return the first array
 * position whose entry has BORDER_MASK set, or -1 when none is found.
 *
 * Returns -1 immediately for @idx < 1 and for an @idx that is not inside
 * the array.
 *
 * The position is decremented on every iteration; without that step the
 * loop would never terminate when the entry at @idx is not a border. */
static int
find_word_border(int idx)
{
  if (idx < 1 || idx >= corpus_info.array_size) {
    return -1;
  }
  for (; idx > -1; idx--) {
    int val = ntohl(corpus_info.array[idx * 2]);
    if (val & BORDER_MASK) {
      return idx;
    }
  }
  return -1;
}

/* Return the position of the word border to the left of the entry at
 * @idx, or -1 when there is none.
 *
 * Returns -1 for @idx < 1, and also when the entry at @idx carries
 * BOS_MASK: find_right_border() stops as soon as it meets an entry with
 * that flag, so an entry that carries it is treated here as having
 * nothing to its left.  (The previous code had this test inverted and
 * gave up for every entry that did NOT carry the flag.)
 * Otherwise the search is delegated to find_word_border(@idx - 1). */
static int
find_left_border(int idx)
{
  if (idx < 1) {
    return -1;
  }
  if (ntohl(corpus_info.array[idx * 2]) & BOS_MASK) {
    return -1;
  }
  return find_word_border(idx - 1);
}

/* Scan the array entries after @idx and return the position of the first
 * one that has BORDER_MASK set.  Returns -1 when an entry with BOS_MASK is
 * met first (BOS_MASK is tested before BORDER_MASK) or when the end of the
 * array is reached. */
static int
find_right_border(int idx)
{
  int cur;

  for (cur = idx + 1; cur < corpus_info.array_size; cur++) {
    int flags = ntohl(corpus_info.array[cur * 2]);

    if (flags & BOS_MASK) {
      return -1;
    }
    if (flags & BORDER_MASK) {
      return cur;
    }
  }
  return -1;
}

/* Placeholder: the body is empty, so none of the arguments are used.
 * compare_context() calls this with the positions it computed (@left,
 * @this_word, @right) together with the segment list, the segment index
 * and the candidate being examined. */
static void
scan_context(int left, int this_word, int right,
	     struct segment_list *sl, int nth,
	     struct cand_ent *ce)
{
}

/* Locate the corpus word that the iterator @it points into, extend the
 * range by up to two word borders to the left and up to two to the right,
 * and hand the resulting positions to scan_context().
 *
 * Each step continues from the border found by the previous step.  The
 * previous code always restarted from this_word, so the second iteration
 * of each loop merely repeated the first. */
static void
compare_context(struct iterator *it, struct segment_list *sl,
		int nth, struct cand_ent *ce)
{
  int this_word = find_word_border(it->idx);
  int i, tmp, left, right;
  if (this_word == -1) {
    /* no border at or before the iterator position */
    return ;
  }
  left = this_word;
  for (i = 0; i < 2; i++) {
    tmp = find_left_border(left);
    if (tmp < 0) {
      /* nothing further to the left; keep what we have */
      break;
    }
    left = tmp;
  }
  right = this_word;
  for (i = 0; i < 2; i++) {
    tmp = find_right_border(right);
    if (tmp < 0) {
      /* nothing further to the right; keep what we have */
      break;
    }
    right = tmp;
  }
  scan_context(left, this_word, right, sl, nth, ce);
}

static void
reorder_by_corpus(struct segment_list *sl, int nth)
{
  struct iterator it;
  struct seg_ent *cur_seg;
  int word_id;
  int i;
  cur_seg = anthy_get_nth_segment(sl, nth);
  for (i = 0; i < cur_seg->nr_cands; i++) {
    word_id = get_indep_word_id(cur_seg, i);
    if (word_id == -1) {
      continue ;
    }
    find_first_from_corpus(word_id, &it, SEARCH_LIMIT);
    while (it.idx > 0) {
      compare_context(&it, sl, nth, cur_seg->cands[i]);
      /**/
      find_next_from_corpus(&it);
    }
  }
}

/*
 * Reorder candidates using word relations.
 *  Every segment of @sl from index @nth up to the last one is processed:
 *  first reorder_by_use_dict(), then reorder_by_corpus().
 */
void
anthy_reorder_candidates_by_relation(struct segment_list *sl, int nth)
{
  int seg_no = nth;

  while (seg_no < sl->nr_segments) {
    reorder_by_use_dict(sl, seg_no);
    reorder_by_corpus(sl, seg_no);
    seg_no++;
  }
}

/* Load the "corpus_array" and "corpus_bucket" sections of the dictionary
 * file and set up corpus_info from them.
 *
 * In each section, int #1 (network byte order) is taken as the size and
 * the payload starts 16 ints past the beginning of the section.
 *
 * When either section is missing, the derived fields are cleared so the
 * lookup helpers see an empty corpus.  The previous code tested
 * corpus_array twice and never corpus_bucket, so a missing bucket section
 * was dereferenced. */
void
anthy_relation_init(void)
{
  int *array_sec;
  int *bucket_sec;

  corpus_info.corpus_array = anthy_file_dic_get_section("corpus_array");
  corpus_info.corpus_bucket = anthy_file_dic_get_section("corpus_bucket");
  if (!corpus_info.corpus_array ||
      !corpus_info.corpus_bucket) {
    /* do not leave stale pointers/sizes from an earlier initialisation */
    corpus_info.array = NULL;
    corpus_info.bucket = NULL;
    corpus_info.array_size = 0;
    corpus_info.bucket_size = 0;
    return ;
  }
  array_sec = (int *)corpus_info.corpus_array;
  bucket_sec = (int *)corpus_info.corpus_bucket;
  corpus_info.array_size = ntohl(array_sec[1]);
  corpus_info.bucket_size = ntohl(bucket_sec[1]);
  corpus_info.array = &array_sec[16];
  corpus_info.bucket = &bucket_sec[16];
}
