/*
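 * Sorts the conversion candidates of each segment.
 * In the future, neighbouring segments should also be examined so that
 * candidates can be scored by how well their words combine.
 * Duplicate candidates are removed here as well.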
 *
 * Funded by the IPA Exploratory (Mitou) Software Project, 2001 9/22
 * Copyright (C) 2000-2001 TABATA Yusuke
 * Copyright (C) 2001 UGAWA Tomoharu
 *
 * $Id: candsort.c,v 1.17 2002/06/11 12:51:49 yusuke Exp $
 *
 */

#include <stdlib.h>
#include <segment.h>
#include <splitter.h>
#include "sorter.h"

/*
 * Score bases used by eval_candidate(). Note that eval_candidate() adds 1
 * to every score it assigns, so no evaluated candidate ends up with exactly
 * the values below.
 */
/* Score given to candidates carrying the CEF_OCHAIRE flag */
#define OCHAIRE_BASE 1000000
/* Score given to unconverted hiragana/katakana candidates when the
 * segment is judged "uncertain" (see uncertain_segment_p()) */
#define NOCONV_WITH_BIAS 900000
/* Base score added to the computed score of ordinary candidates */
#define NORMAL_BASE 100
/* Score given to candidates carrying the CEF_SINGLEWORD flag */
#define SINGLEWORD_BASE 10
/* Default score given to unconverted hiragana/katakana candidates */
#define NOCONV_BASE 1

/* Forward declarations of the file-local helpers defined below. */
static void eval_segment(struct seg_ent *);
static void eval_candidate(struct seg_ent *, struct cand_ent *, int uncertain);
static void sort_segment(struct seg_ent *);
static int candidate_compare_func(const void *, const void *);
static void check_dupl_candidate(struct seg_ent *);
static void release_redundant_candidate(struct seg_ent *);
static int uncertain_segment_p(struct seg_ent *);

/*
 * Decide whether a segment should be treated as "uncertain".
 *
 * A segment without any seginfo is never uncertain. Otherwise only the
 * first seginfo is consulted: the segment is uncertain when that seginfo's
 * info_len + dep_len amounts to at most 60% of the segment length
 * (i.e. len * 3 >= (info_len + dep_len) * 5).
 *
 * Returns 1 when uncertain, 0 otherwise.
 */
static int
uncertain_segment_p(struct seg_ent *e)
{
  struct seg_info *first;

  if (!e->nr_seginfo) {
    return 0;
  }

  first = e->si[0];
  /* the comparison itself already yields 1 or 0 */
  return e->len * 3 >= (first->info_len + first->dep_len) * 5;
}

/*
 * Drop the trailing candidates of a segment, starting at the first one
 * whose score is 0.
 *
 * Every candidate from that position to the end of the array is handed to
 * anthy_release_cand_ent() and cand_count is shrunk accordingly. When no
 * candidate has a score of 0 the segment is left untouched.
 */
static void
release_redundant_candidate(struct seg_ent *se)
{
  int keep = 0;
  int k;

  /* find the first candidate with a zero score */
  while (keep < se->cand_count && se->cands[keep]->score) {
    keep++;
  }
  if (keep >= se->cand_count) {
    /* nothing to release */
    return;
  }

  /* release everything from that candidate onwards */
  for (k = keep; k < se->cand_count; k++) {
    anthy_release_cand_ent(se->cands[k]);
  }
  se->cand_count = keep;
}

/*
 * Comparison function for qsort(): orders candidates by descending score.
 *
 * p1 and p2 point at elements of an array of `struct cand_ent *`.
 * Returns a positive value when the second candidate has the higher score
 * (so it is placed first), a negative value when the first one has the
 * higher score, and 0 when the scores are equal.
 *
 * The scores are compared instead of subtracted: a subtraction can overflow
 * when the two scores are far apart, which would make the reported ordering
 * wrong.
 */
static int
candidate_compare_func(const void *p1, const void *p2)
{
  const struct cand_ent *const *c1 = p1, *const *c2 = p2;

  if ((*c1)->score < (*c2)->score) {
    return 1;
  }
  if ((*c1)->score > (*c2)->score) {
    return -1;
  }
  return 0;
}

/*
 * Sort the candidate pointer array of a segment in place with qsort(),
 * using candidate_compare_func() as the ordering.
 */
static void
sort_segment(struct seg_ent *s)
{
  qsort(s->cands,
        s->cand_count,
        sizeof(struct cand_ent *),
        candidate_compare_func);
}

static void
check_dupl_candidate(struct seg_ent *s)
{
  int i,j;
  for (i = 0; i < s->cand_count - 1; i++) {
    for (j = i + 1; j < s->cand_count; j++) {
      if (!anthy_xstrcmp(&s->cands[i]->str, &s->cands[j]->str)) {
	/* 롼ɤޥåΤ֤Ȥ٤ */
	s->cands[j]->score = 0;
	s->cands[i]->flag |= s->cands[j]->flag;
      }
    }
  }
}

/*
 * Evaluate one candidate and store the result in ce->score.
 *
 *  seg       - segment the candidate belongs to
 *  ce        - candidate to score
 *  uncertain - non-zero when the segment was judged uncertain
 *              (see uncertain_segment_p())
 *
 * Candidates without any of the CEF_OCHAIRE / CEF_SINGLEWORD /
 * CEF_HIRAGANA / CEF_KATAKANA flags get NORMAL_BASE plus a score computed
 * from their words; flagged candidates get a fixed base score.
 * In every case 1 is added at the end, so an evaluated candidate never
 * has the score 0 that the later passes use to mark removed entries
 * (barring arithmetic overflow).
 *
 * NOTE(review): elm->bias, ce->nr_words and ce->si->score are used as
 * divisors without a check; they are assumed to be non-zero here --
 * confirm against the code that builds the candidates.
 */
static void
eval_candidate(struct seg_ent *seg, struct cand_ent *ce, int uncertain)
{
  int i;

  if ((ce->flag &
       (CEF_OCHAIRE | CEF_SINGLEWORD | CEF_HIRAGANA | CEF_KATAKANA)) == 0) {
    /* ordinary candidate: none of the special flags is set */
    int score = 0;

    /*
     * First accumulate a per-word score: dictionary frequency times the
     * squared word length, divided by the word's bias. Words whose nth
     * is negative contribute nothing.
     */
    for (i = 0; i < ce->nr_words; i++) {
      struct cand_elm *elm = &ce->elm[i];
      if (elm->nth >= 0) {
        score +=
          anthy_get_nth_dic_ent_freq(elm->se, elm->nth) *
          elm->str.len * elm->str.len /
          elm->bias;
      }
    }
    /* scale by the squared segment length */
    score = score *
      (seg->len * seg->len);
    /* candidates made of fewer words score higher */
    score /= (ce->nr_words * ce->nr_words);
    if (ce->si) {
      /*
       * Adjust by the seginfo scores: multiply by 10 and by the score of
       * the segment's first seginfo, then divide twice by the score of
       * the candidate's own seginfo.
       */
      score *= 10;
      score *= seg->si[0]->score;
      score /= ce->si->score;
      score /= ce->si->score;
      /* scale by tail_ratio, taken as a fraction of 256 */
      score *= ce->si->tail_ratio;
      score /= 256;
    }
    ce->score = NORMAL_BASE + score;
  } else if (ce->flag & CEF_OCHAIRE) {
    ce->score = OCHAIRE_BASE;
  } else if (ce->flag & CEF_SINGLEWORD) {
    ce->score = SINGLEWORD_BASE;
  } else if (ce->flag & (CEF_HIRAGANA | CEF_KATAKANA)) {
    if (uncertain) {
      /*
       * The segment analysis looks unreliable, so the unconverted
       * hiragana/katakana candidates are given a high score.
       */
      ce->score = NOCONV_WITH_BIAS;
      if (CEF_KATAKANA & ce->flag) {
        /* katakana ranks just above hiragana */
        ce->score ++;
      }
    } else {
      ce->score = NOCONV_BASE;
    }
  }
  ce->score += 1;
}

/*
 * Score every candidate of a segment with eval_candidate().
 * Whether the segment is uncertain is determined once up front and passed
 * to each evaluation.
 */
static void
eval_segment(struct seg_ent *se)
{
  const int is_uncertain = uncertain_segment_p(se);
  int k = 0;

  while (k < se->cand_count) {
    eval_candidate(se, se->cands[k], is_uncertain);
    k++;
  }
}

/*
 * Entry point: score, sort and de-duplicate the candidates of every
 * segment from index nth up to the end of the segment list.
 *
 *  seg - segment list to process
 *  nth - index of the first segment to process
 *
 * Each phase below is run over all of those segments before the next
 * phase starts.
 */
void
anthy_sort_candidate(struct segment_list *seg, int nth)
{
  int k;

  /* phase 1: give every candidate a score */
  for (k = nth; k < seg->nr_segments; ++k) {
    eval_segment(anthy_get_nth_segment(seg, k));
  }

  /* phase 2: sort by that score */
  for (k = nth; k < seg->nr_segments; ++k) {
    sort_segment(anthy_get_nth_segment(seg, k));
  }

  /* phase 3: among identical candidates, zero the score of the later ones */
  for (k = nth; k < seg->nr_segments; ++k) {
    check_dupl_candidate(anthy_get_nth_segment(seg, k));
  }

  /* phase 4: sort again so the zero-score entries gather at the end */
  for (k = nth; k < seg->nr_segments; ++k) {
    sort_segment(anthy_get_nth_segment(seg, k));
  }

  /* phase 5: release the zero-score candidates */
  for (k = nth; k < seg->nr_segments; ++k) {
    release_redundant_candidate(anthy_get_nth_segment(seg, k));
  }

  /* phase 6: let anthy_proc_swap_candidate() post-process each segment */
  for (k = nth; k < seg->nr_segments; ++k) {
    anthy_proc_swap_candidate(anthy_get_nth_segment(seg, k));
  }
}
