/*
 * Sort the structures (metawords) of segments.
 *
 * Sorts the multiple candidate structures that exist for a segment.
 *
 * Copyright (C) 2000-2007 TABATA Yusuke
 *
 */
/*
  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */
#if 0		/* Patched by G-HAL */
#include <stdlib.h>
#include <math.h>

#include <anthy/segment.h>
#include <anthy/ordering.h>
#include <anthy/feature_set.h>
#include <anthy/splitter.h>
#include <anthy/diclib.h>
#include "sorter.h"
#else
#if defined(HAVE_CONFIG_H)
# include "config.h"
#endif

#if defined(HAVE_STDLIB_H)
# include <stdlib.h>
#endif
#if defined(HAVE_SYS_TYPES_H)
# include <sys/types.h>
#endif
#if defined(HAVE_STDIO_H)
# include <stdio.h>
#endif
#if defined(HAVE_MATH_H)
# include <math.h>
#endif
#if defined(HAVE_STRING_H)
# include <string.h>
#endif
#if defined(HAVE_STRINGS_H)
# include <strings.h>
#endif
#if defined(HAVE_ERRNO_H)
# include <errno.h>
#endif

#include "anthy/settings.h"	/* Patched by G-HAL, Sat,22 Nov,2008 */
#include "src-diclib/alternative_mergesort.h"	/* Patched by G-HAL, Sun,02 Nov,2008 */
#include "anthy/cand_ent_score.h"	/* Patched by G-HAL, Sat,17 Oct,2009 */
#include "anthy/segment.h"
#include "anthy/ordering.h"
#include "anthy/feature_set.h"
#include "anthy/splitter.h"
#include "anthy/diclib.h"
#include "src-splitter/wordborder.h"	/* Patched by G-HAL, Wed,19 Nov,2008 */
#include "sorter.h"
#endif

/* Handle returned by anthy_file_dic_get_section("cand_info").
 * It is assigned in anthy_infosort_init() and handed to
 * anthy_find_feature_freq() by calc_probability(). */
static void *cand_info_array;

#if 0		/* Patched by G-HAL,  */
static double
calc_probability(struct feature_list *fl)
{
  struct feature_freq *res, arg;
  res = anthy_find_feature_freq(cand_info_array,
				fl, &arg);
  if (res) {
    double pos = (double)res->f[15];
    double neg = (double)res->f[14];
    double prob = pos / (pos + neg);
    prob = prob * prob;
    /**/
    return prob;
  }
  return 0;
}
#else
/** ѥγΨͤ
 *@param		cc			ʸʻ쥯饹ʥǥХåɽѡ
 *@param[in]		fl			ѥθ
 *@return					줿Ψ
 *
 *	Patched by G-HAL
 *		Thu,20 Nov,2008
 *		Fri,03 Sep,2010
 */
static double calc_probability( struct feature_list* const fl, int* exist_flag )
{
  struct feature_freq *res, arg;
  res = anthy_find_feature_freq(cand_info_array,
				fl, &arg);
  if (res) {
    double pos = (double)res->f[15];
    double neg = (double)res->f[14];
    double prob1 = pos / (pos + neg);
    double prob2 = (pos + anthy_settings.anthy_mode.lattice.corpus_adj_by_count_numerator) / (pos + neg + anthy_settings.anthy_mode.lattice.corpus_adj_by_count_denominator);
    double prob = (prob1 < prob2) ? prob1 : prob2;
    prob = prob * prob;
    /**/
    if (exist_flag) {
      *exist_flag = 1;
    }
    return prob;
  }
  if (exist_flag) {
    *exist_flag = 0;
  }
  return 0;
}
#endif

/** Compute the structure score of a metaword and cache it in
 *  mw->struct_score.
 *
 * A struct_score of 0 is treated as "not evaluated yet"; any other value is
 * returned unchanged without recomputation.
 *
 *@param[in]	 prev_commit	class and dependent-word information of the
 *				preceding segment; only read when with_corpus
 *				is non-zero
 *@param[in,out] mw		metaword to evaluate (may be NULL)
 *@param	 with_corpus	non-zero to look the metaword up in the corpus
 *				statistics through calc_probability()
 *@return	the score stored in mw->struct_score (always >= 1 after
 *		evaluation), the previously stored score when mw had already
 *		been evaluated, or 0 when mw is NULL
 *
 *	Patched by G-HAL
 *		Sun,16 Nov,2008
 *		Thu,20 Nov,2008
 *		Tue,03 Feb,2009
 *		Wed,04 Feb,2009
 *		Thu,30 Apr,2009
 *		Mon,19 Oct,2009, Thu,22 Oct,2009
 *		Tue,20 Jul,2010 - Wed,21 Jul,2010
 *		Fri,13 Aug,2010
 */
int anthy_mw_eval( const struct seg_dep_info_t* const prev_commit, struct meta_word* const mw, int with_corpus )
{
  struct feature_list fl;
  double prob;
  double fl_prob = 0.0;
  int exist_flag = 0;

  /* Callers use the return value, so every exit path must return an int. */
  if (NULL == mw) {
    return 0;
  }
  if (0 != mw->struct_score) {
    /* Already evaluated: hand back the cached score. */
    return mw->struct_score;
  }

  if (with_corpus) {
    /* Build the lookup key.  Which dependent word/class goes into the key
     * depends on the corpus analysis mode selected at compile time. */
    anthy_feature_list_init( &fl );

    anthy_feature_list_set_mw_features( &fl, mw->mw_features );
   #if defined(CORPUS_ANALYSIS_MODE_IID)
    anthy_feature_list_set_dep_word(    &fl, mw->dep_word_hash );
    anthy_feature_list_set_dep_class(   &fl, mw->dep_class );
   #elif defined(CORPUS_ANALYSIS_MODE_IDI)
    if (SEG_HEAD != prev_commit->seg_class) {
      anthy_feature_list_set_dep_word(    &fl, prev_commit->dep_word_hash );
      anthy_feature_list_set_dep_class(   &fl, prev_commit->dep_class );
    }
   #elif defined(CORPUS_ANALYSIS_MODE_HID_IDI_IwDT)
    if (SEG_HEAD == prev_commit->seg_class) {
      /* H+ID mode: use this metaword's own dependent word. */
      anthy_feature_list_set_dep_word(    &fl, mw->dep_word_hash );
      anthy_feature_list_set_dep_class(   &fl, mw->dep_class );
    } else {
      /* ID+I mode: use the preceding segment's dependent word. */
      anthy_feature_list_set_dep_word(    &fl, prev_commit->dep_word_hash );
      anthy_feature_list_set_dep_class(   &fl, prev_commit->dep_class );
    }
    /* No IwD+T handling is present in this branch. */
   #else
    /* One of the CORPUS_ANALYSIS_MODE_* macros must be defined. */
   # error
   #endif
    anthy_feature_list_set_class_trans( &fl, prev_commit->seg_class, mw->seg_class );
    anthy_feature_list_set_cur_class(   &fl, mw->seg_class );

    anthy_feature_list_sort( &fl );
    fl_prob = calc_probability( &fl, &exist_flag );
    anthy_feature_list_free( &fl );
  }

  prob = 0.1;
  if (exist_flag) {
    /* The corpus knows this feature list: base value plus its probability. */
    prob += fl_prob;
  } else {
    /* No corpus entry (or corpus not consulted): start from the configured
     * default and reduce it according to the dependent-word depth/length
     * hints stored in the metaword. */
    prob = with_corpus ? anthy_settings.anthy_mode.candidate.struct_prob_without_corpus : anthy_settings.anthy_mode.candidate.struct_prob_default;
    if (0 < mw->cand_hint_depth_of_dep) {
      double	prob_ratio = 0.0;
      if (anthy_settings.anthy_mode.depgraph.score.decrease_biasratio_threshold <= mw->cand_hint_depth_of_dep) {
        prob_ratio += anthy_settings.anthy_mode.depgraph.score.decrease_biasratio;
      }
      prob_ratio += (mw->cand_hint_depth_of_dep  * anthy_settings.anthy_mode.depgraph.score.decrease_ratio);
      prob_ratio += (mw->cand_hint_length_of_dep * anthy_settings.anthy_mode.depgraph.score.decrease_ratio_by_length);
      prob -= prob * prob_ratio;
    }
  }
  if (prob < 0.0) {
    /* The reduction above can push the value below zero. */
    prob = 1.0 / 1000.0;
  }

  mw->struct_score = RATIO_BASE * RATIO_BASE;
  mw->struct_score = (int)( prob * mw->struct_score );

  /* Penalise metawords flagged as suffix or weak connection. */
  if (MW_FEATURE_SUFFIX & mw->mw_features) {
    mw->struct_score /= 2;
  }
  if (MW_FEATURE_WEAK_CONN & mw->mw_features) {
    mw->struct_score /= 10;
  }

  /* Keep the score non-zero so it is never mistaken for "not evaluated". */
  if (mw->struct_score < 1) {
    mw->struct_score = 1;
  }
  return mw->struct_score;
}

#if 0	/* Patched by G-HAL, Sat,25 Apr,2009 */
static void
seg_eval(struct seg_ent *prev_seg,
	 struct seg_ent *seg)
{
  int i;
  for (i = 0; i < seg->nr_metaword; i++) {
    mw_eval(prev_seg, seg, seg->mw_array[i]);
  }
}

static void
sl_eval(struct segment_list *seg_list)
{
  int i;
  struct seg_ent *prev_seg = NULL;
  for (i = 0; i < seg_list->nr_segments; i++) {
    struct seg_ent *seg;
    seg = anthy_get_nth_segment(seg_list, i);
    seg_eval(prev_seg, seg);
    prev_seg = seg;
  }
}
#else
/** ʸ metaword ɾ׻
 *@param		prev_commit_arg		ƬʸϤ줿Ƥʻ
 *@param[in,out]	seg_list		ϥǡ
 *
 *	Patched by G-HAL
 *		Sat,25 Apr,2009
 *		Tue,20 Jul,2010 - Wed,21 Jul,2010
 */
static void sl_eval( const struct seg_dep_info_t* const prev_commit_arg, struct segment_list* const seg_list )
{
  int i, j;
  struct seg_dep_info_t	prev_commit = *prev_commit_arg;
  for (i = 0; i < seg_list->nr_segments; ++i) {
    struct seg_ent* const	seg = anthy_get_nth_segment( seg_list, i );
    size_t			next_prev_commit = 0;
    int				max_score = -1;

    for (j = 0; j < seg->nr_metaword; ++j) {
      struct meta_word* const	mw = seg->mw_array[j];
      const int			score = anthy_mw_eval( &prev_commit, mw, 1 );
      if (max_score < score) {
	max_score = score;
	next_prev_commit = j;
      }
    }

    if ((0 <= seg->provisional_committed) && (seg->provisional_committed < seg->nr_metaword)) {
      next_prev_commit = seg->provisional_committed;
    }
    if (next_prev_commit < seg->nr_metaword) {
      struct meta_word* const	mw = seg->mw_array[next_prev_commit];
      prev_commit.seg_class     = mw->seg_class;
      prev_commit.dep_class     = mw->dep_class;
      prev_commit.dep_word_hash = mw->dep_word_hash;
    } else {
      /* 䤬¸ߤʤ  ʿ̾졿ʸ󼨤Ƥ */
      prev_commit.seg_class     = SEG_BUNSETSU;
      prev_commit.dep_class     = DEP_NONE;
      prev_commit.dep_word_hash = 0;
    }
  }
  return;
}
#endif

/** Comparison callback that orders metaword pointers by descending
 *  struct_score.
 *
 *@param[in]	p1	address of a `struct meta_word *` array element
 *@param[in]	p2	address of a `struct meta_word *` array element
 *@return	a negative value when *p1 has the higher score, a positive
 *		value when *p2 has the higher score, 0 when the scores are equal
 *
 *	Patched by G-HAL, Sat,01 Nov,2008
 */
static int
metaword_compare_func(const void *p1, const void *p2)
{
  const struct meta_word* const* const s1 = p1;
  const struct meta_word* const* const s2 = p2;

  /* Compare rather than subtract: the difference of two ints can overflow,
   * whereas this form yields -1, 0 or 1 for any pair of scores. */
  return ((*s2)->struct_score > (*s1)->struct_score)
       - ((*s2)->struct_score < (*s1)->struct_score);
}

/** Score the metawords of every segment, then sort each segment's
 *  metaword array with metaword_compare_func().
 *
 * Segments with no metaword array or fewer than two metawords are left
 * untouched.  A non-zero result from mergesort() is logged and the process
 * is aborted.
 *
 *@param[in]	 prev_commit	context preceding the first segment; passed
 *				on to sl_eval()
 *@param[in,out] seg_list	segment list to score and sort
 *
 *	Patched by G-HAL
 *		Sun,02 Nov,2008, Mon,03 Nov,2008
 *		Sat,25 Apr,2009
 *		Wed,24 Jun,2009
 *		Wed,21 Jul,2010
 */
void anthy_sort_metaword( const struct seg_dep_info_t* const prev_commit, struct segment_list* const seg_list )
{
  int seg_idx;

  /* Fill in struct_score for every metaword first. */
  sl_eval( prev_commit, seg_list );

  for (seg_idx = 0; seg_idx < seg_list->nr_segments; seg_idx++) {
    struct seg_ent* const seg = anthy_get_nth_segment( seg_list, seg_idx );

    if (!seg->mw_array || (seg->nr_metaword <= 1)) {
      /* Nothing to sort for this segment. */
     #if defined(DEBUG) && (1 <= DEBUG)
      anthy_log( 1, "anthy_sort_metaword(): mergesort(): %d.\n", seg->nr_metaword );
     #endif
      continue;
    }

    if (0 != mergesort(seg->mw_array, seg->nr_metaword, sizeof(struct meta_word *),
                       metaword_compare_func)) {
      anthy_log( 1, "BUG: anthy_sort_metaword(): mergesort(): %d:'%s'\n", errno, strerror(errno) );
      abort();
    }
  }
}

/* Initialise this module: fetch the "cand_info" section through
 * anthy_file_dic_get_section() and keep the result in cand_info_array,
 * which calc_probability() later passes to anthy_find_feature_freq(). */
void
anthy_infosort_init(void)
{
  cand_info_array = anthy_file_dic_get_section("cand_info");
}
/* vim:ts=8 sw=2 nomodified:
 */
