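/*
 * Builds the list of conversion candidates for a segment.
 * make_candidates() is the entry point, called from the context code.
 * proc_splitter_info() obtains word-length information from the splitter
 * and generates candidates at the same time;
 * it calls expand_segment() to have the segment structure analysed.
 * commit_split_ent() assigns candidates to an analysed structure.
 */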
/*
 * Funded by IPA (Exploratory Software Project, "Mitou") 2001 9/30
 * Copyright (C) 2000-2001 TABATA Yusuke
 * Copyright (C) 2001 UGAWA Tomoharu
 */
#include <stdio.h>
#include <stdlib.h>

#include <dic.h>
#include <splitter.h>
#include "main.h"
#include "context.h"
#include "segexpand.h"

/*
 * Forward declarations of the file-local helpers.
 * They are declared static here, so the definitions below have internal
 * linkage even though they do not repeat the keyword.
 */
static void push_back_noconv_candidate(struct seg_ent *);
static void push_back_singleword_candidate(struct seg_ent *);
static void push_back_candidate(struct seg_ent *, struct cand_ent *e);
static int enum_candidates(struct seg_ent *seg, struct split_ent *e,
			   struct cand_ent *, int);
static void print_candidate(struct cand_ent *c);
static struct cand_ent *dup_candidate(struct cand_ent *c);
static void proc_splitter_info(struct seg_ent *, struct splitter_context *c, int, int);
static void make_candidate_from_seginfo(struct seg_ent *, struct seg_info *);

/*
 * Recursively assign a dictionary entry to each word of a split, one word
 * per recursion level, and push every completed combination onto the
 * segment's candidate list.
 *
 *  seg: segment that receives the finished candidates
 *  s:   split whose words s->we[0 .. nr_words-1] are being filled in
 *  c:   partially built candidate; the text of words [0, n) is already
 *       accumulated in c->str.  c stays owned by the caller.
 *  n:   index of the word handled at this level
 *
 * Returns the number of candidates pushed onto seg.
 */
int enum_candidates(struct seg_ent *seg, struct split_ent *s,
		    struct cand_ent *c, int n)
{
  int i, p;
  struct cand_ent *cand;
  int nr_cands = 0;

  if (n == s->nr_words) {
    /* All words are decided. */
    /* Append the part of the text after the analysed span as it is. */
    xstr tail;
    tail.len = s->xs.len - (s->from + s->len);
    tail.str = &s->xs.str[s->from+s->len];
    xstrcat(&c->str, &tail);
    push_back_candidate(seg, dup_candidate(c));
    return 1;
  }

  p = get_nr_dic_ents(s->we[n].se, &s->we[n].str);
  /* Without a valid part of speech, or without any dictionary entry,
   * keep the word's own text unconverted and go on to the next word. */
  if (wtype_get_pos(s->we[n].wt) == POS_INVAL || p == 0) {
    cand = dup_candidate(c);
    cand->elm[n].nth = -1;
    xstrcat(&cand->str, &s->we[n].str);
    nr_cands = enum_candidates(seg, s, cand, n+1);
    release_cand_ent(cand);
    return nr_cands;
  }

  /* A part of speech is assigned: try every dictionary entry that
   * wtypecmp() accepts against it. */
  for (i = 0; i < p; i++) {
    wtype_t wt;
    /* Look the entry up with the word's own text, the same key that
     * get_nr_dic_ents() and get_nth_dic_ent_str() are given; the
     * accumulated candidate text in c->str is not a lookup key. */
    get_nth_dic_ent_wtype(s->we[n].se, &s->we[n].str, i, &wt);
    if (wtypecmp(s->we[n].wt, wt)) {
      xstr xs;
      cand = dup_candidate(c);
      get_nth_dic_ent_str(cand->elm[n].s, &s->we[n].str, i, &xs);
      cand->elm[n].nth = i;
      xstrcat(&cand->str, &xs);
      /* xs was only needed for the append above.
       * NOTE(review): this assumes get_nth_dic_ent_str() returns a
       * heap buffer owned by the caller, as
       * push_back_singleword_candidate() relies on when it stores the
       * same buffer in a candidate. */
      free(xs.str);
      nr_cands += enum_candidates(seg, s, cand, n+1);
      release_cand_ent(cand);
    }
  }
  return nr_cands;
}

/*
 * Make an independent copy of candidate c.
 *
 * The copy gets its own string buffer and its own element array.
 * Only nr_words, str, elm, rule_id and flag are copied; any other member
 * of struct cand_ent is left uninitialised in the copy.
 * Returns the newly allocated candidate.
 */
struct cand_ent *dup_candidate(struct cand_ent *c)
{
  struct cand_ent *copy = malloc(sizeof(struct cand_ent));
  int idx;

  copy->flag = c->flag;
  copy->rule_id = c->rule_id;
  copy->nr_words = c->nr_words;

  /* private copy of the text */
  copy->str.str = xstr_dup_str(&c->str);
  copy->str.len = c->str.len;

  /* private copy of the per-word elements */
  copy->elm = malloc(sizeof(struct cand_elm) * c->nr_words);
  for (idx = 0; idx < copy->nr_words; idx++) {
    copy->elm[idx] = c->elm[idx];
  }

  return copy;
}

/*
 * Append candidate e to the end of segment s's candidate array.
 *
 * e->type is reset to CAND_NONE.  On success the array stores the pointer
 * e itself (no copy is made).
 *
 * If the array cannot be grown, the existing list and count are left
 * untouched and e is not added; e is not released here because this
 * function cannot tell who owns its string buffer.
 */
void push_back_candidate(struct seg_ent *s, struct cand_ent *e)
{
  struct cand_ent **grown;

  /* reset the candidate type */
  e->type = CAND_NONE;

  /* Grow through a temporary so a failed realloc() neither loses the old
   * array nor leaves cand_count pointing past it. */
  grown = realloc(s->cands, sizeof(struct cand_ent *) * (s->cand_count + 1));
  if (grown == NULL) {
    return;
  }

  /* add e to the segment */
  s->cands = grown;
  s->cands[s->cand_count] = e;
  s->cand_count++;
}

/*
 * Add candidates that treat the whole segment text as a single
 * dictionary word.
 *
 * Every dictionary entry for seg->str whose wtype_get_ct() value is
 * CT_SYUSI or CT_NONE becomes one candidate with no word elements,
 * flag CEF_SINGLEWORD and rule_id -2.  The buffer filled in by
 * get_nth_dic_ent_str() is stored in the candidate directly, not copied.
 */
void push_back_singleword_candidate(struct seg_ent *seg)
{
  seq_ent_t ent = get_seq_ent_from_xstr(&seg->str);
  int total = get_nr_dic_ents(ent, &seg->str);
  struct cand_ent *cand;
  wtype_t wt;
  xstr word;
  int k;

  for (k = 0; k < total; k++) {
    int ct;

    get_nth_dic_ent_wtype(ent, &seg->str, k, &wt);
    ct = wtype_get_ct(wt);
    if (ct != CT_SYUSI && ct != CT_NONE) {
      /* other values are not offered as a stand-alone word */
      continue;
    }

    cand = malloc(sizeof(struct cand_ent));
    get_nth_dic_ent_str(ent, &seg->str, k, &word);
    cand->flag = CEF_SINGLEWORD;
    cand->rule_id = -2;
    cand->nr_words = 0;
    cand->elm = 0;
    cand->str.len = word.len;
    cand->str.str = word.str;
    push_back_candidate(seg, cand);
  }
}

/*
 * Allocate a candidate with no word elements, rule_id 0 and flag
 * CEF_NOCONV.  The caller fills in the str member.
 */
static struct cand_ent *new_noconv_cand_ent(void)
{
  struct cand_ent *ce = malloc(sizeof(struct cand_ent));

  ce->nr_words = 0;
  ce->elm = 0;
  ce->rule_id = 0;
  ce->flag = CEF_NOCONV;
  return ce;
}

/*
 * Add the two unconverted candidates to the segment: first the segment
 * text as it is, then the result of xstr_hira_to_kata() on it.
 * Both are pushed unconditionally.
 */
void push_back_noconv_candidate(struct seg_ent *seg)
{
  struct cand_ent *plain;
  struct cand_ent *kata;
  xstr *converted;

  /* the text as it is */
  plain = new_noconv_cand_ent();
  plain->str.str = xstr_dup_str(&seg->str);
  plain->str.len = seg->str.len;
  push_back_candidate(seg, plain);

  /* the xstr_hira_to_kata() form; the temporary is released after copying */
  kata = new_noconv_cand_ent();
  converted = xstr_hira_to_kata(&seg->str);
  kata->str.str = xstr_dup_str(converted);
  kata->str.len = converted->len;
  free_xstr(converted);
  push_back_candidate(seg, kata);
}

/*
 * Debug output of one candidate on stdout: the candidate text followed by
 * ":(<rule_id>)<score> ".
 */
void print_candidate(struct cand_ent *c)
{
  putxstr(&c->str);
  printf(":(%d)%d ",c->rule_id,c->score);
}

/*
 * Debug output of one segment on stdout: the segment text, then all of its
 * candidates in parentheses (each followed by a comma), then ":" and a
 * newline.
 */
void print_segment(struct seg_ent *e)
{
  int idx = 0;

  putxstr(&e->str);
  fputs("(", stdout);
  while (idx < e->cand_count) {
    print_candidate(e->cands[idx]);
    fputs(",", stdout);
    idx++;
  }
  fputs(")", stdout);
  fputs(":\n", stdout);
}

int commit_split_ent(struct split_ent *e)
{
  /*
   * segexpandsegstructˤ븡η̤
   * ñʻ줬ꤵ줿֤ǥߥåȤ롣
   */
  struct cand_ent *c;
  xstr head;
  int i,n;

  head.str = e->xs.str;
  head.len = e->from;

  /* ʣ(1ޤ)ñǹʸñƤƤ */
  c = (struct cand_ent *)malloc(sizeof(struct cand_ent));
  c->nr_words = e->nr_words;
  c->str.str = xstr_dup_str(&head);
  c->str.len = head.len;
  c->elm = malloc(sizeof(struct cand_elm)*c->nr_words);
  for (i = 0; i < e->nr_words; i++) {
    c->elm[i].s = e->we[i].se;
    c->elm[i].wt = e->we[i].wt;
    c->elm[i].bias = e->we[i].bias;
    c->elm[i].str = e->we[i].str;
  }
  c->rule_id = e->rule;
  c->flag = CEF_NONE;
  n = enum_candidates(e->seg, e, c, 0);
  release_cand_ent(c);
  return n;
}

void make_candidate_from_seginfo(struct seg_ent *e, struct seg_info *s)
{
  struct split_ent *se;
  int nr, i;
  if (s->prefix.len || s->postfix.len) {
    return ; /* To be done*/
  }

  se = malloc(sizeof(*se));
  nr = 1;

  if (s->prefix.len) {
    nr ++;
  }
  if (s->postfix.len) {
    nr ++;
  }

  se->nr_words = nr;
  se->we = malloc(sizeof(struct word_ent) * se->nr_words);
  i = 0;

  /* Ω쥳 */
  se->we[i].str.str = &e->str.str[s->prefix.len];
  se->we[i].str.len = s->core.len;
  se->we[i].se = get_seq_ent_from_xstr(&se->we[i].str);
  se->we[i].wt = s->core.wt;
  se->we[i].bias = 10 / s->bias;
  i++;

  se->from = 0;
  se->len = s->core.len + s->prefix.len + s->postfix.len;
  se->xs.str = e->str.str;
  se->xs.len = e->str.len;
  se->rule = -1;
  se->seg = e;
  commit_split_ent(se);
  free_split_ent(se);
}

/*
 * Generate candidates for a segment from the splitter's information.
 *
 *  se:  segment that receives the candidates
 *  c:   splitter context that is queried
 *  len: length passed to get_nr_seginfo()/get_nth_seginfo() together with
 *       se->from
 *  f:   when zero, SI_NORMAL information is used only if it has neither
 *       prefix nor postfix; when non-zero, all SI_NORMAL information is
 *       used
 *
 * SI_NORMAL information is handed to make_candidate_from_seginfo() and its
 * total length (prefix + core + postfix) is remembered; afterwards
 * expand_segment() is called once for every remembered length, in
 * ascending order.  SI_CAND information becomes a CEF_OCHAIRE candidate
 * when len equals the segment length.
 *
 * If the scratch array cannot be allocated, nothing is generated.
 */
void proc_splitter_info(struct seg_ent *se, struct splitter_context *c,
			int len, int f)
{
  int n, i;
  /* One flag per possible length 0..len.  Heap allocated (and zeroed by
   * calloc) instead of alloca(), which is non-standard and would put a
   * caller-sized array on the stack. */
  int *l = calloc(len >= 0 ? (size_t)len + 1 : 1, sizeof(*l));

  if (l == NULL) {
    return;
  }

  n = get_nr_seginfo(c, se->from, len);
  for (i = 0; i < n; i++) {
    struct seg_info s;

    get_nth_seginfo(c, &s, se->from, len, i);
    switch (s.type) {
    case SI_NORMAL:
      if (f || (s.prefix.len == 0 && s.postfix.len == 0)) {
	int total = s.prefix.len + s.postfix.len + s.core.len;

	make_candidate_from_seginfo(se, &s);
	/* never write outside the flag array */
	if (total >= 0 && total <= len) {
	  l[total] = 1;
	}
      }
      break;
    case SI_CAND:
      if (se->len == len) {
	struct cand_ent *ce;
	ce = malloc(sizeof(struct cand_ent));
	ce->nr_words = 0;
	ce->str = s.cand; /* the buffer s.cand.str is taken over, not copied */
	ce->elm = 0;
	ce->rule_id = 0;
	ce->flag = CEF_OCHAIRE;
	push_back_candidate(se, ce);
      }
      break;
    default:
      /* other kinds of information are ignored */
      break;
    }
  }

  /* expand the segment for every length that was seen */
  for (i = 0; i <= len; i++) {
    if (l[i]) {
      expand_segment(se, i);
    }
  }

  free(l);
}

/*
 * Entry point called from context.c: fill in the candidates of segment e.
 *
 * Lengths are tried from the full segment length downwards.  The first
 * length for which the splitter has information is processed with
 * proc_splitter_info(), first with f = 0 and, if the segment still has no
 * candidate afterwards, again with f = 1; the search then stops.
 * Single-word candidates and the unconverted candidates are always
 * appended at the end, so at least the latter are always present.
 */
void make_candidates(struct anthy_context *ac, struct seg_ent *e)
{
  struct splitter_context *sc = &ac->word_split_info;
  int try_len = e->len;

  /* candidates from the splitter's analysis */
  while (try_len > 0) {
    /* A shorter length is not tried when the character right after it has
     * the XCT_PART bit set. */
    if (try_len < e->len &&
	(get_xchar_type(e->str.str[try_len]) & XCT_PART)) {
      /* FIXME in the original code.
       * NOTE(review): this steps down by two, so the next shorter length
       * is skipped as well; confirm that this is intended. */
      try_len -= 2;
      continue;
    }
    if (get_nr_seginfo(sc, e->from, try_len)) {
      proc_splitter_info(e, sc, try_len, 0);
      if (e->cand_count == 0) {
	proc_splitter_info(e, sc, try_len, 1);
      }
      break;
    }
    try_len--;
  }

  /* candidates that take the whole segment as one word */
  push_back_singleword_candidate(e);
  /* the unconverted text and its xstr_hira_to_kata() form */
  push_back_noconv_candidate(e);
}
