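/*
 * Builds word_list entries, the smallest units a phrase is made of.
 *
 * init_word_seq_tab()    initialises the pointers (node ids) into the
 *                        dependent-word table
 * release_word_seq_tab() releases the dependent-word table
 * make_word_list_all()   enumerates the substrings that have the form of
 *                        a phrase
 *   (NOTE: one further continuation line of the original comment was lost
 *    to encoding damage and could not be recovered.)
 */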

//#define DEBUG_CONJUGATE_TABLE

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

#include <alloc.h>
#include <xstr.h>
#include <wtype.h>
#include <conf.h>
#include <ruleparser.h>
#include <dic.h>
#include <splitter.h>
#include "wordborder.h"

/* Tunable initialised from DEFAULT_PARAM_POW_OF_LEN.
 * NOTE(review): nothing in the visible part of this file reads it;
 * commit_word_list() uses the macro directly -- confirm who consumes it. */
int param_pow_of_len = DEFAULT_PARAM_POW_OF_LEN;
/* Allocator for struct wordseq_rule objects; created in init_word_seq_tab(). */
static allocator wordseq_rule_ator;
/* Word type resolved by init_wordlist(); used as core_wt of the dummy head. */
static wtype_t wt_noun;

/*
 * Word-connection rule: pairs a word type with a node id.
 * Rules are kept in a singly linked list; gRules is the list head and only
 * its `next` member is used.
 */
static struct wordseq_rule{
  wtype_t wt; /* word type filled in by name_to_wtype() from `name` */
  char *name; /* part-of-speech name; strdup'ed, freed by wordseq_rule_dtor() */
  int node_id; /* node id obtained from get_node_id_by_name() */
  struct wordseq_rule *next; /* next rule in the list, 0 at the end */
}gRules;

/* Forward declarations of the file-local helpers.
 * These give the functions internal linkage; the definitions further down
 * omit `static` and inherit it from here. */
static void make_pre_words(struct splitter_context *, struct word_list *);
static void make_suc_words(struct splitter_context *, struct word_list *);
static void make_following_word_list(struct splitter_context *,
				     struct word_list *);
static void parse_line(char **, int nr);
static int calc_suffix_affinity(seq_ent_t core, seq_ent_t suf,
				int len, int suc);
static int ipow(int x, int y);
static void wordseq_rule_dtor(void *);

static void make_word_list(struct splitter_context *c, seq_ent_t se, int from, int len);
static void make_dummy_head(struct splitter_context *c);
static void setup_word_list(struct word_list *, int, int);

/*
 * Debug aid: writes one word_list to stdout on a single line as
 *   <prefix>.<core>.<postfix>-<trailing part> <type name> <score> <conn_score>
 * where each part is a slice of the context's character array.
 * A NULL wl prints "--" and nothing else.
 * The type name falls back to "---" when wl->core_wt_name is NULL.
 */
void print_word_list(struct splitter_context *c, struct word_list *wl)
{
  xstr seg;
  int at;

  if (!wl) {
    printf("--\n");
    return ;
  }

  /* prefix: from wl->from up to the start of the core word */
  at = wl->from;
  seg.len = wl->core_from - wl->from;
  seg.str = c->ce[at].c;
  putxstr(&seg);
  printf(".");

  /* core word */
  at = wl->core_from;
  seg.len = wl->core_len;
  seg.str = c->ce[at].c;
  putxstr(&seg);
  printf(".");

  /* postfix, directly behind the core word */
  at += wl->core_len;
  seg.len = wl->postfix_len;
  seg.str = c->ce[at].c;
  putxstr(&seg);
  printf("-");

  /* trailing (dependent-word) part, directly behind the postfix.
   * NOTE(review): follow_count is used as a character length here --
   * confirm it is a length and not a word count. */
  at += wl->postfix_len;
  seg.len = wl->follow_count;
  seg.str = c->ce[at].c;
  putxstr(&seg);

  printf(" %s %d %d\n",
         wl->core_wt_name ? wl->core_wt_name : "---",
         wl->score, wl->conn_score);
}

/*
 * Destructor registered with the wordseq_rule allocator
 * (see create_allocator() in init_word_seq_tab()).
 * Frees the name string owned by the rule; the rule object itself is not
 * freed here.
 */
void wordseq_rule_dtor(void *p)
{
  struct wordseq_rule *rule = (struct wordseq_rule *)p;
  char *owned_name = rule->name;

  free(owned_name);
}

/*
 * Integer power: returns x multiplied by itself y times.
 * For y <= 0 no multiplication happens and the result is 1.
 * No overflow checking is done.
 */
int ipow(int x, int y)
{
  int result = 1;
  int remaining = y;

  while (remaining > 0) {
    result *= x;
    remaining--;
  }
  return result;
}

/*
 * Computes the final score of a completed word_list and then commits it,
 * i.e. pushes it onto the front of the list kept for its start position
 * (c->word_split_info->lists[wl->from]).
 *
 * Score adjustments, in order:
 *   + 5 * core_len^DEFAULT_PARAM_POW_OF_LEN * f, with f = ffs(freq) + 3
 *     capped at 6
 *   - 30000 per prefix/postfix character
 *   + 500 per follow_count
 *   + SCORE_PER_LEN per character of total length
 *   finally divided by conn_score
 *
 * NOTE(review): ffs() yields the position of the *lowest* set bit of freq;
 * confirm that this (rather than the highest bit) is the intended measure.
 * NOTE(review): the exponent comes from the macro, not from the global
 * param_pow_of_len -- confirm which one is meant to be used.
 */
void commit_word_list(struct splitter_context *c, struct word_list *wl)
{
  int len = wl->core_len;
  int f = ffs(wl->freq)+3;
  if (f > 6) {
    f = 6;
  }

  wl->score += 5*ipow(len, DEFAULT_PARAM_POW_OF_LEN)*f;

  /* penalty for prefix and postfix characters */
  wl->score -= 30000*(wl->postfix_len + wl->prefix_len);

  /* bonus for the dependent words that follow (adds 0 when there are none) */
  wl->score += (500 * wl->follow_count);

  /* bonus for overall length */
  wl->score += wl->len * SCORE_PER_LEN;

  /* A conn_score of 0 would make the division undefined behaviour;
   * in that case the score is left undivided. */
  if (wl->conn_score != 0) {
    wl->score /= wl->conn_score;
  }

  /* link in at the head of the list for this start position */
  wl->next = c->word_split_info->lists[wl->from].next;
  c->word_split_info->lists[wl->from].next = wl;
}

/*
 * Obtains one word_list from the allocator stored in
 * c->word_split_info->WlAllocator and returns smalloc()'s result unchanged.
 * The fields of the returned object are not set here.
 */
struct word_list *alloc_word_list(struct splitter_context *c)
{
  struct word_list *fresh = smalloc(c->word_split_info->WlAllocator);

  return fresh;
}

/*
 * Score bonus for how well an affix goes with a core word.
 *
 *   core  - sequence entry of the core word
 *   suf   - sequence entry of the prefix or postfix
 *   len   - length of the affix; the bonus scales with it
 *   isSuc - non-zero when the affix is a postfix
 *
 * Returns 40000*len when suf carries SF_NUM and core carries NF_NUM, plus,
 * for postfixes only, 20000*len when suf carries SF_JN and core carries
 * NF_NAME.  Otherwise 0.
 *
 * The flags of `core` are only queried when the matching flag of `suf` is
 * set; callers may pass a core of 0 (see make_dummy_head()), so this
 * evaluation order is kept on purpose.
 */
int calc_suffix_affinity(seq_ent_t core, seq_ent_t suf,
			 int len, int isSuc)
{
  int bonus = 0;

  /* numeric affix attached to a numeric core */
  if ((get_seq_flag(suf) & SF_NUM) && (get_seq_flag(core) & NF_NUM)) {
    bonus += 40000 * len;
  }

  if (!isSuc) {
    return bonus;
  }

  /* postfix only: SF_JN affix attached to an NF_NAME core */
  if ((get_seq_flag(suf) & SF_JN) && (get_seq_flag(core) & NF_NAME)) {
    bonus += 20000 * len;
  }
  return bonus;
}

/* ³ν졢ưդ */
void make_following_word_list(struct splitter_context *c, struct word_list *tmpl)
{
  xstr xs;
  xs.str = c->ce[tmpl->from+tmpl->len].c;
  xs.len = c->char_count - tmpl->from - tmpl->len;
  if (tmpl->node_id == -1) {
    struct wordseq_rule *r;
    struct word_list new_tmpl;
    new_tmpl = *tmpl;
    for (r = gRules.next; r; r = r->next) {
      new_tmpl.core_wt = r->wt;
      new_tmpl.core_wt_name = r->name;
      new_tmpl.node_id = r->node_id;
      scan_node(c, &new_tmpl, &xs, new_tmpl.node_id);
    }
  } else {
    scan_node(c, tmpl, &xs, tmpl->node_id);
  }
}

/* 򤯤äĤ */
void make_suc_words(struct splitter_context *c, struct word_list *tmpl)
{
  int i;
  for (i = 1; 
       i <= c->word_split_info->seq_len[tmpl->core_from + tmpl->core_len];
       i++){
    xstr xs;
    seq_ent_t s;
    xs.str = c->ce[tmpl->core_from+tmpl->core_len].c;
    xs.len = i;
    s = get_seq_ent_from_xstr(&xs);
    if (get_seq_ent_pos(s, POS_SUC)) {
      struct word_list new_tmpl;
      new_tmpl = *tmpl;
      new_tmpl.len += i;
      new_tmpl.postfix_len += i;
      new_tmpl.score += calc_suffix_affinity(new_tmpl.core_seq, s, i, 1);
      make_following_word_list(c, &new_tmpl);
    }
  }
}

/* Ƭ򤯤äĤ */
void make_pre_words(struct splitter_context *c, struct word_list *tmpl)
{
  int i;
  /* Ƭ󤹤 */
  for (i = 1; 
       i <= c->word_split_info->rev_seq_len[tmpl->core_from]; i++) {
    xstr xs;
    seq_ent_t s;
    xs.str = c->ce[tmpl->core_from-i].c;
    xs.len = i;
    s = get_seq_ent_from_xstr(&xs);
    if (get_seq_ent_pos(s, POS_PRE)) {
      struct word_list new_tmpl;
      new_tmpl = *tmpl;
      new_tmpl.from = tmpl->from - i;
      new_tmpl.len = tmpl->len + i;
      new_tmpl.score += calc_suffix_affinity(new_tmpl.core_seq, s, i, 0);
      new_tmpl.prefix_len += i;
      make_following_word_list(c, &new_tmpl);
      if (get_seq_flag(tmpl->core_seq) & NF_NUM) {
	/* ξ⤯äĤ */
	make_suc_words(c, &new_tmpl);
      }
    }
  }
}

/*
 * Resets a word_list template so that it describes a bare core word of
 * `len` characters starting at `from`: no prefix, no postfix, nothing
 * following, score 0, conn_score 1, no word types, not bound to a node
 * (node_id -1).
 */
void setup_word_list(struct word_list *wl, int from, int len)
{
  /* extent: the whole word is the core word */
  wl->from = from;
  wl->core_from = from;
  wl->len = len;
  wl->core_len = len;

  /* nothing attached yet */
  wl->prefix_len = 0;
  wl->postfix_len = 0;
  wl->follow_count = 0;

  /* scoring */
  wl->score = 0;
  wl->conn_score = 1;
  wl->freq = 1; /* treat it as a low-frequency word for now */

  /* word types and core entry */
  wl->core_wt = wt_none;
  wl->prefix_wt = wt_none;
  wl->postfix_wt = wt_none;
  wl->core_wt_name = NULL;
  wl->core_seq = 0;

  /* not bound to a node */
  wl->node_id = -1;
}

/*
 * For one independent word (entry `se`, `len` characters at `from`),
 * builds the phrase candidates (= word_list) obtained by attaching
 * prefixes, postfixes and following words.
 *
 * Every rule in gRules whose word type has a non-zero frequency for `se`
 * is applied: the template receives the rule's word type, name, node id
 * and that frequency.  Prefixes and postfixes are only tried when the
 * rule's word type is POS_NOUN; the plain word is always passed to
 * make_following_word_list().
 */
void make_word_list(struct splitter_context *c, seq_ent_t se, int from, int len)
{
  struct word_list base;
  struct wordseq_rule *rule;

  /* initialise the template */
  setup_word_list(&base, from, len);
  base.core_seq = se;

  for (rule = gRules.next; rule; rule = rule->next) {
    int freq = get_seq_ent_wtype_freq(se, rule->wt);

    if (!freq) {
      /* the word does not have this rule's word type */
      continue;
    }
#ifdef DEBUG_CONJUGATE_TABLE
    {
      xstr xs;
      xs.str = c->ce[base.core_from].c;
      xs.len = base.core_len;
      putxstr(&xs);
      printf(" %s %d\n", rule->name, freq);
    }
#endif
    base.core_wt = rule->wt;
    base.freq = freq;
    base.core_wt_name = rule->name;
    base.node_id = rule->node_id;

    if (wtype_get_pos(rule->wt) == POS_NOUN) {
      /* prefixes and postfixes are only attached to nouns */
      make_pre_words(c, &base);
      make_suc_words(c, &base);
    }
    /* no affix: attach particles / auxiliary verbs directly */
    make_following_word_list(c, &base);
  }
}

void make_dummy_head(struct splitter_context *c)
{
  struct word_list tmpl;
  setup_word_list(&tmpl, 0, 0);
  tmpl.core_seq = 0;
  tmpl.core_wt = wt_noun;
  make_suc_words(c, &tmpl);
}

/*
 * Enumerates every word_list for the string held in the context.
 *
 * Pass 1: for each start position i and each length j (longest first, at
 * most MAX_SEARCH_LEN characters) the substring is looked up.  For every
 * hit
 *   - info->seq_len[i] is raised to the longest match starting at i,
 *   - info->rev_seq_len[i + j] is raised to the longest match ending just
 *     before i + j,
 *   - and, if the entry is an independent word, it is remembered in a
 *     temporary list.
 * Pass 2: make_word_list() is run for every remembered independent word
 * (most recently found first), after which a zero-length dummy head is
 * added with make_dummy_head().
 *
 * The temporary list lives in its own allocator, which is released before
 * returning.
 */
void make_word_list_all(struct splitter_context *c)
{
  /* longest substring, in characters, that is looked up */
  enum { MAX_SEARCH_LEN = 30 };
  struct depword_ent{
    struct depword_ent *next;
    int from, len;
    seq_ent_t se;
  } *found = NULL, *de;
  struct word_split_info_cache *info = c->word_split_info;
  allocator de_ator = create_allocator(sizeof(struct depword_ent), 0);
  int i, j;

  /* enumerate all independent words */
  /* loop over start positions */
  for (i = 0; i < c->char_count ; i++) {
    int search_len = c->char_count - i;

    if (search_len > MAX_SEARCH_LEN) {
      search_len = MAX_SEARCH_LEN;
    }
    /* loop over substring lengths, longest first */
    for (j = search_len; j > 0; j--) {
      xstr xs;
      seq_ent_t se;

      xs.len = j;
      xs.str = c->ce[i].c;
      se = get_seq_ent_from_xstr(&xs);
      if (!se) {
        continue;
      }
      /* the substring is a word: record the longest match lengths */
      if (j > info->seq_len[i]) {
        info->seq_len[i] = j;
      }
      if (j > info->rev_seq_len[i + j]) {
        info->rev_seq_len[i + j] = j;
      }
      /* remember independent words (pushed onto the front of the list) */
      if (get_seq_ent_indep(se)) {
        de = smalloc(de_ator);
        de->from = i;
        de->len = j;
        de->se = se;
        de->next = found;
        found = de;
      }
    }
  }

  /* search the dependent-word patterns for every independent word found */
  for (de = found; de; de = de->next) {
    make_word_list(c, de->se, de->from, de->len);
  }
  /* put a zero-length independent word at the head of the sentence */
  make_dummy_head(c);

  free_allocator(de_ator);
}

void parse_line(char **tokens, int nr)
{
  struct wordseq_rule *r;
  if (nr < 2) {
    printf("Syntex error in indepword defs"
	   " :%d.\n", get_line_number());
    return ;
  }
  /* ԤƬˤʻ̾äƤ */
  r = smalloc(wordseq_rule_ator);
  r->name = strdup(tokens[0]);
  name_to_wtype(tokens[0], &r->wt);
  /* μˤϥΡ̾äƤ */
  r->node_id = get_node_id_by_name(tokens[1]);
  /* 롼ɲ */
  r->next = gRules.next;
  gRules.next = r;
}

/*
 * Loads the independent-word rules.
 *
 * Creates the rule allocator, looks up the file name under the
 * configuration key "INDEPWORD", opens it, and feeds every line to
 * parse_line() after clearing the rule list.
 *
 * Returns 0 on success, -1 when the key is not configured or the file
 * cannot be opened (a message is printed to stdout in both cases).
 *
 * NOTE(review): on the -1 paths the allocator created at the top is not
 * released here -- confirm that the caller cleans it up.
 */
int init_word_seq_tab()
{
  char **tokens;
  int nr;
  char *fn;

  wordseq_rule_ator = create_allocator(sizeof(struct wordseq_rule),
				       wordseq_rule_dtor);

  fn = conf_get_str("INDEPWORD");
  if (fn == NULL) {
    printf("independent word dict unspecified.\n");
    return -1;
  }
  if (open_file(fn) == -1) {
    printf("Failed to open indep word dict (%s).\n", fn);
    return -1;
  }

  /* start from an empty rule list, then read until read_line() reports
   * a non-zero result */
  gRules.next = 0;
  for (;;) {
    if (read_line(&tokens, &nr)) {
      break;
    }
    parse_line(tokens, nr);
    free_line();
  }
  close_file();

  return 0;
}

/*
 * Module initialisation: resolves the word type stored in wt_noun and then
 * loads the rules through init_word_seq_tab().
 * Returns -1 when name_to_wtype() fails, otherwise the result of
 * init_word_seq_tab().
 */
int init_wordlist()
{
  /* NOTE(review): the name passed here is a non-ASCII literal whose bytes
   * must match what name_to_wtype() expects -- verify the encoding of this
   * file before touching it. */
  if (name_to_wtype("̾", &wt_noun) == -1) {
    return -1;
  }
  return init_word_seq_tab();
}
