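/*
 * One or more bunsetsu (phrases) or words are grouped together and
 * handled as a "metaword".
 * This file builds the various kinds of metawords.
 *
 * init_metaword_tab() sets up the information needed to process metawords
 * make_metaword_all() builds the metawords of a context
 * print_metaword() prints the given metaword
 *
 * Funded by IPA Exploratory Software Project 2001 10/29
 * Copyright (C) 2000-2001 TABATA Yusuke, UGAWA Tomoharu
 */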
#include <stdlib.h>
#include <stdio.h>

#include <record.h>
#include <splitter.h>
#include <xstr.h>
#include "wordborder.h"

/* metaword management (registration and allocation) */
static void commit_metaword(struct splitter_context *, struct meta_word *);
static struct meta_word *alloc_metaword(struct splitter_context *c);

/* metaword generation */
static void make_dummy_metaword(struct splitter_context *,
                                int from, int len, int orig_len);
static void make_simple_metaword(struct splitter_context *);
static void make_unkseq_metaword(struct splitter_context *);
static void make_ochaire_metaword(struct splitter_context *sc, int from, int len);
static void make_ochaire_metaword_all(struct splitter_context *sc);
static void make_metaword_with_depchar_all(struct splitter_context *sc);
static void make_metaword_with_depchar(struct splitter_context *sc,
				       struct meta_word *mw);
/* metaword combination */
static void try_combine_name(struct splitter_context *,
			     struct meta_word *, struct meta_word *);
static void try_combine_v_renyou_a(struct splitter_context *,
				   struct meta_word *, struct meta_word *);
static void try_combine_v_renyou_t(struct splitter_context *,
				   struct meta_word *, struct meta_word *);
static void try_combine_metaword(struct splitter_context *,
				 struct meta_word *, struct meta_word *);
static void combine_metaword(struct splitter_context *);

/* word types (parts of speech) looked up by init_metaword_tab() */
static wtype_t wtype_a_tail_of_v_renyou;

/*
 * Register a metaword with the splitter cache.
 *
 * mw is pushed onto the front of the list of metawords that start at
 * position mw->from, so mw->from must already be set by the caller.
 */
void commit_metaword(struct splitter_context *sc, struct meta_word *mw)
{
  struct word_split_info_cache *cache = sc->word_split_info;
  const int pos = mw->from;

  /* head insertion: the new metaword becomes the first list entry */
  mw->next = cache->metawords[pos].next;
  cache->metawords[pos].next = mw;
}

/*
 * Print a metaword to standard output: its type, start position and
 * length, followed by its word list when it has one.
 */
void print_metaword(struct splitter_context *c, struct meta_word *mw)
{
  printf("*meta word %d(%d-%d)*\n", mw->type, mw->from, mw->len);
  if (!mw->wl) {
    /* metawords without a word list have nothing more to show */
    return;
  }
  print_word_list(c, mw->wl);
}

/*
 * Allocate a metaword from the context's metaword allocator and put
 * every field used in this file into a known state.
 *
 * The result is an MW_SINGLE metaword with score 0, position/length 0
 * and all links (wl, mw1, mw2, parent, next, cand_hint) cleared.
 * Callers are expected to fill in from/len (and whatever else applies)
 * before handing it to commit_metaword().
 *
 * NOTE(review): the result of smalloc() is used unchecked, as before;
 * confirm that the allocator cannot return a null pointer.
 */
struct meta_word *alloc_metaword(struct splitter_context *c)
{
  struct meta_word *mw;
  mw = smalloc(c->word_split_info->MwAllocator);
  mw->type = MW_SINGLE;
  mw->score = 0;
  mw->from = 0;
  mw->len = 0;
  mw->wl = 0;
  mw->mw1 = 0;
  mw->mw2 = 0;
  mw->parent = 0;
  /* only the OCHAIRE code sets cand_hint, so clear it for everyone else */
  mw->cand_hint = 0;
  /* overwritten by commit_metaword(); cleared so an uncommitted
   * metaword never carries a stray link */
  mw->next = 0;
  return mw;
}

/*
 * Make sure every character position has at least one metaword.
 *
 * For each position whose metaword list is still empty, a one-character
 * metaword (default type and score from alloc_metaword()) is committed.
 */
void make_unkseq_metaword(struct splitter_context *sc)
{
  struct word_split_info_cache *cache = sc->word_split_info;
  int pos = 0;

  /* positions not covered by anything found so far */
  while (pos < sc->char_count) {
    if (cache->metawords[pos].next == 0) {
      struct meta_word *filler = alloc_metaword(sc);
      filler->from = pos;
      filler->len = 1;
      commit_metaword(sc, filler);
    }
    pos++;
  }
}

/*
 * Commit one MW_SINGLE metaword for every word_list entry.
 *
 * Each metaword copies the position, length and score of its word_list
 * and keeps a pointer to it in wl.
 */
void make_simple_metaword(struct splitter_context *sc)
{
  int pos;

  for (pos = 0; pos < sc->char_count; pos++) {
    struct word_list *cur = sc->word_split_info->lists[pos].next;

    while (cur) {
      struct meta_word *single = alloc_metaword(sc);
      single->type = MW_SINGLE;
      single->wl = cur;
      single->from = cur->from;
      single->len = cur->len;
      single->score = cur->score;
      commit_metaword(sc, single);
      cur = cur->next;
    }
  }
}

/*
 * Verb in renyou (continuative) form followed by an adjective that can
 * act as an adjectivizing tail.
 *
 * When mw's core word is a POS_V / CT_RENYOU word, mw2's core word is a
 * POS_A word, and mw2's core sequence has a non-zero frequency for the
 * wtype_a_tail_of_v_renyou word type, a MW_V_RENYOU_A metaword spanning
 * both is committed.  Its score is the sum of the two scores.
 * Both mw->wl and mw2->wl must be non-null.
 */
void try_combine_v_renyou_a(struct splitter_context *sc,
			    struct meta_word *mw, struct meta_word *mw2)
{
  wtype_t head_wt = mw->wl->core_wt;
  wtype_t tail_wt = mw2->wl->core_wt;
  struct meta_word *joined;

  if (wtype_get_pos(head_wt) != POS_V) {
    return;
  }
  if (wtype_get_ct(head_wt) != CT_RENYOU) {
    return;
  }
  if (wtype_get_pos(tail_wt) != POS_A) {
    return;
  }
  /* it is an adjective; now check that it may follow a renyou verb */
  if (!get_seq_ent_wtype_freq(mw2->wl->core_seq, wtype_a_tail_of_v_renyou)) {
    return;
  }

  joined = alloc_metaword(sc);
  joined->type = MW_V_RENYOU_A;
  joined->from = mw->from;
  joined->len = mw->len + mw2->len;
  joined->score = mw->score + mw2->score;
  joined->mw1 = mw;
  joined->mw2 = mw2;
  commit_metaword(sc, joined);
}

/*
 * Verb in renyou (continuative) form followed by a noun of sub-class
 * SCOS_T40.
 *
 * When mw's core word is a POS_V / CT_RENYOU word and mw2's core word is
 * a POS_NOUN word with scos SCOS_T40, a MW_V_RENYOU_T metaword spanning
 * both is committed.  Its score is the sum of the two scores.
 * Both mw->wl and mw2->wl must be non-null.
 */
void try_combine_v_renyou_t(struct splitter_context *sc,
			    struct meta_word *mw, struct meta_word *mw2)
{
  wtype_t head_wt = mw->wl->core_wt;
  wtype_t tail_wt = mw2->wl->core_wt;
  struct meta_word *joined;

  if (wtype_get_pos(head_wt) != POS_V) {
    return;
  }
  if (wtype_get_ct(head_wt) != CT_RENYOU) {
    return;
  }
  if (wtype_get_pos(tail_wt) != POS_NOUN) {
    return;
  }
  if (wtype_get_scos(tail_wt) != SCOS_T40) {
    return;
  }

  joined = alloc_metaword(sc);
  joined->type = MW_V_RENYOU_T;
  joined->from = mw->from;
  joined->len = mw->len + mw2->len;
  joined->score = mw->score + mw2->score;
  joined->mw1 = mw;
  joined->mw2 = mw2;
  commit_metaword(sc, joined);
}

/*
 * Join a family name with a following given name.
 *
 * When mw's core sequence carries NF_FAMNAME and mw2's core sequence
 * carries NF_FSTNAME, a MW_NAMEPAIR metaword spanning both is committed.
 * Its score is the average (integer division) of the two scores.
 * Both mw->wl and mw2->wl must be non-null.
 */
void try_combine_name(struct splitter_context *sc,
		      struct meta_word *mw, struct meta_word *mw2)
{
  int head_flags = get_seq_flag(mw->wl->core_seq);
  int tail_flags = get_seq_flag(mw2->wl->core_seq);
  struct meta_word *pair;

  if (!(head_flags & NF_FAMNAME) || !(tail_flags & NF_FSTNAME)) {
    return;
  }

  pair = alloc_metaword(sc);
  pair->type = MW_NAMEPAIR;
  pair->from = mw->from;
  pair->len = mw->len + mw2->len;
  pair->score = (mw->score + mw2->score) / 2;
  pair->mw1 = mw;
  pair->mw2 = mw2;
  commit_metaword(sc, pair);
}

/*
 * Check whether two adjacent metawords can be combined, and try every
 * combination rule if so.
 *
 * Both metawords must have a word list; the left one must have neither
 * a postfix nor follow words (postfix_len + follow_count == 0) and
 * neither of them may have a prefix.
 */
void try_combine_metaword(struct splitter_context *sc,
			  struct meta_word *mw, struct meta_word *mw2)
{
  if (!mw->wl || !mw2->wl) {
    return;
  }
  if (mw->wl->postfix_len + mw->wl->follow_count != 0
      || mw->wl->prefix_len != 0
      || mw2->wl->prefix_len != 0) {
    return;
  }

  try_combine_name(sc, mw, mw2);
  try_combine_v_renyou_a(sc, mw, mw2);
  try_combine_v_renyou_t(sc, mw, mw2);
}


/*
 * Try to combine every metaword with each metaword that starts right
 * after it.
 *
 * For every start position and every metaword in that position's list,
 * each metaword in the list at (start + length) is offered to
 * try_combine_metaword().  Metawords that already reach the end of the
 * input have no right neighbour and are skipped.
 */
void combine_metaword(struct splitter_context *sc)
{
  struct word_split_info_cache *cache = sc->word_split_info;
  int start;

  /* loop over the left edge of the left-hand metaword */
  for (start = 0; start < sc->char_count; start++) {
    struct meta_word *left;

    /* loop over the metawords starting here */
    for (left = cache->metawords[start].next; left; left = left->next) {
      struct meta_word *right;

      /* nothing can follow a metaword that reaches the right end */
      if (left->len + start >= sc->char_count) {
        continue;
      }
      /* every metaword that begins immediately to the right */
      right = cache->metawords[left->len + start].next;
      while (right) {
        try_combine_metaword(sc, left, right);
        right = right->next;
      }
    }
  }
}

/*
 * Commit an MW_DUMMY metaword of length len at position from.
 *
 * Its score is the best positive score among the metawords at the same
 * position whose length is orig_len (0 when there is none), plus
 * SCORE_PER_LEN * (len - orig_len) * 2.
 */
void make_dummy_metaword(struct splitter_context *sc,int from,
                         int len, int orig_len)
{
  struct word_split_info_cache *cache = sc->word_split_info;
  struct meta_word *cur;
  struct meta_word *dummy;
  int best = 0;

  for (cur = cache->metawords[from].next; cur; cur = cur->next) {
    if (cur->len != orig_len) {
      continue;
    }
    if (cur->score > best) {
      best = cur->score;
    }
  }

  dummy = alloc_metaword(sc);
  dummy->type = MW_DUMMY;
  dummy->from = from;
  dummy->len = len;
  dummy->score = best + SCORE_PER_LEN * (len - orig_len) * 2;
  commit_metaword(sc, dummy);
}

/*
 * Create dummy metawords from the "EXPANDPAIR" section.
 *
 * For every start position and every prefix length shorter than the
 * remaining input, the prefix is looked up with select_column().  Each
 * string stored for it that fits in the remaining input and equals the
 * input text at this position yields a dummy metaword of that string's
 * length (see make_dummy_metaword(); the prefix length is passed as
 * orig_len).  Does nothing when the section cannot be selected.
 */
void make_expanded_metaword_all(struct splitter_context *c)
{
  int start, short_len;

  if (select_section("EXPANDPAIR", 0) == -1) {
    return ;
  }
  for (start = 0; start < c->char_count; start++) {
    const int remain = c->char_count - start;

    for (short_len = 1; short_len < remain; short_len++) {
      xstr key;
      int nr, k;

      key.str = c->ce[start].c;
      key.len = short_len;
      if (select_column(&key, 0) != 0) {
        continue;
      }
      nr = get_nr_values();
      for (k = 0; k < nr; k++) {
        xstr *longer = get_nth_xstr(k);
        xstr here;

        /* skip missing values and strings that would run past the end */
        if (!longer || longer->len > remain) {
          continue;
        }
        here.str = c->ce[start].c;
        here.len = longer->len;
        if (xstrcmp(&here, longer) == 0) {
          make_dummy_metaword(c, start, here.len, short_len);
        }
      }
    }
  }
}

/*
 * Build the metawords described by the currently selected record.
 *
 * The record is read through get_nth_value() / get_nth_xstr():
 *   value 0         number of segments
 *   value 2*j + 1   length of segment j
 *   value 2*j + 2   string stored as the candidate hint of segment j
 *
 * One MW_OCHAIRE_LEAF metaword is committed per segment; each leaf's mw1
 * points at the leaf to its right.  Finally an MW_OCHAIRE metaword that
 * starts at `from`, has length `len` and score 100000 is committed with
 * mw1 pointing at the leftmost leaf.
 *
 * sc    splitter context that receives the metawords
 * from  position of the first character covered by the record
 * len   total number of characters covered by the record
 *
 * A record whose segment count is less than 1 is ignored.
 */
void make_ochaire_metaword(struct splitter_context *sc, int from, int len)
{
  struct meta_word *mw, *mw0;
  int count;
  int s;
  int j;
  int seg_len;

  count = get_nth_value(0);
  if (count < 1) {
    /* no segment to build; the index arithmetic below would go negative */
    return ;
  }
  /* total length of every segment except the rightmost one */
  for (s = 0, j = 0; j < count - 1; j++) {
    s += get_nth_value(j * 2 + 1);
  }
  /* rightmost leaf */
  seg_len = get_nth_value((count - 1) * 2 + 1);
  mw = alloc_metaword(sc);
  mw->type = MW_OCHAIRE_LEAF;
  mw->from = from + s;
  mw->len = seg_len;
  mw->cand_hint = get_nth_xstr((count - 1) * 2 + 2);
  /* remaining leaves, right to left; j is count - 1 when the loop starts */
  for (j-- ; j >= 0; j--) {
    struct meta_word *n;
    seg_len = get_nth_value(j * 2 + 1);
    s -= seg_len;
    n = alloc_metaword(sc);
    n->type = MW_OCHAIRE_LEAF;
    n->mw1 = mw;
    n->from = from + s;
    n->len = seg_len;
    n->cand_hint = get_nth_xstr(j * 2 + 2);
    /* the leaf to the right is complete, register it */
    commit_metaword(sc, mw);
    mw = n;
  }
  /* leftmost leaf */
  commit_metaword(sc, mw);
  /* metaword covering the whole record */
  mw0 = alloc_metaword(sc);
  mw0->type = MW_OCHAIRE;
  mw0->mw1 = mw;
  mw0->from = from;
  mw0->len = len;
  mw0->score = 100000;
  commit_metaword(sc, mw0);
}

/*
 * Search the "OCHAIRE" section for a record matching the input.
 *
 * Positions are scanned from the left.  At the first position where
 * select_longest_column() finds a match for the rest of the input, the
 * column is marked as used, make_ochaire_metaword() is called with the
 * length of the matched key, and the scan stops.
 * Does nothing when the section cannot be selected.
 */
void make_ochaire_metaword_all(struct splitter_context *sc)
{
  int pos;

  if (select_section("OCHAIRE", 0) == -1) {
    return ;
  }
  for (pos = 0; pos < sc->char_count; pos++) {
    xstr rest;
    xstr *key;

    rest.str = sc->ce[pos].c;
    rest.len = sc->char_count - pos;
    if (select_longest_column(&rest) != 0) {
      continue;
    }
    mark_column_used();
    key = get_index_xstr();
    make_ochaire_metaword(sc, pos, key->len);
    /* only the first match is handled */
    break;
  }
}

/*
 * Extend a metaword over the characters that directly follow it and
 * whose xchar type has the XCT_PART bit set.
 *
 * When at least one such character exists, an MW_DUMMY metaword is
 * committed that starts where mw starts, is longer by the number of
 * those characters, and scores SCORE_PER_LEN more per added character.
 */
void make_metaword_with_depchar(struct splitter_context *sc,
				struct meta_word *mw)
{
  const int tail = mw->from + mw->len;
  struct meta_word *ext;
  int extra = 0;

  /* count the run of XCT_PART characters after the metaword */
  while (tail + extra < sc->char_count
         && (get_xchar_type(*sc->ce[tail + extra].c) & XCT_PART)) {
    extra++;
  }
  if (extra <= 0) {
    return;
  }

  ext = alloc_metaword(sc);
  ext->type = MW_DUMMY;
  ext->from = mw->from;
  ext->len = mw->len + extra;
  ext->score = mw->score + SCORE_PER_LEN * extra;
  commit_metaword(sc, ext);
}

/*
 * Run make_metaword_with_depchar() on every metaword of every position.
 */
void make_metaword_with_depchar_all(struct splitter_context *sc)
{
  struct word_split_info_cache *cache = sc->word_split_info;
  int pos;

  for (pos = 0; pos < sc->char_count; pos++) {
    struct meta_word *cur = cache->metawords[pos].next;

    while (cur) {
      make_metaword_with_depchar(sc, cur);
      cur = cur->next;
    }
  }
}

/*
 * Build all metawords of a context by running every generation pass
 * in a fixed order.  Later passes read the lists filled by earlier ones.
 */
void make_metaword_all(struct splitter_context *sc)
{
  /* first, make one metaword from each word_list */
  make_simple_metaword(sc);

  /* combine adjacent metawords */
  combine_metaword(sc);

  /* handle expanded phrases (EXPANDPAIR section) */
  make_expanded_metaword_all(sc);

  /* extend metawords over following characters flagged XCT_PART */
  make_metaword_with_depchar_all(sc);

  /* metawords from the OCHAIRE section */
  make_ochaire_metaword_all(sc);

  /* cover positions that still have no metaword */
  make_unkseq_metaword(sc);

}

/*
 * Look up the word type used by try_combine_v_renyou_a() and store it
 * in wtype_a_tail_of_v_renyou.  Prints "Fail" to standard output when
 * name_to_wtype() returns -1.
 *
 * NOTE(review): the type name is a non-ASCII literal; confirm that its
 * bytes match the encoding of the word type table.
 */
void init_metaword_tab()
{
  if (name_to_wtype("ƻ첽",&wtype_a_tail_of_v_renyou) != -1) {
    return;
  }
  printf("Fail\n");
}
