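/*
 * Follows the patterns of dependent words (particles, auxiliary verbs,
 * and so on) that come after the independent-word part of a segment
 * (including prefixes and suffixes).
 * The patterns are prepared in a file as a graph.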
 *
 *
 *  +------+
 *  |      |
 *  |branch+--cond--+--transition--> node
 *  |      |        +--transition--> node
 *  | NODE |
 *  |      |
 *  |branch+--cond-----transition--> node
 *  |      |
 *  |branch+--cond-----transition--> node
 *  |      |
 *  +------+
 *
 * Copyright (C) 2005 YOSHIDA Yuichi
 * Copyright (C) 2000-2005 TABATA Yusuke
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netinet/in.h>

#include "config.h"
#include <anthy.h>

#include <conf.h>
#include <ruleparser.h>
#include <xstr.h>
#include <filemap.h>
#include <logger.h>
#include <segclass.h>
#include <splitter.h>
#include <wtype.h>
#include "wordborder.h"

/*
 * Dependent-word transition graph.  Filled in by read_file() from the
 * mapped file and consulted by match_nodes() and the rule accessors.
 */
static struct file_dep fdep;

/*
 * Connection strength constants.
 * NOTE(review): none of these are referenced in the visible part of this
 * file -- confirm whether they are still needed.
 */
#define  NORMAL_CONNECTION 1
#define  WEAKER_CONNECTION 2
#define  WEAK_CONNECTION 8


/*
 * Forward declarations: match_nodes() and match_branch() call each other
 * recursively while walking the graph.
 */
static void
match_branch(struct splitter_context *sc,
	     struct word_list *tmpl,
	     xstr *xs, xstr *cond_xs, struct dep_branch *db);
static void
match_nodes(struct splitter_context *sc,
	    struct word_list *wl,
	    xstr follow_str, int node);


/*
 * Compare an in-memory xstr with a string stored in the mapped file.
 *
 * The on-disk string is read as an array of ints: the first int is the
 * length (converted with ntohl), followed by that many characters.
 *
 * xs   in-memory string
 * dxs  on-disk string
 *
 * Returns 0 when both strings have the same length and characters,
 * 1 otherwise.
 *
 * NOTE(review): the length word is converted with ntohl() but the
 * characters are compared exactly as stored -- confirm that the file
 * writer stores characters in host byte order.
 */
static int
anthy_xstrcmp_with_ondisk(xstr *xs,
			  ondisk_xstr *dxs)
{
  int *words = (int *)dxs;
  int disk_len = ntohl(words[0]);
  int *chars = words + 1; /* characters start right after the length word */
  int pos = 0;

  if (xs->len != disk_len) {
    return 1;
  }
  while (pos < disk_len) {
    if (xs->str[pos] != chars[pos]) {
      return 1;
    }
    pos++;
  }
  return 0;
}

/*
 * Return a pointer to the on-disk string that immediately follows dxs.
 *
 * An on-disk string occupies one length word plus `length` character
 * words, so the next string starts length + 1 ints further on.
 */
static ondisk_xstr *
anthy_next_ondisk_xstr(ondisk_xstr *dxs)
{
  int *words = (int *)dxs;
  int nr_chars = ntohl(words[0]);

  return words + (nr_chars + 1);
}

/*
 * Return the length of an on-disk string, i.e. its leading length word
 * converted with ntohl().
 */
static int
anthy_ondisk_xstr_len(ondisk_xstr *dxs)
{
  int *len_word = (int *)dxs;

  return ntohl(*len_word);
}

/*
 * Test the transition conditions of one node.
 *
 * For every branch of the node and every condition string of that
 * branch, check whether follow_str starts with the condition string.
 * On a match, a copy of wl with its dependent-word part extended by the
 * matched length is handed to match_branch() together with the rest of
 * the string.
 *
 * sc          splitter context, passed through to match_branch()
 * wl          word_list built so far; not modified, a copy is extended
 * follow_str  string that follows what has been consumed so far
 * node        index into fdep.nodes (not range-checked here)
 */
static void
match_nodes(struct splitter_context *sc,
	    struct word_list *wl,
	    xstr follow_str, int node)
{
  struct dep_node *cur_node = &fdep.nodes[node];
  int b;

  /* every branch of this node */
  for (b = 0; b < cur_node->nr_branch; b++) {
    struct dep_branch *branch = &cur_node->branch[b];
    ondisk_xstr *cond = branch->xstrs;
    int c;

    /* every condition string of the branch */
    for (c = 0; c < branch->nr_strs;
         c++, cond = anthy_next_ondisk_xstr(cond)) {
      int cond_len = anthy_ondisk_xstr_len(cond);
      xstr cond_xs;

      /* the remaining string must be at least as long as the condition */
      if (follow_str.len >= cond_len) {
        /* take the leading cond_len characters of the remaining string */
        cond_xs.str = follow_str.str;
        cond_xs.len = cond_len;

        if (anthy_xstrcmp_with_ondisk(&cond_xs, cond) == 0) {
          /* condition matched: extend a private copy and follow the branch */
          struct word_list new_wl = *wl;
          xstr new_follow;

          new_wl.part[PART_DEPWORD].len += cond_len;
          new_follow.str = follow_str.str + cond_len;
          new_follow.len = follow_str.len - cond_len;
          match_branch(sc, &new_wl, &new_follow, &cond_xs, branch);
        }
      }
    }
  }
}

/*
 * Try every transition of a branch whose condition has just matched.
 *
 * For each transition the score, weak length, conjugation class and
 * part of speech of tmpl are updated, then either the search continues
 * at the next node or, at a terminal transition, a new word_list is
 * allocated and committed.  The saved fields are restored before the
 * next transition is tried.
 *
 * sc       splitter context used to allocate and commit word_lists
 * tmpl     word_list built so far; modified in place while a transition
 *          is tried (note that tail_ct is overwritten and not restored)
 * xs       string remaining after the matched condition
 * cond_xs  string that was consumed by the matched condition
 * db       branch currently being examined
 */
static void
match_branch(struct splitter_context *sc,
	     struct word_list *tmpl,
	     xstr *xs, xstr *cond_xs, struct dep_branch *db)
{
  struct part_info *part = &tmpl->part[PART_DEPWORD];
  int i;

  /* try each transition in turn */
  for (i = 0; i < db->nr_transitions; i++) {
    int conn_ratio = part->ratio; /* save the score */
    int weak_len = tmpl->weak_len;/* save the length of weak transitions */ 
    int head_pos = tmpl->head_pos; /* save the part-of-speech information */
    enum dep_class dc = part->dc;
    struct dep_transition *transition = &db->transition[i];

    /* score of this transition: scale by trans_ratio / RATIO_BASE */
    part->ratio *= ntohl(transition->trans_ratio);
    part->ratio /= RATIO_BASE;
    if (ntohl(transition->weak) || /* weak transition */
	(ntohl(transition->dc) == DEP_END && xs->len > 0)) { /* marked as an end although input remains */
      tmpl->weak_len += cond_xs->len;
    } else {
      /* otherwise add a bonus of 3 * (matched length)^3 to the score */
      part->ratio += cond_xs->len * cond_xs->len * cond_xs->len * 3;
    }

    tmpl->tail_ct = ntohl(transition->ct);
    /* conjugation class given by the transition, unless it is DEP_NONE */
    if (ntohl(transition->dc) != DEP_NONE) {
      part->dc = ntohl(transition->dc);

    }
    /* the transition may override the part of speech of the head word */
    if (ntohl(transition->head_pos) != POS_NONE) {
      tmpl->head_pos = ntohl(transition->head_pos);
    }

    /* continue to another node, or terminal? (next_node == 0 is terminal) */
    if (ntohl(transition->next_node)) {
      /* continue matching at the next node */
      match_nodes(sc, tmpl, *xs, ntohl(transition->next_node));
    } else {
      struct word_list *wl;
      xstr xs_tmp;

      /* 
       * A terminal has been reached, so commit
       * the current state as a word_list.
       */
      wl = anthy_alloc_word_list(sc);
      *wl = *tmpl;
      wl->len += part->len;

      /*
       * For a dependent-word part of exactly one character, look at the
       * character immediately before the remaining string and raise the
       * score by 3/2 when its type has XCT_STRONG set.
       */
      xs_tmp = *xs;
      xs_tmp.str--;
      if (wl->part[PART_DEPWORD].len == 1 &&
	  (anthy_get_xchar_type(xs_tmp.str[0]) & XCT_STRONG)) {
	wl->part[PART_DEPWORD].ratio *= 3;
	wl->part[PART_DEPWORD].ratio /= 2;
      }
      /* hand the finished word_list over */
      anthy_commit_word_list(sc, wl);
    }
    /* restore the saved state before trying the next transition */
    part->ratio = conn_ratio;
    part->dc = dc;
    tmpl->weak_len = weak_len;
    tmpl->head_pos = head_pos;
  }
}

/**
 * Start scanning the dependent-word graph at the given node.
 *
 * sc      splitter context
 * tmpl    word_list to extend
 * follow  string to match against the graph (passed on by value)
 * node    index of the node to start from
 */
void
anthy_scan_node(struct splitter_context *sc,
		struct word_list *tmpl,
		xstr *follow, int node)
{
  /* begin the search from the string as given by the caller */
  xstr rest = *follow;

  match_nodes(sc, tmpl, rest, node);
}




/*
 * Skip over one on-disk string: advance *offset past its length word
 * and the characters that follow.  Nothing is copied.
 */
static void
read_xstr(struct file_dep* fdep, int* offset)
{
  int pos = *offset;
  int nr_chars = ntohl(*(int*)&fdep->file_ptr[pos]);

  pos += sizeof(int);
  pos += sizeof(xchar) * nr_chars;
  *offset = pos;
}

/*
 * Set up one branch from the mapped file, starting at *offset.
 *
 * Layout read here: number of condition strings, the strings
 * themselves, number of transitions, the transition array.  The branch
 * keeps pointers into the mapping; *offset is advanced past the branch.
 */
static void
read_branch(struct file_dep* fdep, struct dep_branch* branch, int* offset)
{
  int remaining;

  /* number of transition conditions */
  branch->nr_strs = ntohl(*(int*)&fdep->file_ptr[*offset]);
  *offset += sizeof(int);

  /* the condition strings stay in the mapping; just step over them */
  branch->xstrs = (ondisk_xstr *)&fdep->file_ptr[*offset];
  for (remaining = branch->nr_strs; remaining > 0; remaining--) {
    read_xstr(fdep, offset);
  }

  /* number of transitions, followed by the transition array */
  branch->nr_transitions = ntohl(*(int*)&fdep->file_ptr[*offset]);
  *offset += sizeof(int);
  branch->transition = (struct dep_transition*)&fdep->file_ptr[*offset];
  *offset += sizeof(struct dep_transition) * branch->nr_transitions;
}

/*
 * Set up one node from the mapped file, starting at *offset.
 *
 * Reads the branch count, allocates the branch array (released in
 * anthy_release_depword_tab()) and fills in every branch.
 *
 * NOTE(review): the result of malloc() is not checked.
 */
static void
read_node(struct file_dep* fdep, struct dep_node* node, int* offset)
{
  int idx = 0;

  node->nr_branch = ntohl(*(int*)&fdep->file_ptr[*offset]);
  *offset += sizeof(int);

  node->branch = malloc(sizeof(struct dep_branch) * node->nr_branch);
  while (idx < node->nr_branch) {
    read_branch(fdep, &node->branch[idx], offset);
    idx++;
  }
}

/*
 * Map file_name and record the mapping and its base address in fdep.
 *
 * Returns 0 on success, -1 when the file could not be mapped (in which
 * case fdep->mapping is NULL and fdep->file_ptr is left untouched).
 */
static int
map_file_dep(const char* file_name, struct file_dep* fdep)
{
  fdep->mapping = anthy_mmap(file_name);
  if (fdep->mapping) {
    fdep->file_ptr = anthy_mmap_address(fdep->mapping);
    return 0;
  }
  return -1;
}

/*
 * Map the dependency graph file and build the in-memory tables in fdep.
 *
 * File layout as read here: number of rules, the rule array, number of
 * nodes, then the nodes.  The rule array is used in place inside the
 * mapping; the node array is allocated with malloc().
 *
 * Returns 0 on success.  Returns -1 when the file cannot be mapped, a
 * count read from the file is negative, or the node array cannot be
 * allocated; in that case fdep is left empty (no rules, no nodes, no
 * mapping).
 *
 * NOTE(review): offsets are not checked against the size of the mapping.
 */
static int
read_file(const char* file_name)
{
  int i;
  int offset = 0;
  int nr_rules;
  int nr_nodes;

  /* start from an empty table so a failed load leaves consistent state */
  fdep.nrRules = 0;
  fdep.rules = NULL;
  fdep.nrNodes = 0;
  fdep.nodes = NULL;

  if (map_file_dep(file_name, &fdep) != 0) {
    return -1;
  }

  /* first comes the number of rules */
  nr_rules = ntohl(*(int*)&fdep.file_ptr[offset]);
  offset += sizeof(int);
  if (nr_rules < 0) {
    goto fail;
  }

  /* the rules themselves are used directly from the mapping */
  fdep.rules = (struct ondisk_wordseq_rule*)&fdep.file_ptr[offset];
  offset += sizeof(struct ondisk_wordseq_rule) * nr_rules;

  /* number of nodes */
  nr_nodes = ntohl(*(int*)&fdep.file_ptr[offset]);
  offset += sizeof(int);
  if (nr_nodes < 0) {
    goto fail;
  }

  /* read every node; malloc(0) may legitimately return NULL */
  fdep.nodes = malloc(sizeof(struct dep_node) * nr_nodes);
  if (!fdep.nodes && nr_nodes > 0) {
    goto fail;
  }
  for (i = 0; i < nr_nodes; ++i) {
    read_node(&fdep, &fdep.nodes[i], &offset);
  }

  /* publish the counts only once everything has been set up */
  fdep.nrRules = nr_rules;
  fdep.nrNodes = nr_nodes;
  return 0;

 fail:
  fdep.rules = NULL;
  fdep.nodes = NULL;
  anthy_munmap(fdep.mapping);
  fdep.mapping = NULL;
  fdep.file_ptr = NULL;
  return -1;
}

/* Return the number of dependent-word rules currently loaded. */
int
anthy_get_nr_dep_rule()
{
  return fdep.nrRules;
}

/*
 * Fill *rule from the index-th rule stored in the file.
 * index is not range-checked; callers are expected to stay below
 * anthy_get_nr_dep_rule().
 */
void
anthy_get_nth_dep_rule(int index, struct wordseq_rule *rule)
{
  /* extract the data from the on-disk representation */
  struct ondisk_wordseq_rule *r = &fdep.rules[index];
  rule->wt = anthy_get_wtype(r->wt[0], r->wt[1], r->wt[2], r->wt[3], r->wt[4], r->wt[5]);
  rule->ratio = ntohl(r->ratio);
  rule->node_id = ntohl(r->node_id);
}

/*
 * Load the dependent-word graph named by the "DEPGRAPH" configuration
 * entry.
 *
 * Returns 0 on success, -1 when the entry is missing or the file could
 * not be loaded.
 */
int
anthy_init_depword_tab()
{
  const char *fn;

  fn = anthy_conf_get_str("DEPGRAPH");
  if (!fn) {
    anthy_log(0, "Dependent word dictionary is unspecified.\n");
    return -1;
  }
  if (read_file(fn) != 0) {
    anthy_log(0, "Failed to read dependent word dictionary.\n");
    return -1;
  }
  return 0;
}

/*
 * Release everything held by fdep: the per-node branch arrays, the node
 * array and the file mapping.
 *
 * All pointers and counts are reset afterwards, so calling this again
 * (or calling it when nothing was loaded) does nothing harmful, and the
 * rule accessors see an empty table instead of unmapped memory.
 */
void
anthy_release_depword_tab(void)
{
  int i;

  for (i = 0; i < fdep.nrNodes; i++) {
    free(fdep.nodes[i].branch);
  }
  free(fdep.nodes);
  fdep.nodes = NULL;
  fdep.nrNodes = 0;

  /* the rule array lives inside the mapping, so drop it with the mapping */
  fdep.rules = NULL;
  fdep.nrRules = 0;

  if (fdep.mapping) {
    anthy_munmap(fdep.mapping);
    fdep.mapping = NULL;
  }
  fdep.file_ptr = NULL;
}

