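/*
 * Reads the file that describes segment (bunsetsu) structure with
 * regular expressions and builds the graph used to analyze segments.
 * 1. Expand macros and build a token list.
 * 2. Build a syntax tree from the token list.
 * 3. Build an automaton graph from the syntax tree.
 * 4. Merge the automata.
 *
 * Step 2 has two parts: parsing parentheses and the like, and then
 * parsing operators such as "|".
 *
 * For the automaton transformations, consult a standard textbook.
 * Regular-expression terms used here:
 *   Closure = *, Positive Closure = +, Negative Closure = ?
 *
 * Everything is driven from init_seg_struct_tab(), which registers
 * the resulting graph with register_segstruct().
 */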
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <dic.h>
#include <wtype.h>
#include <conf.h>
#include <alloc.h>
#include <ruleparser.h>

#include "main.h"
#include "segstruct.h"

/* Data structures (syntax tree): values stored in REx.type */
#define REXT_SEQ 1
#define REXT_LIST 2
#define REXT_WTYPE 3
#define REXT_OR 4
#define REXT_CLOS 5
#define REXT_POSCLOS 6
#define REXT_NEGCLOS 7
/* The following are raw operator tokens; the post-parse pass rewrites
   them into REXT_OR / REXT_CLOS / REXT_POSCLOS / REXT_NEGCLOS */
#define REXT_VBAR 8
#define REXT_STAR 9
#define REXT_PLUS 10
#define REXT_QUESTION 11

/* One node of the regular-expression syntax tree */
typedef struct REx_{
  struct REx_ *next, *prev;/* neighbours in the same sequence */
  int type;/* one of the REXT_* values */
  int opt;/* option mask produced by parse_opt_str() */
  struct REx_ *ref;/* operand of a unary operator, or contents of a list */
  struct REx_ *left, *right;/* operands of a binary operator */
  struct REx_ *parent;/* operator node that took this node as its operand */
  seq_ent_t seq;/* filled in for REXT_SEQ nodes */
  wtype_t wt;
  State *initial_state;/* assigned by compile_rex_node() */
  Arrow *arrow_to_final;/* assigned by compile_rex_node() */
}REx;

/* One token of a rule; tokens form a circular list around a sentinel head */
typedef struct Token_{
  char *str;/* the token text itself (heap copy made by append_token) */
  char *literal;/* element that was enclosed in double quotes */
  char *opt;/* element that starts with '#' */
  char *name;/* any other element; passed to name_to_wtype() */
  struct Token_ *next, *prev;
}Token;

/* Data structure (macro definitions) */
typedef struct Macro_{
  char *name;/* macro name, including the leading '@' */
  Token token_list;/* sentinel head of the macro's token list */
  struct Macro_ *next;
}Macro;

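/*
 * Algorithm used to merge transition conditions and destination states:
 * 1. The initial states of all rules, plus everything reachable from them
 *    through epsilon transitions, become the new initial state, which is
 *    added to the new rule.
 * 2. For each state added to the new rule, the transitions of its member
 *    states are added to it. The set of states reachable through each
 *    transition becomes the next state and is added to the new rule.
 * 3. Step 2 is repeated until there are no more states to add.
 * The four structures below are used for this merge.
 */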

/*
 * A set of states.
 * The set is empty when the first cell has state == 0.
 */
typedef struct StateSet_{
  State *state;	/* kept in descending address order (see add_to_state_set) */
  struct StateSet_ *next;
}StateSet;

/* A transition of a merged (new) state */
typedef struct NewArrow_{
  Arrow* arrow; /* original arrow supplying the transition condition */
  struct NewState_ *to_state;
  struct NewArrow_ *next;
}NewArrow;

/* A merged (new) state */
typedef struct NewState_{
  StateSet *olds; /* the original states this state stands for */
  NewArrow arrows; /* list head; only .next is used by the code here */
  State *state; /* State built by convert_to_state(); 0 until converted */
}NewState;

/*
  A set of merged (new) states.
  The set is empty when the first cell has state == 0.
*/
typedef struct NewStateSet_{
  NewState *state;
  struct NewStateSet_ *next;
}NewStateSet;


/* Global variables */
/* Initial states of the parsed rules, kept separately per rule level */
static StateSet *gInitialStateSet[MAX_RULE_LEVEL];
/* Sentinel pointer value that stands for a syntax error */
static REx *error_rex = (REx *)-1;
/* Head of the list of macros defined in the file */
static Macro gMacros;
/* Allocators, one per structure type */
static allocator StateAllocator, ArrowAllocator;
static allocator RExAllocator;
static allocator FinalAllocator, StateSetAllocator, NewArrowAllocator;
static allocator NewStateAllocator, NewStateSetAllocator;


/* Function declarations */
/* Building rules from the syntax tree */
static void link_rex_node(REx *);
static void compile_rex_node(REx *);
static void compile_rex_list_node(REx *);
static void compile_rex_or_node(REx *);// 'or' node
static void compile_rex_negclos_node(REx *);
static void compile_rex_posclos_node(REx *);
static REx *post_parse_rex(REx *);
static REx *post_parse_binary_rex(REx *);
static REx *post_parse_unary_rex(REx *);
static REx *make_rex_tree(Token *t);
static REx *parse_token_list(Token **t);
static void parse_line(char **, int);
static void parse_MacroDef(char *, char **, int);
/* Token list handling */
static void print_token_list(Token *);
static void append_token(Token *, char *);
static void tokens_to_list(Token *, char **, int);
static void split_token(Token *);
static int parse_opt_str(char *);
static void add_elm_to_token(Token *, char *);
static Macro *find_macro_by_name(char *, int );
/* Data (structure) allocation and release */
static State *alloc_state();
static Arrow *alloc_arrow();
static REx *alloc_rex();
static Final *alloc_final();
static StateSet *alloc_state_set();
static NewArrow *alloc_new_arrow();
static NewState *alloc_new_state();
static NewStateSet *alloc_new_state_set();
static Arrow *dup_arrow(Arrow *);
static void free_token(Token *);
static void add_arrow_to_state(State *s, Arrow *a);
/*debug*/
static void print_rex_tree(REx *rex);
static void print_rex_unary_op(REx *rex);
/* Merging of states */
static int state_set_equal(StateSet *, StateSet *);
static int add_to_state_set(StateSet *, State *);
static int arrow_cond_eq(Arrow *, Arrow *);
static int arrow_contains(NewArrow *, Arrow *);
static void add_arrow_to_new_state(NewState *ns, NewArrow *);
static void add_new_state_set(NewStateSet *, NewState *);
static NewState *new_state_set_find(NewStateSet *, StateSet *);
static void add_to_state_set_epsilon_eq(StateSet *, State *);
static NewState *merge_states(StateSet *, NewStateSet *);
static void convert_to_state(NewState *);
static State *merge_rules(StateSet *);

/* Take a State from StateAllocator and give it empty final-mark and
   arrow lists. */
State *alloc_state()
{
  State *st = smalloc(StateAllocator);

  st->final.next = 0;
  st->arrows.next = 0;
  return st;
}

/* Take an Arrow from ArrowAllocator; no field is initialized here. */
Arrow *alloc_arrow()
{
  Arrow *fresh = smalloc(ArrowAllocator);

  return fresh;
}

/* Return a new Arrow that is a member-wise copy of orig (including its
   next and to_state links). */
Arrow *dup_arrow(Arrow *orig)
{
  Arrow *copy = smalloc(ArrowAllocator);

  *copy = *orig;
  return copy;
}

/* Take a REx node from RExAllocator; only the parent link is cleared. */
REx *alloc_rex()
{
  REx *node;

  node = smalloc(RExAllocator);
  node->parent = 0;
  return node;
}

/* Take a Final mark from FinalAllocator; no field is initialized here. */
Final *alloc_final()
{
  Final *mark = smalloc(FinalAllocator);

  return mark;
}

/* Take a StateSet cell from StateSetAllocator, initialized as an empty
   set (state == 0, no successor). */
StateSet *alloc_state_set()
{
  StateSet *cell = smalloc(StateSetAllocator);

  cell->next = 0;
  cell->state = 0;
  return cell;
}

/* Take a NewArrow from NewArrowAllocator with every field cleared. */
NewArrow *alloc_new_arrow()
{
  NewArrow *edge = smalloc(NewArrowAllocator);

  edge->next = 0;
  edge->to_state = 0;
  edge->arrow = 0;
  return edge;
}

/* Take a NewState from NewStateAllocator with no member states, no
   arrows and no converted State yet. */
NewState *alloc_new_state()
{
  NewState *merged = smalloc(NewStateAllocator);

  merged->state = 0;
  merged->arrows.next = 0;
  merged->olds = 0;
  return merged;
}

/* Take a NewStateSet cell from NewStateSetAllocator, initialized as an
   empty set. */
NewStateSet *alloc_new_state_set()
{
  NewStateSet *cell = smalloc(NewStateSetAllocator);

  cell->next = 0;
  cell->state = 0;
  return cell;
}

/*
 * Free every node of the circular token list headed by the sentinel t,
 * together with each node's str buffer. The sentinel itself is neither
 * freed nor relinked.
 */
void free_token(Token *t)
{
  Token *node, *following;

  for (node = t->next; node != t; node = following) {
    following = node->next;
    free(node->str);
    free(node);
  }
}

/* Push arrow a onto the front of state s's list of outgoing arrows. */
void add_arrow_to_state(State *s, Arrow *a)
{
  Arrow *old_head = s->arrows.next;

  s->arrows.next = a;
  a->next = old_head;
}

/*
 * Mark state s as a final state (one state may carry several marks).
 * Returns 0 without touching s when a mark with the same id is already
 * present; otherwise pushes f onto the front of the list and returns 1.
 */
int add_final_state_mark(State *s, Final *f)
{
  Final *cur = s->final.next;

  /* refuse duplicates: one mark per id */
  while (cur) {
    if (cur->id == f->id) {
      return 0;
    }
    cur = cur->next;
  }
  f->next = s->final.next;
  s->final.next = f;
  return 1;
}

/*
 * Append the token text s to the end of the circular list headed by t.
 *
 * A text starting with '@' is treated as a macro reference: the macro's
 * tokens are appended instead, wrapped in "(" and ")". find_macro_by_name()
 * reports an undefined macro and returns an empty one, so such a reference
 * expands to "( )".
 *
 * Any other text is copied with strdup() into a newly malloc'ed node. The
 * node's literal/opt/name fields start out as 0; split_token() fills them
 * in later.
 */
void append_token(Token *t, char *s)
{
  if (s[0] == '@') {
    /* macro reference: expand it in place */
    Macro *m;
    Token *cur;
    append_token(t, "(");
    m = find_macro_by_name(s, 1);
    for (cur = m->token_list.next; cur!= &m->token_list; cur = cur->next) {
      append_token(t, cur->str);
    }
    append_token(t, ")");
  }else{
    Token *n;
    n = (Token *)malloc(sizeof(Token));
    n->str = strdup(s);
    /* initialize the new node (not the list head) so that no reader ever
       sees indeterminate element pointers */
    n->literal = 0;
    n->opt = 0;
    n->name = 0;
    /* link in just before the sentinel, i.e. at the tail */
    n->next = t;
    n->prev = t->prev;
    t->prev->next = n;
    t->prev = n;
  }
}

/* Debug helper: print each token of the list headed by t, one per line
   (literal first when present, then "*" and the token text), followed by
   a "-" line. */
void print_token_list(Token *t)
{
  Token *node = t->next;

  while (node != t) {
    if (node->literal) {
      printf("%s", node->literal);
    }
    printf("*%s\n", node->str);
    node = node->next;
  }
  printf("-\n");
}

/* Debug helper: print a closure node as "clos", "clos+" or "clos-"
   followed by its operand in parentheses. */
void print_rex_unary_op(REx *rex)
{
  printf("clos");
  if (rex->type == REXT_POSCLOS) {
    printf("+");
  } else if (rex->type == REXT_NEGCLOS) {
    printf("-");
  }
  printf("( ");
  print_rex_tree(rex->ref);
  printf(") ");
}

/*
 * Debug helper: print the syntax tree starting at rex, following the
 * next links. A null node prints "_ " and the error sentinel prints
 * "{error} ". A broken next/prev link is reported.
 */
void print_rex_tree(REx *rex)
{
  if (!rex) {
    printf("_ ");
    return ;
  }
  if (rex == error_rex) {
    printf("{error} ");
    return ;
  }
  switch(rex->type){
  case REXT_SEQ:
    /* print the handle as a pointer; casting it to unsigned for %x
       would truncate it where pointers are wider than unsigned */
    printf("seq(%p) ", (void *)rex->seq);
    break;
  case REXT_WTYPE:
    printf("wtype ");
    break;
  case REXT_LIST:
    printf("( ");
    print_rex_tree(rex->ref);
    printf(") ");
    break;
  case REXT_VBAR:
    printf("| ");
    break;
  case REXT_OR:
    printf("or(");
    print_rex_tree(rex->left);
    printf(",");
    print_rex_tree(rex->right);
    printf(")");
    break;
  case REXT_STAR:
    printf("* ");
    break;
  case REXT_PLUS:
    printf("+ ");
    break;
  case REXT_QUESTION:
    printf("? ");
    break;
  case REXT_NEGCLOS:
  case REXT_POSCLOS:
  case REXT_CLOS:
    print_rex_unary_op(rex);
    break;
  }
  if (rex->next && rex->next->prev != rex) {
    printf("rex link is broken.\n");
  }
  print_rex_tree(rex->next);
}

/* Reset l to an empty circular list and append the first nr strings of
   tokens to it (macro references are expanded by append_token). */
void tokens_to_list(Token *l, char **tokens, int nr)
{
  int idx = 0;

  l->prev = l;
  l->next = l;
  while (idx < nr) {
    append_token(l, tokens[idx]);
    idx++;
  }
}

/*
 * Store one element s of a token into t according to its first character:
 *   '"'  -> t->literal, with the surrounding quotes removed
 *   '#'  -> t->opt (the '#' is kept)
 *   else -> t->name
 * s is modified in place and t keeps pointers into it.
 */
void add_elm_to_token(Token *t, char *s)
{
  switch(*s){
  case '"':
    {
      size_t len = strlen(s);
      /* drop the closing quote only when there really is one, so an
         unterminated literal does not lose its last character */
      if (len > 1 && s[len - 1] == '"') {
	s[len - 1] = 0;
      }
      t->literal = &s[1];
    }
    break;
  case '#':
    t->opt = s;
    break;
  default:
    t->name = s;
    break;
  }
}

/*
 * Split t->str into its '.'-separated elements and store each one in t
 * through add_elm_to_token(). A '.' inside double quotes does not
 * separate. t->str is modified in place: every separating '.' becomes a
 * string terminator.
 */
void split_token(Token *t)
{
  int len= strlen(t->str);
  int i, inQuote=0;
  char *cur_elm=0;
  char *cc;
  t->literal = 0;
  t->opt = 0;
  t->name = 0;
  for (i = 0; i < len; i++) {
    cc = &t->str[i];
    if (!cur_elm) {
      /* first character of a new element */
      cur_elm = cc;
    }
    switch(*cc){
    case '"':
      inQuote = 1 - inQuote;
      break;
    case '.':
      if (!inQuote) {
	*cc = 0;
	add_elm_to_token(t, cur_elm);
	cur_elm = 0;
      }
      break;
    }
  }
  /* no element is pending when the token is empty or ends with '.';
     passing the null pointer on would be dereferenced */
  if (cur_elm) {
    add_elm_to_token(t, cur_elm);
  }
}

/*
 * Define (or redefine) the macro called name as the nr strings in tokens.
 * Macro references inside the definition are expanded immediately, so the
 * stored token list never refers to other macros.
 */
void parse_MacroDef(char *name, char **tokens, int nr)
{
  Token tok;
  Macro *m;

  /* expand into a temporary token list */
  tokens_to_list(&tok, tokens, nr);

  /* look the macro up, creating it when it does not exist yet */
  m = find_macro_by_name(name, 0);
  /* A redefinition replaces the stored tokens: release the old ones
     first. The expansion above already copied whatever it needed, and a
     freshly created macro has an empty list, so this is always safe. */
  free_token(&m->token_list);
  if (&tok == tok.next) {
    /* empty list */
    m->token_list.next = &m->token_list;
    m->token_list.prev = &m->token_list;
  }else{
    /* move the nodes from the temporary head to the macro's head */
    m->token_list.next = tok.next;
    m->token_list.prev = tok.prev;
    tok.next->prev = &m->token_list;
    tok.prev->next = &m->token_list;
  }
}

/*
 * Parse one line of the definition file, given as nr strings in tokens.
 *
 * "@name := ..." defines a macro. Any other line is a rule: tokens[0] is
 * the rule level and the rest is a regular expression, which is compiled
 * and whose initial state is added to gInitialStateSet[level]. Errors are
 * reported on stderr and the line is skipped.
 */
void parse_line(char **tokens, int nr)
{
  REx *rex;
  Token tok_list;
  int level;
  tok_list.str = 0 ;

  if (nr < 1) {
    /* nothing on the line; tokens[0] must not be read */
    return ;
  }

  if (nr >2 && tokens[0][0] == '@' && !strcmp(tokens[1], ":=")) {
    /* macro definition */
    parse_MacroDef(tokens[0], &tokens[2], nr - 2);
    return ;
  }

  /* a regular-expression rule */
  level = atoi(tokens[0]);
  if (level < 0 || MAX_RULE_LEVEL <= level) {
    fprintf(stderr, "illegal rule level in segment struct definition file"
	    "(: %d).\n", get_line_number());
    /* level indexes gInitialStateSet below, so the line must be dropped */
    return ;
  }
  /* expand macros and build the token list */
  tokens_to_list(&tok_list, &tokens[1], nr-1);

  /* build the REx tree from the token list */
  rex = make_rex_tree(&tok_list);
  free_token(&tok_list);
  if (!rex || rex == error_rex) {
    fprintf(stderr, "syntax error in segment struct definition file"
	    "(: %d).\n", get_line_number());
    return ;
  }
  /* compile every node */
  compile_rex_node(rex);
  /* connect the compiled nodes */
  link_rex_node(rex);
  /* add the rule to the set for its level */
  add_to_state_set(gInitialStateSet[level], rex->initial_state);
}

/*
 * Return the macro called name. When it does not exist, an empty macro
 * with that name is created, pushed onto the front of gMacros and
 * returned; if f is non-zero the missing macro is reported on stderr
 * first.
 */
Macro *find_macro_by_name(char *name, int f)
{
  Macro *found = gMacros.next;

  while (found) {
    if (strcmp(found->name, name) == 0) {
      return found;
    }
    found = found->next;
  }

  if (f) {
    fprintf(stderr, "Anthy: Macro(%s) (line:%d) not defined "
	    "in segment struct definition.\n",
	    name, get_line_number());
  }

  found = (Macro *)malloc(sizeof(Macro));
  found->name = strdup(name);
  found->token_list.prev = &found->token_list;
  found->token_list.next = &found->token_list;
  found->next = gMacros.next;
  gMacros.next = found;
  return found;
}

/* Convert an option string into a bit mask: each 'S' sets OPT_SUFFIX,
   every other character is ignored. A null string yields 0. */
int parse_opt_str(char *str)
{
  int mask = 0;
  char *p;

  if (str == 0) {
    return 0;
  }
  for (p = str; *p != '\0'; p++) {
    if (*p == 'S') {
      mask |= OPT_SUFFIX;
    }
  }
  return mask;
}

/*
 * Recursive-descent step: turn the tokens starting at *t into a chain of
 * REx nodes linked through next/prev, advancing *t past what was
 * consumed. Returns 0 when a ")" closes the current list and error_rex
 * when the list head (str == 0) is reached first, i.e. a ")" is missing.
 * Operators are only tagged here (REXT_VBAR/STAR/QUESTION/PLUS); the
 * post-parse pass attaches their operands.
 */
REx *parse_token_list(Token **t)
{
  REx *r;
  Token *tok = (*t);
  char *s = (*t)->str;

  if (!s) {
    /* ran into the list head: error */
    return error_rex;
  }
  *t = (*t)->next;
  
  if (!strcmp(")", s)) {
    /* ")" ends the current list */
    return 0;
  }

  r = alloc_rex();
  r->prev = 0;
  r->opt = parse_opt_str(tok->opt);
  if (!strcmp("(", s)) {
    /* "(" opens a nested list: first its contents, then what follows it */
    r->type = REXT_LIST;
    r->ref = parse_token_list(t);
    r->next = parse_token_list(t);
    if (r->ref == error_rex || r->next == error_rex) {
      return error_rex;
    }
    if (r->next) {
      r->next->prev = r;
    }
    return r;
  }
  /* an atom or an operator */
  if (tok->literal) {
    /* quoted literal */
    xstr *xs;
    r->type = REXT_SEQ;
    xs = cstr_to_xstr(tok->literal);
    r->seq = get_seq_ent_from_xstr(xs);
    if (!r->seq) {
      fprintf(stderr, "Anthy: Invalid word(%s) in structdef.\n", tok->literal);
    }
    r->wt = wt_none;
    free_xstr(xs);
  }else if (!strcmp("|", s)) {
    r->type = REXT_VBAR;
  }else if (!strcmp("*", s)) {
    r->type = REXT_STAR;
  }else if (!strcmp("?", s)) {
    r->type = REXT_QUESTION;
  }else if (!strcmp("+", s)) {
    r->type = REXT_PLUS;
  }else{
    /* anything else is looked up as a word type by name */
    /* NOTE(review): tok->name is 0 for a token made only of '#'
       elements; confirm name_to_wtype() tolerates that */
    wtype_t wt;
    name_to_wtype(tok->name, &wt);
    r->type = REXT_WTYPE;
    r->wt = wt;
  }
  r->next = parse_token_list(t);
  if (r->next == error_rex) {
    return error_rex;
  }
  if (r->next) {
    r->next->prev = r;
  }
  return r;
}

/*
 * Attach each postfix operator (*, +, ?) in the chain starting at head to
 * the node that precedes it: the operator becomes a REXT_CLOS /
 * REXT_POSCLOS / REXT_NEGCLOS node whose ref is that operand, and the
 * operand is unlinked from the chain. Returns the (possibly new) first
 * node of the chain.
 * The scan starts at head->next, so an operator in first position has no
 * operand and is left unconverted.
 */
REx *post_parse_unary_rex(REx *head)
{
  REx *op;
again:;
  /* if the old head became an operand, climb to the operator replacing it */
  for (; head->parent; head = head->parent);
  for (op = head->next; op; op = op->next) {
    int p = 0;
    switch (op->type) {
    case REXT_STAR:
      p = REXT_CLOS;
      break;
    case REXT_PLUS:
      p = REXT_POSCLOS;
      break;
    case REXT_QUESTION:
      p = REXT_NEGCLOS;
      break;
    }
    if (p) {
      op->type = p;
      /* the preceding node becomes the operand */
      op->ref = op->prev;
      op->ref->parent = op;
      /* splice the operator into the operand's place in the chain */
      if (op->prev->prev) {
	op->prev->prev->next = op;
      }
      op->prev = op->prev->prev;
      /* detach the operand from the chain */
      op->ref->next = 0;
      op->ref->prev = 0;
      /* the chain changed: rescan from the start */
      goto again;
    }
  }
  for (; head->parent; head = head->parent);
  return head;
}

/*
 * Attach each "|" in the chain starting at head to its two neighbours:
 * the REXT_VBAR node becomes a REXT_OR node with left/right set to the
 * nodes before and after it, and both operands are unlinked from the
 * chain. Returns the (possibly new) first node of the chain.
 * Only a "|" with a node on both sides is converted: the scan starts at
 * head->next and requires op->next.
 */
REx *post_parse_binary_rex(REx *head)
{
  REx *op;
again:;
  /* if the old head became an operand, climb to the operator replacing it */
  for (; head->parent; head = head->parent);
  for (op = head->next; op && op->next; op = op->next) {
    if (op->type == REXT_VBAR) {
      op->left = op->prev;
      op->right = op->next;
      op->type = REXT_OR;
      /* splice the operator into the chain in place of left, op, right */
      if (op->prev->prev) {
	op->prev->prev->next = op;
      }
      op->prev = op->prev->prev;
      if (op->next->next) {
	op->next->next->prev = op;
      }
      op->next = op->next->next;
      /* detach both operands from the chain */
      op->left->prev = 0;
      op->right->prev = 0;
      op->left->next = 0;
      op->right->next = 0;
      op->left->parent = op;
      op->right->parent = op;
      /* the chain changed: rescan from the start */
      goto again;
    }
  }
  for (; head->parent; head = head->parent);
  return head;
}

/*
 * Resolve the operators of the chain starting at r: nested lists first
 * (recursively), then the unary operators, then the binary ones.
 * Returns the new first node of the chain, or 0 for an empty chain.
 */
REx *post_parse_rex(REx *r)
{
  REx *node;

  if (r == 0) {
    return 0;
  }
  node = r;
  while (node) {
    if (node->type == REXT_LIST) {
      node->ref = post_parse_rex(node->ref);
    }
    node = node->next;
  }
  return post_parse_binary_rex(post_parse_unary_rex(r));
}

/*
 * Build the syntax tree for the rule held in token_list. A terminating
 * ")" is appended to the list and every token is split into its
 * elements before parsing. Returns the tree, 0 for an empty rule, or
 * error_rex on a syntax error.
 */
REx *make_rex_tree(Token *token_list)
{
  Token *cursor;
  REx *tree;

  /* terminator for the outermost list */
  append_token(token_list, ")");
  cursor = token_list->next;
  while (cursor != token_list) {
    split_token(cursor);
    cursor = cursor->next;
  }

  cursor = token_list->next;
  tree = parse_token_list(&cursor);
  if (tree == error_rex) {
    return error_rex;
  }
  return post_parse_rex(tree);
}

/*
 * Connect the compiled nodes of the chain starting at r. Each node's
 * arrow_to_final is pointed at the initial state of the next node. The
 * last node of a chain keeps the target set by the compile step; if it
 * has none, a fresh final state marked with the current line number is
 * created for it. Operands of lists and operators are linked
 * recursively.
 */
void link_rex_node(REx *r)
{
  for (; r; r = r->next) {
    if (r->next) {
      r->arrow_to_final->to_state = r->next->initial_state;
    } else if (r->arrow_to_final->to_state == 0) {
      /* end of the whole rule: nothing claimed this arrow during
         compilation, so it leads to a new final state */
      State *accept = alloc_state();
      Final *mark = alloc_final();

      mark->id = get_line_number();
      add_final_state_mark(accept, mark);
      r->arrow_to_final->to_state = accept;
    }

    if (r->type == REXT_OR) {
      link_rex_node(r->left);
      link_rex_node(r->right);
    } else if (r->type == REXT_LIST || r->type == REXT_CLOS ||
	       r->type == REXT_NEGCLOS || r->type == REXT_POSCLOS) {
      link_rex_node(r->ref);
    }
  }
}

/*
 * Compile a REXT_LIST node. An empty list is a single epsilon arrow out
 * of the node's initial state. Otherwise the initial state gets an
 * epsilon arrow to the first element, and the last element's arrow leads
 * to a dummy state that carries the node's own (epsilon) arrow_to_final.
 */
void compile_rex_list_node(REx *rex)
{
  REx *last;
  Arrow *entry;
  State *exit_state;

  compile_rex_node(rex->ref);

  /* walk to the last element of the list, if there is one */
  last = rex->ref;
  while (last && last->next) {
    last = last->next;
  }

  if (last == 0) {
    /* the list has no elements */
    rex->arrow_to_final->type = AT_EPSILON;
    add_arrow_to_state(rex->initial_state, rex->arrow_to_final);
    return ;
  }

  /* arrow into the head of the list */
  entry = alloc_arrow();
  entry->to_state = rex->ref->initial_state;
  entry->type = AT_EPSILON;
  add_arrow_to_state(rex->initial_state, entry);

  /* the last element leads to a dummy state ... */
  exit_state = alloc_state();
  last->arrow_to_final->to_state = exit_state;
  /* ... from which the node's own arrow continues */
  rex->arrow_to_final->type = AT_EPSILON;
  add_arrow_to_state(exit_state, rex->arrow_to_final);
}

/*
 * Compile a REXT_OR node: the initial state gets one epsilon arrow to
 * each alternative, and both alternatives end in a shared state that
 * carries the node's own (epsilon) arrow_to_final.
 */
void compile_rex_or_node(REx *rex)
{
  REx *branch[2];
  State *join;
  int i;

  compile_rex_node(rex->left);
  compile_rex_node(rex->right);
  branch[0] = rex->left;
  branch[1] = rex->right;

  /* entrance: left first, then right */
  for (i = 0; i < 2; i++) {
    Arrow *entry = alloc_arrow();

    entry->type = AT_EPSILON;
    add_arrow_to_state(rex->initial_state, entry);
    entry->to_state = branch[i]->initial_state;
  }

  /* exit */
  join = alloc_state();
  rex->left->arrow_to_final->to_state = join;
  rex->right->arrow_to_final->to_state = join;
  rex->arrow_to_final->type = AT_EPSILON;
  add_arrow_to_state(join, rex->arrow_to_final);
}

/*
 * Compile a REXT_NEGCLOS ("?") node: an epsilon arrow enters the operand,
 * the operand ends in an exit state carrying the node's (epsilon)
 * arrow_to_final, and a second epsilon arrow from the initial state to
 * the exit state lets the operand be skipped.
 */
void compile_rex_negclos_node(REx *rex)
{
  Arrow *entry, *bypass;
  State *exit_state;

  compile_rex_node(rex->ref);

  /* entrance */
  entry = alloc_arrow();
  entry->to_state = rex->ref->initial_state;
  entry->type = AT_EPSILON;
  add_arrow_to_state(rex->initial_state, entry);

  /* exit */
  exit_state = alloc_state();
  rex->ref->arrow_to_final->to_state = exit_state;
  rex->arrow_to_final->type = AT_EPSILON;
  add_arrow_to_state(exit_state, rex->arrow_to_final);

  /* bypass around the operand */
  bypass = alloc_arrow();
  bypass->to_state = exit_state;
  bypass->type = AT_EPSILON;
  add_arrow_to_state(rex->initial_state, bypass);
}

/*
 * Compile a REXT_POSCLOS ("+") node: an epsilon arrow enters the operand,
 * the operand ends in an exit state carrying the node's (epsilon)
 * arrow_to_final, and an epsilon arrow from the exit state back to the
 * initial state allows repetition.
 */
void compile_rex_posclos_node(REx *rex)
{
  Arrow *entry, *loop_back;
  State *exit_state;

  compile_rex_node(rex->ref);

  /* entrance */
  entry = alloc_arrow();
  entry->to_state = rex->ref->initial_state;
  entry->type = AT_EPSILON;
  add_arrow_to_state(rex->initial_state, entry);

  /* exit */
  exit_state = alloc_state();
  rex->ref->arrow_to_final->to_state = exit_state;
  rex->arrow_to_final->type = AT_EPSILON;
  add_arrow_to_state(exit_state, rex->arrow_to_final);

  /* loop back; this is the part that differs from negclos */
  loop_back = alloc_arrow();
  loop_back->to_state = rex->initial_state;
  loop_back->type = AT_EPSILON;
  add_arrow_to_state(exit_state, loop_back);
}

/*
 * Compile the chain starting at rex: every node gets an initial state and
 * an arrow_to_final (target still 0; link_rex_node() or the enclosing
 * operator fills it in), then the automaton fragment for the node's type
 * is built. The rest of the chain is compiled before the node itself.
 * Node types without a case below (REXT_CLOS and unconverted operator
 * tokens) are reported on stdout and get no arrows out of their initial
 * state.
 */
void compile_rex_node(REx *rex)
{
  if (!rex) {
    return ;
  }
  rex->initial_state = alloc_state();
  rex->arrow_to_final = alloc_arrow();
  rex->arrow_to_final->to_state = 0;
  rex->arrow_to_final->seq = rex->seq;
  rex->arrow_to_final->wt = rex->wt;
  rex->arrow_to_final->opt = rex->opt;
  compile_rex_node(rex->next);

  switch (rex->type) {
  case REXT_SEQ:
    rex->arrow_to_final->type = AT_SEQ;
    add_arrow_to_state(rex->initial_state, rex->arrow_to_final);
    break;
  case REXT_WTYPE:
    rex->arrow_to_final->type = AT_WTYPE;
    add_arrow_to_state(rex->initial_state, rex->arrow_to_final);
    break;
  case REXT_LIST:
    compile_rex_list_node(rex);
    break;
  case REXT_OR:
    compile_rex_or_node(rex);
    break;
  case REXT_NEGCLOS:
    compile_rex_negclos_node(rex);
    break;
  case REXT_POSCLOS:
    compile_rex_posclos_node(rex);
    break;
  default:
    /* print the node address with %p; casting it to unsigned for %x
       would truncate it where pointers are wider than unsigned */
    printf("Sorry rex = %p , type = %x\n",
	   (void *)rex, (unsigned)rex->type);
    break;
  }
}

/* Return non-zero when both lists hold the same states in the same
   order. The lists are walked in step until they differ or one ends. */
int state_set_equal(StateSet *ss1, StateSet *ss2)
{
  for (;;) {
    if (!ss1 || !ss2) {
      break;
    }
    if (ss1->state != ss2->state) {
      break;
    }
    ss1 = ss1->next;
    ss2 = ss2->next;
  }
  /* equal only when both ran out together (or stopped on the same cell) */
  return ss1 == ss2;
}

/*
 * Add state s to the set ss, keeping the list in descending address
 * order. Returns 0 when s is already a member, 1 when it was added.
 * The head cell ss is never replaced, so the caller's pointer stays
 * valid.
 */
int add_to_state_set(StateSet *ss, State *s)
{
  if (ss->state == 0) {
    /* empty set: reuse the head cell */
    ss->state = s;
    return 1;
  }else if (s > ss->state) {
    StateSet *s1;
    /* s belongs in front: move the head's contents into a new second
       cell and store s in the head */
    s1 = alloc_state_set();
    s1->state = ss->state;
    s1->next = ss->next;
    ss->state = s;
    ss->next = s1;
    return 1;
  }else{
    StateSet **p;
    /* find the first cell whose state is not greater than s; because
       s <= ss->state here, an insertion never happens at &ss itself */
    for (p = &ss; *p && (*p)->state > s; p = &(*p)->next)
      ;
    if (!*p || (*p)->state < s) {
      StateSet *next;
      next = *p;
      *p = alloc_state_set();
      (*p)->state = s;
      (*p)->next = next;
      return 1;
    }else{
      /* (*p)->state == s: already a member */
      return 0;
    }
  }
}

/*
 * Return non-zero when two arrows have the same transition condition:
 * same type, and for AT_SEQ / AT_WTYPE also the same opt and the same
 * seq / wt. All epsilon arrows are equal. Any other type compares
 * unequal.
 */
int arrow_cond_eq(Arrow *a1, Arrow *a2)
{
  int kind = a1->type;

  if (kind != a2->type) {
    return 0;
  }
  if (kind == AT_EPSILON) {
    return 1;
  }
  if (kind == AT_SEQ) {
    if (a1->opt != a2->opt) {
      return 0;
    }
    return !seq_ent_cmp(a1->seq, a2->seq);
  }
  if (kind == AT_WTYPE) {
    if (a1->opt != a2->opt) {
      return 0;
    }
    return wtypesame(a1->wt, a2->wt);
  }
  return 0;
}

/* Return 1 when the list starting at arrows already has an entry whose
   condition equals that of a, 0 otherwise. */
int arrow_contains(NewArrow *arrows, Arrow *a)
{
  NewArrow *it;

  for (it = arrows; it; it = it->next) {
    if (arrow_cond_eq(it->arrow, a)) {
      return 1;
    }
  }
  return 0;
}

/* Push na onto the front of ns's list of outgoing arrows. */
void add_arrow_to_new_state(NewState *ns, NewArrow *na)
{
  NewArrow *old_head = ns->arrows.next;

  ns->arrows.next = na;
  na->next = old_head;
}

/* Add ns to the set nss: an empty set stores it in the head cell,
   otherwise a new cell is inserted right after the head. No duplicate
   check is made. */
void add_new_state_set(NewStateSet *nss, NewState *ns)
{
  NewStateSet *cell;

  if (nss->state == 0) {
    nss->state = ns;
    return ;
  }
  cell = alloc_new_state_set();
  cell->state = ns;
  cell->next = nss->next;
  nss->next = cell;
}

/* Look in nss for the merged state whose member states are exactly ss.
   Returns it, or 0 when there is none (or nss is empty). */
NewState *new_state_set_find(NewStateSet *nss, StateSet *ss)
{
  NewStateSet *it;

  if (nss->state == 0) {
    return 0;
  }
  for (it = nss; it; it = it->next) {
    if (state_set_equal(it->state->olds, ss)) {
      return it->state;
    }
  }
  return 0;
}

/*
 * Add s to ss together with every state reachable from it through
 * epsilon arrows. A state that was already in ss is not expanded again.
 */
void add_to_state_set_epsilon_eq(StateSet *ss, State *s)
{
  Arrow *edge;

  /* small optimization: a state whose only arrow is an epsilon arrow is
     skipped in favour of that arrow's target */
  for (;;) {
    Arrow *only = s->arrows.next;

    if (!only || only->next != 0 || only->type != AT_EPSILON) {
      break;
    }
    s = only->to_state;
  }

  if (!add_to_state_set(ss, s)) {
    return ;
  }
  for (edge = s->arrows.next; edge; edge = edge->next) {
    if (edge->type == AT_EPSILON) {
      add_to_state_set_epsilon_eq(ss, edge->to_state);
    }
  }
}

/*
 * Create the merged state that stands for the set of states ss, register
 * it in graph_nodes, and build its outgoing arrows. Each distinct
 * non-epsilon condition found on the member states gets one arrow; its
 * target is the merged state for the epsilon-closed set of states
 * reachable under that condition. That target is reused from graph_nodes
 * when it exists and built recursively otherwise. Returns the new merged
 * state.
 */
NewState *merge_states(StateSet *ss, NewStateSet *graph_nodes)
{
  NewState *this_state;
  StateSet *p;

  this_state = alloc_new_state();
  this_state->olds = ss;
  /* registered before its arrows are built, so a recursive call that
     arrives at the same state set finds it */
  add_new_state_set(graph_nodes, this_state);

  /* examine every arrow of every member state */
  for (p = this_state->olds; p && p->state; p = p->next) {
    State *s = p->state;
    Arrow *a;
    for (a = s->arrows.next; a; a = a->next) {
      /* a non-epsilon condition not handled yet for this merged state:
	 add an arrow for it */
      if (a->type != AT_EPSILON && 
	  !arrow_contains(this_state->arrows.next, a)) {
	StateSet *to_ss;
	NewArrow *na;
	StateSet *q;
	to_ss = alloc_state_set();
	/* collect arrows with the same condition, from this member state
	   onwards */
	for (q = p; q && q->state; q = q->next) {
	  State *t = q->state;
	  Arrow *b;
	  for (b = t->arrows.next; b; b = b->next) {
	    if (arrow_cond_eq(a, b)) {
	      /* add the target, and what is reachable from it through
		 epsilon arrows, to to_ss */
	      add_to_state_set_epsilon_eq(to_ss, b->to_state);
	    }
	  }
	} /* for each arrow */
	/* reuse the merged state with exactly the members of to_ss when
	   there is one; otherwise build it */
	na = alloc_new_arrow();
	na->to_state = new_state_set_find(graph_nodes, to_ss);
	if (na->to_state == 0) {
	  na->to_state = merge_states(to_ss, graph_nodes);
	}
	na->arrow = a;
	add_arrow_to_new_state(this_state, na);
      } /* if ( new arrow found ) */
    }
  } /* for each arrow */

  return this_state;
}

/*
 * Build the State for the merged state ns and, recursively, for every
 * merged state reachable from it that has not been converted yet.
 * The new State gets one final mark per distinct id found on the member
 * states, plus a copy of each NewArrow's original arrow retargeted at
 * the converted destination.
 */
void convert_to_state(NewState *ns)
{
  NewArrow *na;
  StateSet *ss;

  ns->state = alloc_state();
  /* collect the final marks of the member states */
  for (ss = ns->olds; ss && ss->state; ss = ss->next) {
    Final *f;
    for (f = ss->state->final.next; f; f = f->next) {
      Final *g, *copy;
      /* skip ids that are already marked on the new state */
      for (g = ns->state->final.next; g && g->id != f->id; g = g->next)
	;
      if (g) {
	continue;
      }
      /* Link a copy, never f itself: add_final_state_mark() rewrites the
	 next pointer of the node it links, which would cut this walk
	 over the member's list short and, since one member state can
	 belong to several merged states, splice the final lists of
	 different merged states together. */
      copy = alloc_final();
      *copy = *f;
      add_final_state_mark(ns->state, copy);
    }
  }
  /* convert the destinations and connect to the converted states */
  for (na = ns->arrows.next; na; na = na->next) {
    Arrow *a;
    if (na->to_state->state == 0){
      convert_to_state(na->to_state);
    }
    a = dup_arrow(na->arrow);
    a->to_state = na->to_state->state;
    add_arrow_to_state(ns->state, a);
  }
}

/*
 * Merge all rules whose initial states are listed in ss into one
 * automaton and return its initial State. The start set is the epsilon
 * closure of all the listed initial states.
 */
State *merge_rules(StateSet *ss)
{
  StateSet *start = alloc_state_set();
  StateSet *cur;
  NewStateSet *nodes;
  NewState *root;

  for (cur = ss; cur != 0 && cur->state != 0; cur = cur->next) {
    add_to_state_set_epsilon_eq(start, cur->state);
  }
  nodes = alloc_new_state_set();
  root = merge_states(start, nodes);
  convert_to_state(root);

  return root->state;
}

/* Debug helper: print the id of every final mark of s as "(id)". */
void print_finals(State *s)
{
  Final *mark = s->final.next;

  while (mark) {
    printf("(%d)", mark->id);
    mark = mark->next;
  }
}

/* presumably provides the seq_ent internals (nr_dic_ents, dic_ents) that
   print_rule_rec() reads -- confirm */
#include "../src-diclib/dic_ent.h"
/* written into Arrow.type by print_rule_rec() once an arrow was printed */
#define AT_END -1
/*
 * Debug helper: print every arrow reachable from s, depth first.
 * WARNING: destructive. Each printed arrow has its type overwritten with
 * AT_END so that it is skipped on a later visit (this is what stops the
 * recursion on cyclic graphs); the graph is unusable for matching
 * afterwards.
 */
void print_rule_rec(State *s)
{
  Arrow *a;

  for (a = s->arrows.next; a; a = a->next) {
    switch(a->type){
    case AT_EPSILON:
      printf("arrow: EPSILON ");
      break;
    case AT_SEQ:
      printf("arrow: SEQ ");
      if (a->seq && 
	  a->seq->nr_dic_ents > 0) {
	printf("\"");
	putxstr(&a->seq->dic_ents[0]->str);
	printf("\" ");
      }else{
	printf("NULL ");
      }
      break;
    case AT_WTYPE:
      printf("arrow: WTYPE ");
      break;
    default:
      /* already printed (AT_END) or unknown: skip */
      continue;
    }
    /* %p takes a void pointer */
    printf("[%p]->[%p", (void *)s, (void *)a->to_state);
    print_finals(a->to_state);
    printf("]\n");
    a->type = AT_END;
    print_rule_rec(a->to_state);
  }
}

/* Debug helper: print the address of the initial state s and then every
   arrow reachable from it. print_rule_rec() overwrites the type of each
   arrow it prints, so the graph is modified by this call. */
void print_rule(State *s)
{
  /* %p takes a void pointer */
  printf("Initial: [%p]\n", (void *)s);
  print_rule_rec(s);
}

/* ƤФ */
int init_seg_struct_tab()
{
  char *fn;
  char **tokens;
  int nr;
  State* merged_rule;
  int i;
  fn = conf_get_str("STRUCT");
  if (!fn) {
    fprintf(stderr, "Anthy: Segment dictonary unspecified.\n");
    return -1;
  }
  if (open_file(fn) == -1) {
    fprintf(stderr, "Anthy: Failed to open segment dict (%s).\n",fn);
    return -1;
  }

  StateAllocator = create_allocator(sizeof(State), 0);
  ArrowAllocator = create_allocator(sizeof(Arrow), 0);

  RExAllocator = create_allocator(sizeof(REx), 0);
  FinalAllocator = create_allocator(sizeof(Final), 0);
  StateSetAllocator = create_allocator(sizeof(StateSet), 0);
  NewArrowAllocator = create_allocator(sizeof(NewArrow), 0);
  NewStateAllocator = create_allocator(sizeof(NewState), 0);
  NewStateSetAllocator = create_allocator(sizeof(NewStateSet), 0);
  
  for ( i = 0 ; i < MAX_RULE_LEVEL ; i++){
    gInitialStateSet[i] = alloc_state_set();
  }
  dic_activate_session(0);
  /*ե뤫롼Ĥɤ*/
  while (!read_line(&tokens, &nr)) {
    parse_line(tokens, nr);
    free_line();
  }
  close_file();

  /*롼٥뤴Ȥʻ礷1ĤΥ롼ˤϿ*/
  for (i = 0; i < MAX_RULE_LEVEL; i++) {
    /*ΤȤϿʤ*/
    if (gInitialStateSet[i]->state != 0) {
      merged_rule = merge_rules(gInitialStateSet[i]);
#if 0
      printf("merged rule level %d\n", i);
      print_rule(merged_rule);
      printf("\n");
#endif
      register_segstruct(merged_rule, i);
    }
  }

#if 0
  while (rules) {
    print_rule(rules->state);
    rules = rules->next;
    printf("\n");
    fflush(stdout);
  }
#endif

  /* State,Arrow,Finalʳ */
  free_allocator(RExAllocator);
  free_allocator(StateSetAllocator);
  free_allocator(NewArrowAllocator);
  free_allocator(NewStateAllocator);
  free_allocator(NewStateSetAllocator);

  return 0;
}
