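/*
 * Word-level expansion of a segment.
 *
 * Related files named by the original (mis-encoded) comment:
 * segcompse.c and segstruct.c.  This file calls assign_word_type(),
 * which is not defined here.
 *
 * This file searches for the ways a segment can be divided into words.
 * (The original comment illustrated this with a sample segment and two
 * alternative splits; that example text was lost to mis-encoding.)
 * The possible splits are first enumerated by depth-first search, and
 * assign_word_type() is then applied to every enumerated candidate.
 *
 * Copyright (C) 2000-2001 TABATA Yusuke
 */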
#include <stdlib.h>
#include <stdio.h>

#include <dic.h>
#include <wtype.h>
#include "main.h"
#include "segexpand.h"

/* Maximum number of candidates that split_heap can hold */
#define SPLIT_HEAP_SIZE 32

/* Number of candidates currently stored in split_heap */
int nr_split_candidates;
/* Buffer holding the split candidates produced while expanding a segment */
struct split_ent *split_heap[SPLIT_HEAP_SIZE];

/* Forward declarations of the file-local helpers defined below */
static void free_split_ent_heap();
static int nr_words_in_split_ent(struct seg_ent *, int *);
static int make_word_array(struct seg_ent *, int *, struct split_ent *);
static int get_word_len_from_split_ent(int *, int, int);

static void depth_first_search(struct seg_ent *e, int *, int);
static void push_back_split_ent_candidate(struct seg_ent *e, int *);

/*
 * Return the distance from `begin` to the next set mark in `s`.
 *
 * Entries s[begin + 1] .. s[end] (inclusive) are examined in order and
 * the offset of the first non-zero one, relative to `begin`, is
 * returned.  If none of them is set the function falls back to 1.
 */
int get_word_len_from_split_ent(int *s, int begin, int end)
{
  int pos = begin + 1;

  while (pos <= end) {
    if (s[pos] != 0) {
      return pos - begin;
    }
    pos++;
  }
  return 1;
}

/*
 * Count the marks set inside the core range of a segment.
 *
 * Looks at mark[core_from] .. mark[core_from + core_len - 1] and
 * returns how many of those entries are non-zero.
 */
int nr_words_in_split_ent(struct seg_ent *s, int *mark)
{
  int count = 0;
  int pos = s->core_from;

  while (pos < s->core_from + s->core_len) {
    if (mark[pos] != 0) {
      count++;
    }
    pos++;
  }
  return count;
}

/*
 * Fill in the word entries of `sp` from the boundary marks in `w`.
 *
 * Starting at s->core_from, each of the sp->nr_words entries receives
 * a string that points into s->str, whose length is the distance to
 * the next mark, together with the seq_ent looked up for that string.
 * Always returns 0.
 */
int make_word_array(struct seg_ent *s, int *w, struct split_ent *sp)
{
  int idx = 0;
  int offset = s->core_from;

  while (idx < sp->nr_words) {
    xstr *word = &sp->we[idx].str;

    word->len = get_word_len_from_split_ent(w, offset, s->str.len);
    word->str = &s->str.str[offset];
    /* step past this word before looking it up, as the lookup comes last */
    offset += word->len;
    sp->we[idx].se = get_seq_ent_from_xstr(word);
    idx++;
  }
  return 0;
}

/*
 * Debug helper: print one split candidate to standard output.
 *
 * Output is the rule number followed by every word of the candidate,
 * each word terminated by a '.', and a final newline.
 */
void print_split_ent(struct split_ent *s)
{
  int word = 0;

  printf(" %d: ", s->rule);
  while (word < s->nr_words) {
    int ch = 0;

    while (ch < s->we[word].str.len) {
      putxchar(s->we[word].str.str[ch]);
      ch++;
    }
    printf(".");
    word++;
  }
  printf("\n");
}

/*
 * Release a split candidate together with its word array.
 *
 * Passing NULL is allowed and does nothing, mirroring free().
 */
void free_split_ent(struct split_ent *e)
{
  if (e == NULL) {
    return;
  }
  free(e->we);
  free(e);
}

/*
 * Release every candidate currently stored in split_heap.
 *
 * Only the first nr_split_candidates slots are visited.  The counter
 * and the slots themselves are left untouched.
 */
void free_split_ent_heap()
{
  int idx = 0;

  while (idx < nr_split_candidates) {
    free_split_ent(split_heap[idx]);
    idx++;
  }
}

/*
 * Build a split_ent from the current boundary marks and append it to
 * split_heap.
 *
 * seg: segment being split; its core range and string are copied
 *      into the new candidate.
 * w:   boundary marks; a non-zero entry marks the start of a word.
 *
 * The candidate is silently dropped when split_heap is already full or
 * when memory cannot be allocated, so split_heap only ever contains
 * fully initialised entries.
 */
void push_back_split_ent_candidate(struct seg_ent *seg, int *w)
{
  struct split_ent *e;

  /* never write past the end of the candidate buffer */
  if (nr_split_candidates >= (int)(sizeof(split_heap) / sizeof(split_heap[0]))) {
    return;
  }

  e = malloc(sizeof(struct split_ent));
  if (e == NULL) {
    return;
  }
  e->nr_words = nr_words_in_split_ent(seg, w);
  e->we = malloc(sizeof(struct word_ent) * e->nr_words);
  /* malloc(0) may legitimately return NULL, so only treat NULL as a
   * failure when words were actually requested */
  if (e->we == NULL && e->nr_words > 0) {
    free(e);
    return;
  }
  e->xs = seg->str;
  e->len = seg->core_len;
  e->from = seg->core_from;
  e->rule = 0;
  e->seg = seg;

  /* w carries the word boundary marks; turn them into word entries */
  make_word_array(seg, w, e);

  /* append to the candidate buffer */
  split_heap[nr_split_candidates] = e;
  nr_split_candidates++;
}

/*
 * Enumerate, depth first, the ways of splitting the core range of `e`
 * into words, starting at offset `from`.
 *
 * w holds the boundary marks chosen so far.  When `from` reaches the
 * end of the core range the current marks are recorded as a candidate.
 * Otherwise every prefix of the remaining text is tried, longest
 * first; a prefix whose seq_ent type has ST_WORD set gets a mark at
 * its end, the search recurses from there, and the mark is cleared
 * again afterwards.  No further splitting is attempted once
 * SPLIT_HEAP_SIZE candidates exist.
 */
void depth_first_search(struct seg_ent *e, int *w, int from)
{
  int remaining = e->core_from + e->core_len - from;
  int len;

  if (remaining == 0) {
    /* the whole core range has been split */
    push_back_split_ent_candidate(e, w);
    return;
  }
  if (nr_split_candidates >= SPLIT_HEAP_SIZE) {
    return;
  }

  len = remaining;
  while (len > 0) {
    xstr head;
    seq_ent_t se;
    int next = from + len;

    head.str = &e->str.str[from];
    head.len = len;
    se = get_seq_ent_from_xstr(&head);
    if (get_seq_ent_type(se) & ST_WORD) {
      w[next] = 1;
      depth_first_search(e, w, next);
      w[next] = 0;
    }
    len--;
  }
}

/*
 * Split the first seg_len characters of segment `e` into words and
 * apply assign_word_type() to the resulting candidates.
 *
 * e:       segment to expand; its core_from/core_len are overwritten
 *          with 0 and seg_len.
 * seg_len: length of the core range; must lie in 0 .. e->str.len.
 *
 * The candidates are enumerated into split_heap by depth-first search.
 * assign_word_type() is then applied to every candidate at increasing
 * levels; the loop stops after the first level for which the summed
 * return values are non-zero, or after NR_RULE_LEVELS levels.  All
 * candidates are released before returning.
 *
 * Nothing is enumerated when seg_len is out of range or when the mark
 * array cannot be allocated.
 */
void expand_segment(struct seg_ent *e, int seg_len)
{
  enum { NR_RULE_LEVELS = 10 };
  int *w;
  int level, i, n;

  e->core_from = 0;
  e->core_len = seg_len;

  nr_split_candidates = 0;

  /* w[core_from + core_len] is written below, so the core range has to
   * fit inside the segment string */
  if (seg_len < 0 || seg_len > e->str.len) {
    return;
  }

  /* one mark per character position plus one for the end; heap
   * allocated because the size depends on the input length */
  w = calloc((size_t)e->str.len + 1, sizeof(int));
  if (w == NULL) {
    return;
  }
  w[e->core_from] = 1;
  w[e->core_from + e->core_len] = 1;

  /* enumerate the splits into split_heap */
  depth_first_search(e, w, e->core_from);
  /* candidates point into e->str, not into w, so w can go now */
  free(w);

  /* apply the rules to every candidate, level by level */
  n = 0;
  for (level = 0; level < NR_RULE_LEVELS && n == 0; level++) {
    for (i = 0; i < nr_split_candidates; i++) {
      n += assign_word_type(split_heap[i], level);
    }
  }

  /* release the candidates */
  free_split_ent_heap();
}
