/* Copyright (c) 1991-2002 Doshita Lab. Speech Group, Kyoto University */
/* Copyright (c) 2000-2002 Speech and Acoustics Processing Lab., NAIST */
/*   All rights reserved   */

/* cdset.c --- left/right-context HMM set for inter-word CD handling */

/* $Id: cdset.c,v 1.4 2002/09/11 22:01:50 ri Exp $ */

#include <sent/stddefs.h>
#include <sent/htk_param.h>
#include <sent/htk_hmm.h>

/* Number of state-pointer entries by which the per-state variant array of a
   cdset is first allocated and later enlarged (see regist_cdset()). */
#define CD_STATE_SET_STEP 10	/* malloc step */


/* NOTE(review): not referenced anywhere in the visible part of this file;
   confirm whether it is still needed. */
static int maxlcnum;
/* Reset the cdset bookkeeping held in hmminfo: both the index tree and the
   head of the cdset list are set to NULL. */
static void
cdset_init(HTK_HMM_INFO *hmminfo)
{
  hmminfo->cdset_info.cdtree = NULL;
  hmminfo->cdset_info.cdroot = NULL;
}

/* Obtain storage for one CD_Set from mybmalloc() and return it.
   No field is set here, and hmminfo is not used. */
static CD_Set *
cdset_new(HTK_HMM_INFO *hmminfo)
{
  CD_Set *fresh;

  fresh = (CD_Set *)mybmalloc(sizeof(CD_Set));
  return fresh;
}
/* Push `new' onto the front of the cdset list kept in hmminfo. */
static void
cdset_add(HTK_HMM_INFO *hmminfo, CD_Set *new)
{
  CD_Set *old_head = hmminfo->cdset_info.cdroot;

  hmminfo->cdset_info.cdroot = new;
  new->next = old_head;
}


/* main cdset lookup function */
/* Search the cdset index tree of hmminfo for the entry named `cdstr'.
   Returns the CD_Set whose name is exactly `cdstr', or NULL when
     - no index tree has been built yet,
     - the index search yields no entry, or
     - the entry found by the index search has a different name. */
CD_Set *
cdset_lookup(HTK_HMM_INFO *hmminfo, char *cdstr)
{
  CD_Set *cd;

  /* regist_cdset() never searches an empty tree; apply the same guard here */
  if (hmminfo->cdset_info.cdtree == NULL) {
    return NULL;
  }

  cd = aptree_search_data(cdstr, hmminfo->cdset_info.cdtree);
  /* do not touch cd->name unless the search really returned an entry */
  if (cd == NULL) {
    return NULL;
  }

  /* the index search result still has to be verified by name */
  if (strmatch(cdstr, cd->name)) {
    return cd;
  }
  return NULL;
}

/* find cdset of the same left-context for given HMM name */
/* The name is converted with leftcenter_name() inside a temporary heap
   buffer and the result is handed to cdset_lookup(), whose return value
   is passed back unchanged.  The buffer is released before returning. */
CD_Set *
lcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname)
{
  CD_Set *found;
  char *work;

  /* scratch area as long as the original name, pre-filled with it */
  work = (char *)mymalloc(strlen(hmmname)+1);
  strcpy(work, hmmname);

  found = cdset_lookup(hmminfo, leftcenter_name(hmmname, work));

  free(work);
  return found;
}

/* find cdset of the same right context for given HMM name */
/* The name is converted with rightcenter_name() inside a temporary heap
   buffer and the result is handed to cdset_lookup(), whose return value
   is passed back unchanged.  The buffer is released before returning. */
CD_Set *
rcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname)
{
  CD_Set *found;
  char *work;

  /* scratch area as long as the original name, pre-filled with it */
  work = (char *)mymalloc(strlen(hmmname)+1);
  strcpy(work, hmmname);

  found = cdset_lookup(hmminfo, rightcenter_name(hmmname, work));

  free(work);
  return found;
}


/* output information of cdset to stdout */
/* Prints the set name, then one line per state index telling either how
   many state variants are registered there or that none exist. */
static void
put_cdset(CD_Set *a)
{
  int idx = 0;

  printf("name: %s\n", a->name);
  while (idx < a->state_num) {
    if (a->stateset[idx].num != 0) {
      printf("\t[state %d]  %d variants\n", idx, a->stateset[idx].num);
    } else {
      printf("\t[state %d]  not exist\n", idx);
    }
    idx++;
  }
}

/* output all cdset info to stdout */
/* Walks the cdset list of hmminfo from its head and prints every entry
   with put_cdset(). */
void
put_all_cdinfo(HTK_HMM_INFO *hmminfo)
{
  CD_Set *cur = hmminfo->cdset_info.cdroot;

  while (cur != NULL) {
    put_cdset(cur);
    cur = cur->next;
  }
}


/* register HMM `d' as cdset member of HMM "cdname" */
/* return FALSE if already registered */
boolean
regist_cdset(HTK_HMM_INFO *hmminfo, HTK_HMM_Data *d, char *cdname)
{
  boolean need_new;
  CD_State_Set *tmp;
  CD_Set *lset, *lmatch;
  int j,n;
  boolean changed = FALSE;
  
  /* check if the cdset already exist */
  need_new = TRUE;
  if (hmminfo->cdset_info.cdtree != NULL) {
    lmatch = aptree_search_data(cdname, hmminfo->cdset_info.cdtree);
    if (strmatch(lmatch->name, cdname)) {
      /* exist, add to it later */
      lset = lmatch;
      need_new = FALSE;
      /* if the state num is larger than allocated, expand the lset */
      if (d->state_num > lset->state_num) {
	lset->stateset = (CD_State_Set *)myrealloc(lset->stateset, sizeof(CD_State_Set) * d->state_num);
	/* 0 1 ... (lset->state_num-1) */
	/* N A ... N                   */
	/* 0 1 ...                     ... (d->state_num-1) */
	/* N A ... A ..................... N                */
	/* malloc new area to expanded state (N to A above) */
	for(j = lset->state_num - 1; j < d->state_num - 1; j++) {
	  lset->stateset[j].maxnum = CD_STATE_SET_STEP;
	  lset->stateset[j].s = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * lset->stateset[j].maxnum);
	  lset->stateset[j].num = 0;
	}
	lset->stateset[d->state_num-1].s = NULL;
	lset->stateset[d->state_num-1].num = 0;
	lset->stateset[d->state_num-1].maxnum = 0;
	
	lset->state_num = d->state_num;

	/* update transition table */
	lset->tr = d->tr;

	changed = TRUE;
      }
    }
  }

  if (need_new) {
    /* allocate as new with blank data */
    lset = cdset_new(hmminfo);
    lset->name = mybstrdup(cdname);
    lset->state_num = d->state_num;
    lset->stateset = (CD_State_Set *)mymalloc(sizeof(CD_State_Set) * lset->state_num);
    /* assume first and last state has no outprob */
    lset->stateset[0].s = lset->stateset[lset->state_num-1].s = NULL;
    lset->stateset[0].num = lset->stateset[lset->state_num-1].num = 0;
    lset->stateset[0].maxnum = lset->stateset[lset->state_num-1].maxnum = 0;
    for(j=1;j<lset->state_num-1; j++) {
      /* pre-allocate only the first step */
      lset->stateset[j].maxnum = CD_STATE_SET_STEP;
      lset->stateset[j].s = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * lset->stateset[j].maxnum);
      lset->stateset[j].num = 0;
    }
    /* assign transition table of first found HMM (ad-hoc?) */
    lset->tr = d->tr;
    cdset_add(hmminfo, lset);
    /* add to search index tree */
    if (hmminfo->cdset_info.cdtree == NULL) {
      hmminfo->cdset_info.cdtree = aptree_make_root_node(lset);
    } else {
      aptree_add_entry(lset->name, lset, lmatch->name, &(hmminfo->cdset_info.cdtree));
    }

    changed = TRUE;
  }
    
  /*j_printerr("add to \"%s\"\n", lset->name);*/
  /* register each HMM states to the lcdset */
  for (j=1;j<d->state_num-1;j++) {
    tmp = &(lset->stateset[j]);
    /* check if the state has already registered */
    for(n = 0; n < tmp->num ; n++) {
      if (tmp->s[n] == d->s[j]) { /* compare by pointer */
	/*j_printerr("\tstate %d has same\n", n);*/
	break;
      }
    }
    if (n < tmp->num ) continue;	/* same state found, cancel regist. */
    
    /* expand storage area if necessary */
    if (tmp->num >= tmp->maxnum) {
      tmp->maxnum += CD_STATE_SET_STEP;
      tmp->s = (HTK_HMM_State **)myrealloc(tmp->s, sizeof(HTK_HMM_State *) * tmp->maxnum);
    }
    
    tmp->s[tmp->num] = d->s[j];
    tmp->num++;

    changed = TRUE;
  }

  return(changed);
}

/* construct whole cdsets for HMM info */
/* For every non-pseudo logical HMM of hmminfo, three cdsets are fed through
   regist_cdset(): one named by leftcenter_name(), one by rightcenter_name()
   and one by center_name().  Afterwards hmm_add_pseudo_phones() is called.
   Always returns TRUE.

   The scratch buffer for the derived names is sized from the longest
   logical HMM name instead of being a fixed 50-byte array, so long names
   no longer overrun it.  This assumes a derived name is never longer than
   the name it is derived from, which is the same assumption the
   *_lookup_by_hmmname() functions make when sizing their buffers. */
boolean
make_cdset(HTK_HMM_INFO *hmminfo)
{
  HMM_Logical *lg;
  char *buf;
  size_t len, maxlen;

  cdset_init(hmminfo);

  /* find the longest logical HMM name to size the scratch buffer */
  maxlen = 0;
  for(lg = hmminfo->lgstart; lg; lg = lg->next) {
    len = strlen(lg->name);
    if (len > maxlen) maxlen = len;
  }
  buf = (char *)mymalloc(maxlen + 1);

  /* make cdset name from logical HMM name */
  /* left-context set: "a-k" for /a-k+i/, /a-k+o/, ...
     for 1st pass (word end) */
  for(lg = hmminfo->lgstart; lg; lg = lg->next) {
    if (lg->is_pseudo) continue;
    regist_cdset(hmminfo, lg->body.defined, leftcenter_name(lg->name, buf));
  }
  /* right-context set: "a+o" for /b-a+o/, /t-a+o/, ...
     for 2nd pass (word beginning) */
  for(lg = hmminfo->lgstart; lg; lg = lg->next) {
    if (lg->is_pseudo) continue;
    regist_cdset(hmminfo, lg->body.defined, rightcenter_name(lg->name, buf));
  }
  /* both-context set: "a" for all triphone with same base phone "a"
     for 1st pass (1 phoneme word, with no previous word hypo.) */
  for(lg = hmminfo->lgstart; lg; lg = lg->next) {
    if (lg->is_pseudo) continue;
    regist_cdset(hmminfo, lg->body.defined, center_name(lg->name, buf));
  }

  /* regist_cdset() stores its own copy of the name, so the scratch
     buffer is no longer needed */
  free(buf);

  /* now that the cdsets are completely built, */
  /* add the `pseudo' biphones and monophones to the logical HMM names; */
  /* they point not to a defined HMM, but to a CD_Set structure */
  hmm_add_pseudo_phones(hmminfo);

  return(TRUE);
}
