/* Copyright (c) 1991-2002 Doshita Lab. Speech Group, Kyoto University */
/* Copyright (c) 2000-2002 Speech and Acoustics Processing Lab., NAIST */
/*   All rights reserved   */

/* wchmm.c --- build lexicon tree (word-conjunction HMM) */

/* $Id: wchmm.c,v 1.9 2002/09/11 22:02:33 ri Exp $ */

/* wchmm = word conjunction HMM = lexicon tree */

#include <julius.h>


/* debug counter: incremented in wchmm_link_hmm() each time the requested
   arc is found to exist already */
static int bogus_arc = 0;
/* number of homophone words in vocabulary (counted by build_wchmm()) */
static int homophone_word_num;
/* number of word-end nodes duplicated by wchmm_duplicate_leafnode() */
static int dupcount = 0;

/* #define WCHMM_SIZE_CHECK */


/**************************************************************/
/*********** wchmm initialize *********************************/
/**************************************************************/

/* wchmm $B$r(B malloc $B$9$k(B */
/* allocate a new WCHMM_INFO; the block-allocator root and all pointers to
   linguistic information start out as NULL (build_wchmm() requires the
   latter to be set before it is called) */
WCHMM_INFO *
wchmm_new()
{
  WCHMM_INFO *wchmm = (WCHMM_INFO *)mymalloc(sizeof(WCHMM_INFO));

  /* nothing has been allocated from the block allocator yet */
  wchmm->malloc_root = NULL;

  /* no vocabulary / language model attached yet */
  wchmm->winfo = NULL;
#ifdef USE_DFA
  wchmm->dfa = NULL;
#endif
#ifdef USE_NGRAM
  wchmm->ngram = NULL;
#endif

  return wchmm;
}

/* wchmm $B$r=i4|2=$9$k(B */
/* prepare the work areas of a wchmm before any word is added:
   node-indexed arrays start with MAXWCNSTEP entries, word-indexed arrays
   get one entry per vocabulary word, and the node count is reset to 0 */
static void
wchmm_init(WCHMM_INFO *wchmm)
{
  WORD_INFO *winfo = wchmm->winfo;

  /* no node exists yet */
  wchmm->n = 0;

  /* node-indexed areas (grown on demand by wchmm_extend()) */
  wchmm->maxwcn  = MAXWCNSTEP;
  wchmm->state   = (WCHMM_STATE *)mymalloc(sizeof(WCHMM_STATE) * wchmm->maxwcn);
  wchmm->ststart = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wchmm->maxwcn);
  wchmm->stend   = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wchmm->maxwcn);

  /* word-indexed areas */
  wchmm->offset    = (int **)mymalloc(sizeof(int *) * winfo->num);
  wchmm->wordend   = (int *)mymalloc(sizeof(int) * winfo->num);
  wchmm->startnode = (int *)mymalloc(sizeof(int) * winfo->num);
  wchmm->wordend_a = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num);

  /* optional tables are created later; mark them as absent */
#ifdef UNIGRAM_FACTORING
  wchmm->start2isolate = NULL;
#endif
#ifndef CATEGORY_TREE
  wchmm->state2scid = NULL;
#endif
}

/* $BLZ$NBg$-$5$K1~$8$FNN0h$r(B MAXWCNSTEP $BJ,?-D9(B */
/* grow the three node-indexed arrays (state, ststart, stend) so that they
   can hold MAXWCNSTEP more nodes than before */
static void
wchmm_extend(WCHMM_INFO *wchmm)
{
  /* new capacity */
  wchmm->maxwcn = wchmm->maxwcn + MAXWCNSTEP;

  wchmm->stend   = (WORD_ID *)myrealloc(wchmm->stend,
					sizeof(WORD_ID) * wchmm->maxwcn);
  wchmm->ststart = (WORD_ID *)myrealloc(wchmm->ststart,
					sizeof(WORD_ID) * wchmm->maxwcn);
  wchmm->state   = (WCHMM_STATE *)myrealloc(wchmm->state,
					    sizeof(WCHMM_STATE) * wchmm->maxwcn);
}

/* wchmm $B$r(B free $B$9$k(B */
/* release a wchmm and everything it owns */
void
wchmm_free(WCHMM_INFO *w)
{
  /* The following are allocated with mybmalloc2() under w->malloc_root and
     are therefore all released by this single mybfree2() call:
       - arc cells in w->state[i].ac
       - phone offset tables w->offset[i][]
       - (PASS1_IWCD) RC_INFO / LRC_INFO attached to the states */
  mybfree2(&(w->malloc_root));

#ifndef CATEGORY_TREE
  {
    int node;
    S_CELL *cell, *following;

    /* walk the S_CELL list of every node and free it cell by cell */
    for (node = 0; node < w->n; node++) {
      for (cell = w->state[node].sc; cell; cell = following) {
	following = cell->next;
	free(cell);
      }
    }
  }
  if (w->state2scid != NULL) free(w->state2scid);
#endif
#ifdef UNIGRAM_FACTORING
  if (w->start2isolate != NULL) free(w->start2isolate);
#endif

  /* word-indexed arrays */
  free(w->wordend_a);
  free(w->startnode);
  free(w->wordend);
  free(w->offset);
  /* node-indexed arrays */
  free(w->stend);
  free(w->ststart);
  free(w->state);
  /* finally the structure itself */
  free(w);
}


/**************************************************************/
/*********** word sort functions ******************************/
/**************************************************************/

/* vocabulary consulted by the qsort() comparison callbacks; the sort
   wrapper functions set it right before calling qsort() */
static WORD_INFO *local_winfo;

/* $BC18l$r2;AG$N$J$i$S$G%=!<%H$9$k(Bqsort$B4X?t(B */
/* qsort function to sort words by their phoneme sequence */
static int
compare_wseq(WORD_ID *widx1, WORD_ID *widx2)
{
  int len1, len2, n;
  int p=0;
  
  len1 = local_winfo->wlen[*widx1];
  len2 = local_winfo->wlen[*widx2];

  n=0;
  /*  while (n < len1 && n < len2 && (p = (int)winfo->wseq[*widx1][n] - (int)winfo->wseq[*widx2][n]) == 0 ) n++;*/
  while (n < len1 && n < len2 && (p = strcmp((local_winfo->wseq[*widx1][n])->name, (local_winfo->wseq[*widx2][n])->name)) == 0 ) n++;
  if (n < len1) {
    if (n < len2) {
      /* differ */
      return(p);
    } else {
      /* 2 is part of 1 */
      return(1);
    }
  } else {
    if (n < len2) {
      /* 1 is part of 2 */
      return(-1);
    } else {
      /* same */
      return(0);
    }
  }
}

/* $BC18l(BID$B$N=89g(B windex[bgn..bgn+len-1] $B$rC18l$N2;AG$J$i$S$G%=!<%H$9$k(B */
/* sort word IDs in windex[bgn..bgn+len-1] by their phoneme sequence order */
static void
wchmm_sort_idx_by_wseq(WORD_INFO *winfo, WORD_ID *windex, WORD_ID bgn, WORD_ID len)
{
  local_winfo = winfo;
  qsort(&(windex[bgn]), len, sizeof(WORD_ID), (int (*)(const void *, const void *))compare_wseq);
}

#ifdef CATEGORY_TREE
/* $BC18l$r%+%F%4%j(BID$B$G%=!<%H$9$k(Bqsort$B4X?t(B */
/* qsort function to sort words by their category ID */
static int
compare_category(WORD_ID *widx1, WORD_ID *widx2)
{
  int c1,c2;
  c1 = local_winfo->wton[*widx1];
  c2 = local_winfo->wton[*widx2];
  return(c1 - c2);
}

/* $BC18l(BID$B=89g(B windex[0..len-1] $B$r%+%F%4%j(BID$B$G%=!<%H$9$k(B */
/*  sort word IDs in windex[0..len-1] by their category ID */
static void
wchmm_sort_idx_by_category(WORD_INFO *winfo, WORD_ID *windex, WORD_ID len)
{
  local_winfo = winfo;
  qsort(windex, len, sizeof(WORD_ID), (int (*)(const void *, const void *))compare_category);
}
#endif /* CATEGORY_TREE */
  

/*******************************************************/
/************** link part of words  ********************/
/*******************************************************/

/* 2$BC18l(B i, j $B$rHf3S$7(B, $B@hF,$+$i8_$$$K6&M-2DG=$J2;AG?t$rJV$9(B
   $BJV$jCM(B: $B6&M-2;AG?t(B */
/* return the length of the common phoneme prefix of words 'i' and 'j',
   i.e. the number of leading phones whose HMM names match (strmatch()) */
static int
wchmm_check_match(WORD_INFO *winfo, int i, int j)
{
  int shared = 0;

  /* stop at the end of either word or at the first differing phone */
  while (shared < winfo->wlen[i] && shared <= winfo->wlen[j]-1) {
    if (! (strmatch(winfo->wseq[i][shared]->name, winfo->wseq[j][shared]->name))) {
      break;
    }
    shared++;
  }
  return(shared);
}

/* wchmm $B>e$N%N!<%I(B node $B$K(B $B%N!<%I(B arc $B$X$NBP?t3NN((B a $B$NA+0\$rDI2C(B */
/* prepend a new arc cell (destination 'arc', log probability 'a') to the
   arc list of 'node'; the cell is taken from the wchmm block allocator */
static void
add_wacc(WCHMM_INFO *wchmm, int node, LOGPROB a, int arc)
{
  A_CELL *cell = (A_CELL *) mybmalloc2(sizeof(A_CELL), &(wchmm->malloc_root));
  A_CELL **head = &(wchmm->state[node].ac);

  cell->arc  = arc;
  cell->a    = a;
  /* link in front of the current list head */
  cell->next = *head;
  *head      = cell;
}

/* $B$"$k2;AG$NKvHx$N>uBV$HJL$N2;AG$N@hF,$N>uBV4V$KA+0\$rDI2C$9$k(B */
/* add a transition from end node of a phone HMM to start node of another HMM.
   The log probability is the transition to the final state
   (tinfo->statenum-1), taken from the highest-numbered state that has one.
   If 'from_node' already has an identical arc (same destination and same
   probability), no arc is added and 'bogus_arc' is incremented instead.
   j_error() is called only when the model has no transition to its final
   state at all. */
static void
wchmm_link_hmm(
	       WCHMM_INFO *wchmm, /* WCHMM info */
	       int from_node,	/* end node of previous phone */
	       int to_node,	/* start node of next phone */
	       HTK_HMM_Trans *tinfo /* transition info of previous phone */
	       )
{
  A_CELL *actmp;
  LOGPROB a;
  int i;

  /* get transition probability to outer state in tinfo */
  for(i = tinfo->statenum - 2; i >= 0; i--) {
    a = tinfo->a[i][tinfo->statenum-1];
    if (a == LOG_ZERO) continue;	/* no exit arc from this state */

    /* check if the arc already exist */
    for (actmp = wchmm->state[from_node].ac; actmp; actmp = actmp->next) {
      if (actmp->arc == to_node && actmp->a == a) {
	/* already linked: nothing to add.  Return here; falling through to
	   the j_error() below would report a missing exit arc, which is
	   not what happened */
	bogus_arc++;
	return;
      }
    }
    /* add the arc to wchmm */
    add_wacc(wchmm, from_node, a, to_node);
    return;			/* exit function here */
  }
  j_error("Error: No arc to endstate?\n");
}

/* $BC18l(B[from_word]$B$N(B[from_seq]$BHVL\$N2;AG$+$i(B
   $BC18l(B[ to_word ]$B$N(B[ to_seq ]$BHVL\$N2;AG$X$NA+0\$rDI2C(B */
/* add arc from the last state of the 'from_seq'th phoneme of word
   'from_word' to the first state of the 'to_seq'th phoneme of word
   'to_word' */
static void
wchmm_link_subword(WCHMM_INFO *wchmm, int from_word, int from_seq, int to_word, int to_seq)
{
  HMM_Logical *from_phone;
  int from_tail;		/* node of the last state of the source phone */
  int to_head;			/* node of the first state of the target phone */

  from_phone = wchmm->winfo->wseq[from_word][from_seq];
  /* first node of the phone + (number of states - 2) - 1 */
  from_tail = wchmm->offset[from_word][from_seq] + hmm_logical_state_num(from_phone)-2 -1;
  to_head = wchmm->offset[to_word][to_seq];

  wchmm_link_hmm(wchmm, from_tail, to_head, hmm_logical_trans(from_phone));
}

/**************************************************************/
/******** homophone processing: duplicating leaf nodes ********/
/**************************************************************/
/* $BF12;8l=hM}(B:
   $BLZ9=B$2=<-=q$K$*$$$F$9$Y$F$NC18l$OFHN)$7$?:G=*>uBV$r;}$DI,MW$,$"$k$?$a!$(B
   $BF12;8l$OCm0U?<$/07$&I,MW$,$"$k!%$3$N$?$a!$:G=i$NLZ9=B$2=<-=q$r9=C[$7$?8e(B, 
   $BJL$NC18l$H40A4$K6&M-$5$l$?C18l(B($BF12;8l(B), $B$"$k$$$OJL$NC18l$N0lIt$H$7$F(B
   $BKd$a9~$^$l$F$7$^$C$F$$$kC18l$rH/8+$9$k$H$H$b$K(B, $B$=$N:G=*%N!<%I$r(B
   $B%3%T!<$7$F?7$?$JC18l=*C<%N!<%I$r:n$kI,MW$,$"$k!%(B*/
/* homophones:
   As every word needs to have a unique state as its final state in the
   lexicon tree, homophones must be handled carefully.  After the primary
   tree has been built, we look through the tree to find words that are
   fully shared or embedded (a homophone, or a part of another word), and
   duplicate their last leaf node so that each has a unique end state. */

/* ($B4{$K$"$kC18l$N=*C<$G$"$k(B)$B%N!<%I(B node $B$r(B,
   word $B$KBP$9$k:G=*>uBV$H$7$F%3%T!<!&FHN)2=$5$;$k(B */
/* Copy node 'node' (which is already an end node of a word) to a new,
   independent node and make the copy the final node of word 'word'.
   The copy is stored at index wchmm->n and receives:
     - the output probability info of 'node' (under PASS1_IWCD, RC_INFO and
       LRC_INFO are cloned; otherwise the reference is copied),
     - one self-transition for each self-transition of 'node',
     - copies of the arcs that connect 'node' with the preceding states:
       with the last state of the previous phone when the last phone has
       hmm_logical_state_num() == 3, otherwise with the earlier states of
       the last phone itself.
   wchmm->ststart[], wchmm->stend[], wchmm->wordend[] and, in the
   single-state case, wchmm->offset[word][] are updated, and wchmm->n is
   incremented by one. */
static void
wchmm_duplicate_state(WCHMM_INFO *wchmm, int node, int word) /* source node, new word */
{
  int n;
  int n_src, n_prev;
  A_CELL	*ac;
  HMM_Logical *lastphone;

  /* one state will be newly created: expand the node arrays if needed */
  if (wchmm->n + 1 >= wchmm->maxwcn) {
    wchmm_extend(wchmm);
  }
  /* n: the target new node to which 'node' is copied */
  n = wchmm->n;

  n_src = node;

  /* copy output probability info */
#ifdef PASS1_IWCD
  {
    RC_INFO *rcnew;
    LRC_INFO *lrcnew;
    wchmm->state[n].outstyle = wchmm->state[n_src].outstyle;
    if (wchmm->state[n].outstyle == AS_RSET) {
      /* duplicate RC_INFO because it has its own cache */
      rcnew = (RC_INFO *)mybmalloc2(sizeof(RC_INFO), &(wchmm->malloc_root));
      memcpy(rcnew, wchmm->state[n_src].out.rset, sizeof(RC_INFO));
      wchmm->state[n].out.rset = rcnew;
    } else if (wchmm->state[n].outstyle == AS_LRSET) {
      /* duplicate LRC_INFO because it has its own cache */
      lrcnew = (LRC_INFO *)mybmalloc2(sizeof(LRC_INFO), &(wchmm->malloc_root));
      memcpy(lrcnew, wchmm->state[n_src].out.lrset, sizeof(LRC_INFO));
      wchmm->state[n].out.lrset = lrcnew;
    } else {
      /* share same info, simply copy the pointer */
      memcpy(&(wchmm->state[n].out), &(wchmm->state[n_src].out), sizeof(ACOUSTIC_SPEC));
    }
  }
#else  /* ~PASS1_IWCD */
  /* without PASS1_IWCD the output info is copied as a single pointer */
  memcpy(&(wchmm->state[n].out), &(wchmm->state[n_src].out), sizeof(HTK_HMM_State *));
#endif

  /* last phone of the new word; the new node starts with an empty arc list */
  lastphone = wchmm->winfo->wseq[word][wchmm->winfo->wlen[word]-1];
  wchmm->state[n].ac = NULL;

  /* add self transition arc: each self-loop of the source becomes a
     self-loop of the new node with the same probability */
  for(ac=wchmm->state[n_src].ac; ac; ac=ac->next) {
    if (ac->arc == n_src) {
      add_wacc(wchmm, n, ac->a, n);
    }
  }
  
  /* copy transition arcs whose destination is the source node to new node */
  if (hmm_logical_state_num(lastphone) == 3) { /* = 1 state */
    /* phone with only 1 state should be treated carefully */
    if (wchmm->winfo->wlen[word] == 1) { /* word consists of only this phone */
      /* no arcs need to be copied: this is also a start node of a word */
      wchmm->ststart[n] = word;
      wchmm->offset[word][0] = n;
    } else {
      /* copy arcs from the last state of the previous phone:
	 n_prev = first node of the previous phone + (its state count - 3) */
      n_prev = wchmm->offset[word][wchmm->winfo->wlen[word]-2]
	+ hmm_logical_state_num(wchmm->winfo->wseq[word][wchmm->winfo->wlen[word]-2]) - 3;
      for (ac=wchmm->state[n_prev].ac; ac; ac=ac->next) {
	if (ac->arc == n_src) {
	  add_wacc(wchmm, n_prev, ac->a, n);
	}
      }
      /* also update the last offset (== wordend in this case) */
      wchmm->offset[word][wchmm->winfo->wlen[word]-1] = n;
      /* the new node should not be a start node of a word */
      wchmm->ststart[n] = WORD_INVALID;
    }
  } else {			/* last phone whose state count is not 3 */
    /* copy arcs from/to the source node to new node: visit every node
       from the first node of the last phone up to (not including) n_src */
    for (n_prev = wchmm->offset[word][wchmm->winfo->wlen[word]-1]; n_prev < n_src; n_prev++) {
      /* arcs n_prev -> n_src are mirrored as n_prev -> n */
      for (ac=wchmm->state[n_prev].ac; ac; ac=ac->next) {
	if (ac->arc == n_src) {
	  add_wacc(wchmm, n_prev, ac->a, n);
	}
      }
      /* arcs n_src -> n_prev are mirrored as n -> n_prev */
      for (ac=wchmm->state[n_src].ac; ac; ac=ac->next) {
	if (ac->arc == n_prev) {
	  add_wacc(wchmm, n, ac->a, n_prev);
	}
      }
    }
    /* the new node should not be a start node of a word */
    wchmm->ststart[n] = WORD_INVALID;
  }

  /* map word <-> node */
  wchmm->stend[n]   = word;	/* 'n' is an end node of word 'word' */
  wchmm->wordend[word] = n;	/* the word end node of 'word' is 'n' */

  /* new state has been created: increment the size */
  wchmm->n++;
  
}

/* $B40A4$K6&M-$5$l$?$jKd$a9~$^$l$F$$$kC18l$r8+$D$1(B wchmm_dupliate_state() $B$r8F$V(B*/
/* Give every word its own end node.
   Each word's end node is examined in word-ID order.  A node that was
   already seen as the end of an earlier word is duplicated for the current
   word.  A node seen for the first time is duplicated if it carries a
   different number of arcs than the last HMM state itself defines (the
   word is embedded in another word); in that case the original node stops
   being a word end.  Every duplication increments 'dupcount'. */
static void
wchmm_duplicate_leafnode(WCHMM_INFO *wchmm)
{
  int w, node, k;
  int node_total;		/* number of nodes before any duplication */
  int model_arcs, tree_arcs;
  int nstate, tail_state;
  boolean *seen;		/* seen[node]: node already visited as a word end */
  A_CELL *arc;
  HMM_Logical *tail_phone;
  HTK_HMM_Trans *trans;

  node_total = wchmm->n;
  seen = (boolean *)mymalloc(sizeof(boolean) * node_total);
  for (node = 0; node < node_total; node++) {
    seen[node] = FALSE;
  }

  for (w = 0; w < wchmm->winfo->num; w++) {
    node = wchmm->wordend[w];

    if (seen[node]) {
      /* second or later word ending at this node: always duplicate */
      wchmm_duplicate_state(wchmm, node, w);
      dupcount++;
      continue;
    }

    /* first word ending here: count the model-internal arcs leaving the
       last state of the last phone ... */
    tail_phone = wchmm->winfo->wseq[w][wchmm->winfo->wlen[w]-1];
    nstate = hmm_logical_state_num(tail_phone);
    tail_state = nstate - 2;
    trans = hmm_logical_trans(tail_phone);
    model_arcs = 0;
    for (k = 1; k < nstate - 1; k++) {
      if (trans->a[tail_state][k] != LOG_ZERO) model_arcs++;
    }
    /* ... and the arcs actually attached to the node in the tree */
    tree_arcs = 0;
    for (arc = wchmm->state[node].ac; arc; arc = arc->next) {
      tree_arcs++;
    }

    if (model_arcs == tree_arcs) {
      /* no extra arc: the node is a plain word tail, just record its word */
      wchmm->stend[node] = w;
    } else {
      /* counts differ: word 'w' is embedded in other words at this node,
	 so give it a copy and clear the word-end mark of the original */
      wchmm_duplicate_state(wchmm, node, w);
      dupcount++;
      wchmm->stend[node] = WORD_INVALID;
    }

    seen[node] = TRUE;
  }

  free(seen);
}

/**************************************************************/
/*************** add a word to wchmm lexicon tree *************/
/**************************************************************/
/* $BC18l(B word ($BC18l(Bmatchword$B$H(Bmatchlen$B?t$N2;AG?t%^%C%A$9$k(B)$B$r(B wchmm $B$K(B
   $BIU$12C$($k(B */
/* Add word 'word' to the lexicon tree.
   The first 'matchlen' phones are shared with the already added word
   'matchword' (their node offsets are reused); nodes are newly created for
   the remaining phones, one per HMM state excluding the first and the last
   state of each phone.  Afterwards the word <-> node maps (offset[],
   ststart[], wordend[], stend[]) are updated, wchmm->n is advanced, and,
   when both a shared and a new part exist, the last shared phone is linked
   to the first new phone.  With matchlen == 0 the whole word is created
   as new. */
static void
wchmm_add_word(
     WCHMM_INFO *wchmm,		/* wchmm info */
     int word,			/* word ID to add */
     int matchlen,		/* num of longest matched phone from start (0=add all) */
     int matchword)		/* longest matched word ID already added in wchmmtree */
{
  int   j,k,n;
  int   add_head, add_tail, add_to;
  int   word_len, matchword_len;
  
/* (disabled debug output)
 *   if (matchlen > 0) {
 *     printf("--\n");
 *     put_voca(wchmm->winfo, word);
 *     put_voca(wchmm->winfo, matchword);
 *     printf("matchlen=%d\n", matchlen);
 *   }
 */
  
  /* variable abbreviations */
  n = wchmm->n;
  word_len      = wchmm->winfo->wlen[word];
  matchword_len = wchmm->winfo->wlen[matchword];

  /* malloc phone offset area (one node offset per phone of the word) */
  if((wchmm->offset[word]=(int *)mybmalloc2(sizeof(int)*word_len, &(wchmm->malloc_root)))==NULL){ 
    j_error("malloc failed at wchmm_add_word()\n");
  }
  
  /* allocate unshared (new) part:
     phones add_head..add_tail are new, add_to is the last shared phone */
  add_head = matchlen;
  add_tail = word_len - 1;
  add_to   = matchlen - 1;
  if (add_tail - add_head + 1 > 0) { /* there are new phones to be created */
    {
      HMM_Logical *ltmp;
      int ato;
      LOGPROB prob;
      int ntmp = n, ltmp_state_num;
#ifdef PASS1_IWCD
      CD_Set *lcd = NULL;
#endif
      
      /* ntmp: index of the node currently being filled in */
      ntmp = n;
      for (j=add_head; j <= add_tail; j++) { /* for each new phones */
	ltmp = wchmm->winfo->wseq[word][j];
	ltmp_state_num = hmm_logical_state_num(ltmp);
#ifdef PASS1_IWCD
	if (ccd_flag) {
	  /* in the triphone lexicon tree, the last phone of a word has
	     left-context cdset: look it up here for use in the state loop */
	  if (wchmm->winfo->wlen[word] > 1 && j == wchmm->winfo->wlen[word] - 1) {
#ifdef CATEGORY_TREE
	    if (! old_iwcd_flag) {
	      lcd = lcdset_lookup_with_category(hmminfo, ltmp, wchmm->winfo->wton[word]);
	      if (lcd == NULL) {
		/* no category-aware cdset found.  This is case when no word
		   can follow this word grammatically.
		   so fallback to normal state */
		j_printerr("Warning: no lcdset found for [%s::%d], fallback to [%s]\n", ltmp->name, wchmm->winfo->wton[word], ltmp->name);
		lcd = lcdset_lookup_by_hmmname(hmminfo, ltmp->name);
	      }
	    } else {
	      lcd = lcdset_lookup_by_hmmname(hmminfo, ltmp->name);
	    }
#else
	    lcd = lcdset_lookup_by_hmmname(hmminfo, ltmp->name);
#endif
	    if (lcd == NULL) {
	      j_error("Error: no lcdset found for [%s]\n",ltmp->name);
	    }
	  }
	}
#endif /* PASS1_IWCD */
	for (k = 1; k < ltmp_state_num - 1; k++) { /* for each state in the phone */
	  /* set state output prob info */
#ifdef PASS1_IWCD
	  if (ccd_flag) {
	    /* output info of triphones needs special handling:
	       the style depends on the position of the phone in the word */
	    if (wchmm->winfo->wlen[word] == 1) { /* word with only 1 phone */
	      wchmm->state[ntmp].outstyle = AS_LRSET;
	      wchmm->state[ntmp].out.lrset = (LRC_INFO *)mybmalloc2(sizeof(LRC_INFO), &(wchmm->malloc_root));
	      (wchmm->state[ntmp].out.lrset)->hmm       = ltmp;
	      (wchmm->state[ntmp].out.lrset)->state_loc = k;
#ifdef CATEGORY_TREE
	      (wchmm->state[ntmp].out.lrset)->category  = wchmm->winfo->wton[word];
#endif
	    } else if (j == 0) {	/* head phone of a word */
	      wchmm->state[ntmp].outstyle = AS_RSET;
	      wchmm->state[ntmp].out.rset = (RC_INFO *)mybmalloc2(sizeof(RC_INFO), &(wchmm->malloc_root));
	      (wchmm->state[ntmp].out.rset)->hmm       = ltmp;
	      (wchmm->state[ntmp].out.rset)->state_loc = k;
	    } else if (j == wchmm->winfo->wlen[word] - 1) { /* last phone of a word */
	      wchmm->state[ntmp].outstyle = AS_LSET;
	      wchmm->state[ntmp].out.lset = &(lcd->stateset[k]);
	    } else {		/* word-internal phone */
	      wchmm->state[ntmp].outstyle = AS_STATE;
	      if (ltmp->is_pseudo) {
		j_printerr("Warning: word-internal phone should not be pseudo\n");
		put_voca(wchmm->winfo, word);
	      }
	      wchmm->state[ntmp].out.state = ltmp->body.defined->s[k];
	    }
	  } else {
	    /* monophone */
	    if (ltmp->is_pseudo) {
	      j_printerr("InternalError: CDSET phoneme exist in monophone?\n");
	      put_voca(wchmm->winfo, word);
	    }
	    wchmm->state[ntmp].outstyle = AS_STATE;
	    wchmm->state[ntmp].out.state = ltmp->body.defined->s[k];
	  }
#else  /* ~PASS1_IWCD */
	  if (ltmp->is_pseudo) {
	    j_printerr("InternalError: CDSET phone exist in monophone?\n");
	    put_voca(wchmm->winfo, word);
	  }
	  wchmm->state[ntmp].out = ltmp->body.defined->s[k];
#endif /* PASS1_IWCD */
	  
	  /* make transition arc from HMM transition info:
	     a transition k -> ato becomes an arc to node ntmp + (ato - k) */
	  wchmm->state[ntmp].ac = NULL;
	  for (ato = 1; ato < ltmp_state_num; ato++) {
	    prob = (hmm_logical_trans(ltmp))->a[k][ato];
	    if
	      (prob != LOG_ZERO)
	      {
	      if (j == add_tail && k == ltmp_state_num - 2 && ato == ltmp_state_num - 1) {
		/* arc outside new part will be handled later:
		   this is the exit arc of the last state of the word */
	      } else {
		add_wacc(wchmm, ntmp, prob, ntmp + ato - k);
	      }
	    }
	  }
	  
	  /* initialize other info */
	  wchmm->ststart[ntmp] = WORD_INVALID;
	  wchmm->stend[ntmp] = WORD_INVALID;
	  
	  ntmp++;
	  /* expand wchmm if necessary */
	  if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm);
        } /* end of state loop */
      }	/* end of phone loop */
    }
  }
  
  /* make mapping: word <-> node on wchmm */
  for (j=0;j<word_len;j++) {
    if (j < add_head) {	/* shared part */
      wchmm->offset[word][j] = wchmm->offset[matchword][j];
    } else if (add_tail < j) { /* shared tail part (should not happen..) */
      wchmm->offset[word][j] = wchmm->offset[matchword][j+(matchword_len-word_len)];
    } else {			/* newly created part */
      wchmm->offset[word][j] = n;
      /* each phone occupies (state count - 2) nodes */
      n += hmm_logical_state_num(wchmm->winfo->wseq[word][j]) - 2;
    }
  }
  wchmm->n = n;
  wchmm->ststart[wchmm->offset[word][0]] = word; /* word head */
  /* k: node of the last state of the last phone */
  k = wchmm->offset[word][word_len-1] + hmm_logical_state_num(wchmm->winfo->wseq[word][word_len-1])-2 -1;
  wchmm->wordend[word] = k;	/* tail node of 'word' is 'k' */
  wchmm->stend[k] = word;	/* node 'k' is a tail node of 'word' */
  
  if (matchlen != 0 && add_tail - add_head + 1 > 0) {
    /* new part has been created in the above procedure: */
    /* now make link from shared part to the new part */
    wchmm_link_subword(wchmm, matchword,add_to,word,add_head);	
  }
}

/**************************************************************/
/**** calculate overall info (after wchmm has been built) *****/
/**************************************************************/
/* $BC18l$N@hF,>uBV$N%$%s%G%C%/%9$r:n@.(B ($BC18l4VA+0\7W;;MQ(B) */
/* make index of word-beginning nodes (for inter-word transition).
   Every node whose ststart[] entry is valid is appended to
   wchmm->startnode[], and the number of entries is stored in
   wchmm->startnum.  startnode[] is allocated with one entry per word
   (see wchmm_init()), so the capacity is checked BEFORE each store: on
   overflow an error is printed and indexing stops, instead of writing
   past the end of the array. */
static void
wchmm_index_ststart(WCHMM_INFO *wchmm)
{
  int n;
  int id;

  id = 0;
  for (n=0;n<wchmm->n;n++) {
    if (wchmm->ststart[n] == WORD_INVALID) continue; /* not a word head */
#ifdef USE_NGRAM
    /* exclude silence model on beginning of a sentence from the index:
       It cannot come after other words */
    if (wchmm->ststart[n] == wchmm->winfo->head_silwid) continue;
#endif
    if (id >= wchmm->winfo->num) {
      /* startnode[] is already full */
      j_printerr("Error: start node num exceeded %d\n", wchmm->winfo->num);
      break;
    }
    wchmm->startnode[id] = n;
    id++;
  }
  wchmm->startnum = id;		/* total num */
}

/* $BC18l$N=*C<>uBV$+$i$N<!A+0\3NN($r(B wchmm->wordend_a[wordID] $B$K5a$a$F$*$/(B */
/* for every word, store in wchmm->wordend_a[wordID] the log probability of
   the transition from the state before the final state to the final state
   of the word's last phone HMM */
static void
wchmm_calc_wordend_arc(WCHMM_INFO *wchmm)
{
  WORD_ID w;

  for (w=0;w<wchmm->winfo->num;w++) {
    HMM_Logical *lastphone = wchmm->winfo->wseq[w][wchmm->winfo->wlen[w]-1];
    HTK_HMM_Trans *tr = hmm_logical_trans(lastphone);

    /* a[statenum-2][statenum-1]: the arc leaving the model */
    wchmm->wordend_a[w] = tr->a[tr->statenum-2][tr->statenum-1];
  }
}

/***************************************************************/
/****** for separation of high-frequent words from tree  *******/
/***************************************************************/
#ifdef USE_NGRAM
#ifdef SEPARATE_BY_UNIGRAM

/* unigram$B3NN($G%=!<%H$9$k$?$a$N(B qsort $B4X?t(B */
/* qsort() callback: order LOGPROB values from largest to smallest.
   Both arguments point to LOGPROB elements of the array being sorted.
   The function has the exact prototype qsort() requires, so it is called
   through a compatible function pointer type (no cast needed). */
static int
compare_prob(const void *a, const void *b)
{
  const LOGPROB x = *(const LOGPROB *)a;
  const LOGPROB y = *(const LOGPROB *)b;

  if (x < y)  return (1);
  if (x > y)  return (-1);
  return(0);
}

/* get the Nth-best unigram probability from all words.
   'n' is clamped to the range 1..winfo->num.  The unigram value of every
   word is collected into a temporary array, sorted in descending order,
   and the n-th element is returned.
   NOTE(review): with an empty vocabulary (winfo->num == 0) the clamp
   yields n == 0 and the final read would be out of range -- callers are
   presumably never invoked that way; confirm. */
static LOGPROB
get_nbest_uniprob(WORD_INFO *winfo, int n)
{
  LOGPROB *u_p;
  WORD_ID w;
  LOGPROB x;

  if (n < 1) n = 1;
  if (n > winfo->num) n = winfo->num;

  /* store all unigram probability to u_p[] */
  u_p = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num);
  for(w=0;w<winfo->num;w++) {
#ifdef CLASS_NGRAM
    u_p[w] = class_uni_prob(ngram, winfo->wton[w]);
#else
    u_p[w] = uni_prob(ngram, winfo->wton[w]);
#endif
  }
  /* sort them downward */
  qsort(u_p, winfo->num, sizeof(LOGPROB), compare_prob);

  /* return the Nth value */
  x = u_p[n-1];
  free(u_p);
  return(x);
}

#endif
#endif /* USE_NGRAM */


/**********************************************************/
/****** MAKE WCHMM (LEXICON TREE) --- main function *******/
/**********************************************************/
/* number of words actually separated from the tree (added unshared);
   reset at the start of each build function */
static int separated_word_count;

#ifdef CATEGORY_TREE

/*
  $BCm0U(B: build_wchmm() $B$O2a5n$N%P!<%8%g%s$H$N%A%'%C%/MQ$K;D$7$F$"$k$N$_!%(B
  $BDL>o$O$h$jAa$$(B build_wchmm2() $B$r;H$$$^$9!%(B
  (build_wchmm() $B$O(B Julian $B%b!<%I$G(B "-oldtree" $B%*%W%7%g%s$G;HMQ$G$-$^$9(B)
*/
/*
  NOTE: build_wchmm() is old and only left for historical reason.
  please use build_wchmm2().  it's faster.
  (build_wchmm() is used when Julian mode and "-oldtree" specified)
*/

#define COUNT_STEP 500         /* count log period */

/*
  $BLZ9=B$2=<-=q:n@.(B($B8E$$(B)
  $B:n$jJ}(B: $B4{$KEPO?:Q$NC18l$NCf$+$i2;AGNs$,:GD9%^%C%A$9$kC18l$r$_$D$1$F(B
  $B$=$3$KIU$12C$($k!%(B
*/
/* old construction function
   tree-organization algorithm:
      for each word, in word-ID order, search the already added words for
      the one sharing the longest phone prefix (within the same category
      under CATEGORY_TREE) and attach the new word to it.
   wchmm->winfo (and ngram / dfa, depending on the build configuration)
   must be set before the call; otherwise j_exit() is called.
   After all words are added, shared word-end nodes are duplicated, the
   word-end arc probabilities and the start node index are computed, and
   (without CATEGORY_TREE) the successor lists for factoring are built.
*/
void
build_wchmm(WCHMM_INFO *wchmm)
{
  int i,j;
  int matchword=0, sharelen=0, maxsharelen=0;
  int counter = COUNT_STEP;
#ifdef SEPARATE_BY_UNIGRAM
  LOGPROB separate_thres;
#endif

  /* linguistic infos must be set before build_wchmm() is called */
  /* check if necessary linguistic info is already assigned (for debug) */
  if (wchmm->winfo == NULL
#ifdef USE_NGRAM
      || wchmm->ngram == NULL
#endif
#ifdef USE_DFA
      || wchmm->dfa == NULL
#endif
      ) {
    j_exit("InternalError: build_wchmm: lingustic info not available!!\n");
  }
  
#ifdef SEPARATE_BY_UNIGRAM
  /* get the [separate_wnum]-th best 1-gram score */
  /* words whose 1-gram score is not below this value are kept out of the tree */
  separate_thres = get_nbest_uniprob(wchmm->winfo, separate_wnum);
#endif

#ifdef PASS1_IWCD
#ifdef CATEGORY_TREE
  if (ccd_flag && !old_iwcd_flag) {
    /* register the category-indexed lcd sets for all categories */
    lcdset_register_with_category_all(hmminfo, winfo, dfa);
  }
#endif /* CATEGORY_TREE */
#endif /* PASS1_IWCD */
  

  /* initialize wchmm */
  wchmm_init(wchmm);

  /* reset the homophone / separated word counters */
  homophone_word_num = 0;
  separated_word_count=0;

  j_printerr("Building HMM lexicon tree (left-to-right)...\n");
  for (i=0;i<wchmm->winfo->num;i++) {
    /* progress report every COUNT_STEP words when verbose */
    if (verbose_flag) {
      if (i >= counter) {
       j_printerr("\r %5d words proceeded (%6d nodes)",i,wchmm->n);
       counter += COUNT_STEP;
      }
    }
#ifdef USE_NGRAM
    if (i == wchmm->winfo->head_silwid || i == wchmm->winfo->tail_silwid) {
      /* the head / tail silence words are not shared in the tree:
       * they are added whole, as new nodes */
      wchmm_add_word(wchmm, i,0,0); /* add as is, with sharelen=0 */
      continue;
    }
#ifndef NO_SEPARATE_SHORT_WORD
    if (wchmm->winfo->wlen[i] <= SHORT_WORD_LEN) {
      /* short words (at most SHORT_WORD_LEN phones) are not shared in the tree */
      wchmm_add_word(wchmm, i,0,0); /* add as is, with sharelen=0 */
      separated_word_count++;
      continue;
    }
#endif
#ifdef SEPARATE_BY_UNIGRAM
    if (
#ifdef CLASS_NGRAM
       class_uni_prob(wchmm->ngram, wchmm->winfo->wton[i])
#else
       uni_prob(wchmm->ngram, wchmm->winfo->wton[i])
#endif
       >= separate_thres && separated_word_count < separate_wnum) {
      /* high-frequency words are not shared in the tree */
      /* separate_thres is the separate_wnum-th best 1-gram score */
      wchmm_add_word(wchmm, i,0,0);
      separated_word_count++;
      continue;
    }
#endif
#endif /* USE_NGRAM */
    /* search the already added words for the longest shared phone prefix */
    maxsharelen=0;
    for (j=0;j<i;j++) {
#ifdef CATEGORY_TREE
      /* only words of the same category may share nodes */
      if (wchmm->winfo->wton[i] != wchmm->winfo->wton[j]) continue;
#endif
      sharelen = wchmm_check_match(wchmm->winfo, i, j);
      if (sharelen == wchmm->winfo->wlen[i] && sharelen == wchmm->winfo->wlen[j]) {
       /* word j is a homophone of word i */
       homophone_word_num++;
       /* a full match cannot be exceeded; leaving the loop here also
	  keeps the homophone from being counted more than once */
       maxsharelen = sharelen;
       matchword = j;
       break;
      }
      if (sharelen > maxsharelen) {
       matchword = j;
       maxsharelen = sharelen;
      }
    }
    wchmm_add_word(wchmm, i,maxsharelen,matchword);
  }
  j_printerr("\r %5d words ended     (%6d nodes)\n",i,wchmm->n);

#if 0
  /* disabled alternative: add every word without any sharing (no tree) */
  for (i=0;i<wchmm->winfo->num;i++) {
    if (verbose_flag) {
      if (i >= counter) {
       j_printerr("  %5d words proceeded (%6d nodes)\n",i,wchmm->n);
       counter += COUNT_STEP;
      }
    }
    wchmm_add_word(wchmm, i,0,0); /* add as is, with sharelen=0 */
  }
  j_printerr("  %5d words ended     (%6d nodes)\n",i,wchmm->n);
#endif  
  
  /* duplicate the leaf nodes of words that share their end node, so that
     every word gets an end node of its own */
  wchmm_duplicate_leafnode(wchmm);
  VERMES("  %d leaf nodes are made unshared\n",dupcount);
  
  /* compute the transition probability leaving each word end */
  wchmm_calc_wordend_arc(wchmm);

  /* run check_wchmm() on the finished tree */
  check_wchmm(wchmm);

  /* store the indices of the word start nodes (ststart) separately */
  wchmm_index_ststart(wchmm);

  /* for factoring: attach successor word lists to the states */
#ifndef CATEGORY_TREE
  make_successor_list(wchmm);
#ifdef USE_NGRAM
#ifdef UNIGRAM_FACTORING
  /* pre-compute the 1-gram factoring values (see the message below) */
  calc_all_unigram_factoring_values(wchmm);
  /* presumably builds the list of nodes needing an inter-word LM cache -- TODO confirm */
  make_iwcache_index(wchmm);
  j_printerr("  1-gram factoring values has been pre-computed\n");
#endif /* UNIGRAM_FACTORING */
#endif /* USE_NGRAM */
  /* build the index of sc lists */
  make_sc_index(wchmm);
#endif

  j_printerr("done\n");

  /* enter the interactive check mode when wchmm_check_flag is set */
  if (wchmm_check_flag) {
    wchmm_check_interactive(wchmm);
  }
}

#endif /* CATEGORY_TREE */

/*
  $BLZ9=B$2=<-=q:n@.(B($B?7(B)
  $B:n$jJ}(B: (1)$B$^$:2;AGNs$G%=!<%H$9$k!%(B(2)$B%=!<%H8e$O(B,$B3FC18l$K$D$$$F(B
  $B$=$N!VD>A0$NC18l!W$,>o$K:G$bD9$/%^%C%A$9$kC18l$H$J$k!%(B
*/
/* new lexicon tree construction function */
/* tree-organization algorithm:
   1. sort all words by their phone sequence.
   2. Then, for each word, the previous one is the best matched word.
*/
  
void
build_wchmm2(WCHMM_INFO *wchmm)
{
  int i,j, last_i;
  int count_step, counter;
  WORD_ID *windex;
#ifdef USE_NGRAM
#ifdef SEPARATE_BY_UNIGRAM
  LOGPROB separate_thres;
#endif
#endif

  /* lingustic infos must be set before build_wchmm() is called */
  /* check if necessary lingustic info is already assigned (for debug) */
  if (wchmm->winfo == NULL
#ifdef USE_NGRAM
      || wchmm->ngram == NULL
#endif
#ifdef USE_DFA
      || wchmm->dfa == NULL
#endif
      ) {
    j_exit("InternalError: build_wchmm: lingustic info not available!!\n");
  }
  
  separated_word_count = 0;
  count_step = wchmm->winfo->num / 10;
  counter = count_step;
  
  j_printerr("Building HMM lexicon tree");
  
#ifdef USE_NGRAM
#ifdef SEPARATE_BY_UNIGRAM
  /* compute score threshold beforehand to separate words from tree */
  /* here we will separate best [separate_wnum] words from tree */
  separate_thres = get_nbest_uniprob(wchmm->winfo, separate_wnum);
#endif
#endif

#ifdef PASS1_IWCD
#ifdef CATEGORY_TREE
  if (ccd_flag && !old_iwcd_flag) {
    /* when Julian mode (category-tree) and triphone is used,
       make all category-indexed context-dependent phone set (cdset) here */
    /* these will be assigned on the last phone of each word on tree */
    lcdset_register_with_category_all(hmminfo, winfo, dfa);
  }
#endif /* CATEGORY_TREE */
#endif /* PASS1_IWCD */
  
  /* initialize wchmm */
  wchmm_init(wchmm);

  /* make sorted word index ordered by phone sequence */
  windex = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wchmm->winfo->num);
  for(i=0;i<wchmm->winfo->num;i++) windex[i] = i;
#ifdef CATEGORY_TREE
  /* sort by category -> sort by word ID in each category */
  wchmm_sort_idx_by_category(wchmm->winfo, windex, wchmm->winfo->num);
  {
    int last_cate;
    last_i = 0;
    last_cate = wchmm->winfo->wton[windex[0]];
    for(i = 1;i<wchmm->winfo->num;i++) {
      if (wchmm->winfo->wton[windex[i]] != last_cate) {
	wchmm_sort_idx_by_wseq(wchmm->winfo, windex, last_i, i - last_i);
	last_cate = wchmm->winfo->wton[windex[i]];
	last_i = i;
      }
    }
    wchmm_sort_idx_by_wseq(wchmm->winfo, windex, last_i, wchmm->winfo->num - last_i);
  }
#else
  /* sort by word ID for whole vocabulary */
  wchmm_sort_idx_by_wseq(wchmm->winfo, windex, 0, wchmm->winfo->num);
#endif

/* 
 *   {
 *     int i,w;
 *     for(i=0;i<wchmm->winfo->num;i++) {
 *	 w = windex[i];
 *	 printf("%d: cate=%4d wid=%4d %s\n",i, wchmm->winfo->wton[w], w, wchmm->winfo->woutput[w]);
 *     }
 *   }
 */

  /* incrementaly add words to lexicon tree */
  /* now for each word, the previous word (last_i) is always the most matched one */
  last_i = WORD_INVALID;
  for (j=0;j<wchmm->winfo->num;j++) {
    i = windex[j];
    if (j >= counter) {
      /*j_printerr("\r %5d words proceeded (%6d nodes)",j, wchmm->n);*/
      j_printerr(".");
      counter += count_step;
    }
#ifdef USE_NGRAM
    /* start/end silence word should not be shared */
    if (i == wchmm->winfo->head_silwid || i == wchmm->winfo->tail_silwid) {
      wchmm_add_word(wchmm, i,0,0); /* add whole word as new (sharelen=0) */
      continue;
    }
#ifndef NO_SEPARATE_SHORT_WORD
    /* separate short words from tree */
    if (wchmm->winfo->wlen[i] <= SHORT_WORD_LEN) {
      wchmm_add_word(wchmm, i,0,0);
      separated_word_count++;
      continue;
    }
#endif
#ifdef SEPARATE_BY_UNIGRAM
    /* separate high-frequent words from tree (threshold = separate_thres) */
    if (
#ifdef CLASS_NGRAM
	class_uni_prob(wchmm->ngram, wchmm->winfo->wton[i])
#else
	uni_prob(wchmm->ngram, wchmm->winfo->wton[i])
#endif
	>= separate_thres && separated_word_count < separate_wnum) {
      wchmm_add_word(wchmm, i,0,0);
      separated_word_count++;
      continue;
    }
#endif
#endif /* USE_NGRAM */
    if (last_i == WORD_INVALID) { /* first word */
      wchmm_add_word(wchmm, i,0,0);
    } else {
      /* the previous word (last_i) is always the most matched one */
#ifdef CATEGORY_TREE
      if (wchmm->winfo->wton[i] != wchmm->winfo->wton[last_i]) {
	wchmm_add_word(wchmm, i,0,0);
      } else {
	wchmm_add_word(wchmm, i, wchmm_check_match(wchmm->winfo, i, last_i), last_i);
      }
#else
      wchmm_add_word(wchmm, i, wchmm_check_match(wchmm->winfo, i, last_i), last_i);
#endif
    }
    last_i = i;
    
  } /* end of add word loop */
  
  /*j_printerr("\r %5d words ended     (%6d nodes)\n",j,wchmm->n);*/

  /* free work area */
  free(windex);
  
  /* duplicate leaf nodes of homophone/embedded words */
  j_printerr("%d", wchmm->n);
  wchmm_duplicate_leafnode(wchmm);
  j_printerr("+%d=%d nodes\n",dupcount, wchmm->n);
  
  /* calculate transition probability of word end node to outside */
  wchmm_calc_wordend_arc(wchmm);

  /* check wchmm coherence (internal debug) */
  check_wchmm(wchmm);

  /* make index of word-beginning nodes (for inter-word transition) */
  wchmm_index_ststart(wchmm);

  /* make successor list for all branch nodes for N-gram factoring */
#ifndef CATEGORY_TREE
  make_successor_list(wchmm);
#ifdef UNIGRAM_FACTORING
  /* for 1-gram factoring, we can compute the values before search */
  calc_all_unigram_factoring_values(wchmm);
  /* make list of start nodes that needs inter-word LM cache */
  make_iwcache_index(wchmm);
  j_printerr("  1-gram factoring values has been pre-computed\n");
#endif /* UNIGRAM_FACTORING */
  /* make index to factoring node for factoring cache */
  make_sc_index(wchmm);
#endif

  j_printerr("done\n");

  /* go into interactive check mode ("-check" on start) */
  if (wchmm_check_flag) {
    wchmm_check_interactive(wchmm);
  }

#ifdef WCHMM_SIZE_CHECK
  /* detailed check of lexicon tree size (inaccurate!) */
  printf("wchmm: %d words, %d nodes\n", wchmm->winfo->num, wchmm->n);
  printf("%9d bytes: wchmm->state[node] (exclude ac, sc)\n", sizeof(WCHMM_STATE) * wchmm->n);
  printf("%9d bytes: wchmm->ststart[node]\n", sizeof(WORD_ID) * wchmm->n);
  printf("%9d bytes: wchmm->stend[node]\n", sizeof(WORD_ID) * wchmm->n);
  {
    int w,count;
    count = 0;
    for(w=0;w<wchmm->winfo->num;w++) {
      count += wchmm->winfo->wlen[w] * sizeof(int) + sizeof(int *);
    }
    printf("%9d bytes: wchmm->offset[w][]\n", count);
  }
  printf("%9d bytes: wchmm->wordend[w]\n", wchmm->winfo->num * sizeof(int));
  printf("%9d bytes: wchmm->startnode[]\n", wchmm->startnum * sizeof(int));
#ifdef UNIGRAM_FACTORING
  printf("%9d bytes: wchmm->start2isolate[]\n", wchmm->isolatenum * sizeof(int));
#endif
  printf("%9d bytes: wchmm->state2scid[]\n", wchmm->n * sizeof(int));
  printf("%9d bytes: wchmm->wordend_a[]\n", wchmm->winfo->num * sizeof(LOGPROB));
  printf("under state[]:\n");
  {
    A_CELL *ac;
    int count,n;
    count = 0;
    for(n=0;n<wchmm->n;n++) {
      for(ac=wchmm->state[n].ac;ac;ac=ac->next) {
	count += sizeof(A_CELL);
      }
    }
    printf("\t%9d: ac\n", count);
  }
#ifndef CATEGORY_TREE
  {
    S_CELL *sc;
    int count,n;
    count = 0;
    for(n=0;n<wchmm->n;n++) {
      for(sc=wchmm->state[n].sc;sc;sc=sc->next) {
	count += sizeof(S_CELL);
      }
    }
    printf("\t%9d: sc\n", count);
  }
#endif
#endif /* WCHMM_SIZE_CHECK */
}


/* wchmm$B$N>pJs$rI8=`=PNO$K=PNO$9$k(B */
/* output wchmm info to stdout */
void
print_wchmm_info(WCHMM_INFO *wchmm)
{
  int n,i, rootnum;

#ifdef USE_NGRAM
  rootnum = wchmm->startnum + 1;	/* including winfo->head_silwid */
#else
  rootnum = wchmm->startnum;
#endif /* USE_NGRAM */
  
  j_printf("Lexicon tree info:\n");
  j_printf("\t total node num = %6d\n", wchmm->n);
#ifdef USE_NGRAM
  j_printf("\t  root node num = %6d\n", rootnum);
#ifdef NO_SEPARATE_SHORT_WORD
#ifdef SEPARATE_BY_UNIGRAM
  j_printf(" (%d hi-freq. words are separated from tree)\n", separated_word_count);
#else
  j_printf(" (no words are separated from tree)\n");
#endif /* SEPARATE_BY_UNIGRAM */
#else
  j_printf(" (%d short words (<= %d phonemes) are separated from tree)\n", separated_word_count, SHORT_WORD_LEN);
#endif /* NO_SEPARATE_SHORT_WORD */
#else /* USE_NGRAM */
  j_printf("\t  root node num = %6d\n", rootnum);
#endif /* USE_NGRAM */
  for(n=0,i=0;i<wchmm->n;i++) {
    if (wchmm->stend[i] != WORD_INVALID) n++;
  }
  j_printf("\t  leaf node num = %6d\n", n);
#ifndef CATEGORY_TREE
  j_printf("\t fact. node num = %6d\n", wchmm->scnum);
#endif /* CATEGORY_TREE */
}
