/*
 * Copyright (c) 1991-2003 Kyoto University
 * Copyright (c) 2000-2003 NAIST
 * All rights reserved
 */

/* dfa-decode.c --- for Julian: predict next word using DFA grammar for 2nd pass */

/* $Id: dfa_decode.c,v 1.7 2003/09/29 06:01:22 ri Exp $ */

/*
  $B%9%?%C%/%G%3!<%G%#%s%0(B($BBh(B2$B%Q%9(B)$B$K$*$1$kJ8K!(B(DFA)$B$K4p$E$/<!C18lM=B,(B:
  $BM?$($i$l$?2>@b$KBP$7$F!$(BDFA $BJ8K!>e@\B32DG=$J<!C18l$N=89g$r7hDj$9$k!%(B
  $B$?$@$7<B:]$K$O(B, $BE83+852>@b$NM=B,$5$l$k;OC<%U%l!<%`<~JU$NC18l%H%l%j%9(B
  $B>e$K;D$C$F$$$kC18l$N$_$,E83+$5$l$k(B(dfa_look_around())$B!%(B

  Next word prediction by grammar rule (DFA) in 2nd pass stack decoding:
  determine the set of words that the grammar allows to connect next.
  Actually, only those that exist in the word trellis around the estimated
  word end frame will be expanded (function dfa_look_around()).
*/

/*
  $B%7%g!<%H%]!<%:$N%9%-%C%W=hM}(B:

  Julian $B$G$O%7%g!<%H%]!<%:$N=P8=2DG=0LCV$O(B
  $BJ8K!$G;XDj$9$k!%<B:]$NH/OC$G$O%]!<%:$,F~$i$J$$>l9g$,$"$k$N$G!$$=$l$r(B
  $B9MN8$7$F<!C18lM=B,$9$kI,MW$,$"$k!%(B
  
  $B<!C18l=89g$K%7%g!<%H%]!<%:$,$"$k>l9g(B, $B$5$i$K$=$N<!$NC18l=89g$^$G(B
  $B<!C18l=89g$K4^$a$k(B("NEXTWORD->can_insert_sp = TRUE)$B!%<B:]$K$=$3$K(B
  $B%7%g!<%H%]!<%:$,A^F~$5$l$k$+$I$&$+$O(B search_bestfirst_main.c $B$G(B
  $B%9%3%"$r8+$FH=CG$5$l$k!%(B

  Short pause skipping:

  In Julian, the locations where a short pause may be inserted must be
  explicitly defined in the grammar, just like other words.  This short
  pause word needs special handling in next word prediction, because it
  can be "skipped" when the user did not actually insert a break.

  When a short pause word can connect as the next word, the word set that
  follows the short pause word is also included in the prediction set with
  "NEXTWORD->can_insert_sp = TRUE".  Whether a short pause was actually
  inserted in the user input is determined by score in
  search_bestfirst_main.c.
 */

/* Julian $B%b!<%I$G$O(B dfa_firstwords(), dfa_nextwords(), dfa_acceptable(),
   dfa_eosscore() $B$,Bh(B2$B%Q%9$N%a%$%s4X?t(B wchmm_fbs() $B$KEO$5$l(B, $B;HMQ$5$l$k!%(B
   (Julius $B%b!<%I$G$O(B "ngram_decode.c" $BFb$N(B ngram_*() $B$,;H$o$l$k(B */

/* In julian mode, pointers to functions dfa_firstwords(), dfa_nextwords(),
   dfa_acceptable() and dfa_eosscore() are passed to main search function
   wchmm_fbs().  (for julius, corresponding functions ngram_*() in
   "ngram_decode.c" will be passed instead) */

#include <julius.h>
#ifdef USE_DFA

/*
 * dfa_firstwords --- return the initial set of predicted words.
 *
 * For every active grammar in the global list `gramlist', scan the DFA
 * states in [m->state_begin, m->state_begin + m->dfa->state_num) and, for
 * each state whose status has INITIAL_S set, expand all the words of every
 * category that labels an outgoing arc.  Each expanded word is stored with
 * the arc's destination state as its next state.
 *
 *   nw       : next word set to be filled (return value);
 *              assumed to be already allocated by the caller
 *   peseqlen : end time of input (not referenced in this function)
 *   maxnw    : maximum length of nw[]
 *   dfa      : DFA info
 *
 * Return value: number of words stored in nw[], or -1 if the words do not
 * fit in maxnw entries.
 */
int
dfa_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, DFA_INFO *dfa)
{
  DFA_ARC *arc;
  MULTIGRAM *m;
  int s, sb, se;
  int cate, iw, ns;
  int num = 0;

  for (m = gramlist; m; m = m->next) {
    if (!m->active) continue;	/* skip inactive grammars */
    sb = m->state_begin;
    se = sb + m->dfa->state_num;
    for (s = sb; s < se; s++) {
      if ((dfa->st[s].status & INITIAL_S) == 0) continue; /* not an initial state */
      for (arc = dfa->st[s].arc; arc; arc = arc->next) { /* for all arcs */
        cate = arc->label;	/* category ID */
        ns = arc->to_state;	/* next DFA state ID */
        /* all words within the category are expanded */
        for (iw = 0; iw < dfa->term.wnum[cate]; iw++) {
          /* Check for room BEFORE storing: nw[] is never touched when
             maxnw <= 0, and a word set that fills nw[] exactly is not
             mistaken for an overflow. */
          if (num >= maxnw) return -1; /* buffer overflow */
          nw[num]->id = dfa->term.tw[cate][iw]; /* word ID */
          nw[num]->next_state = ns; /* next state */
          nw[num]->can_insert_sp = FALSE; /* a short pause should not be inserted before this word */
          num++;
        }
      }
    }
  }

  return num;
}

/*
 * dfa_nextwords --- return the next word set determined by the DFA grammar
 * for a partial sentence hypothesis.
 *
 * Every arc leaving the hypothesis' current DFA state (hypo->state) is
 * examined:
 *  - if the arc's category is a short pause (dfa->is_sp[]), the short pause
 *    word itself is NOT expanded; instead the words of the categories one
 *    arc further on are expanded, marked with can_insert_sp = TRUE;
 *  - otherwise all words of the arc's category are expanded with
 *    can_insert_sp = FALSE.
 *
 *   hypo  : source hypothesis
 *   nw    : next word set to be filled (return value)
 *   maxnw : maximum length of nw[]
 *   dfa   : DFA info
 *
 * Return value: number of words stored in nw[], or -1 if the words do not
 * fit in maxnw entries.
 */
int
dfa_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, DFA_INFO *dfa)
{
  DFA_ARC *arc, *arc2;
  int iw, cate, ns, cate2, ns2;
  int num = 0;

  /* hypo->state: current DFA state ID */
  for (arc = dfa->st[hypo->state].arc; arc; arc = arc->next) { /* for all arcs */
    cate = arc->label;
    ns = arc->to_state;
    if (dfa->is_sp[cate]) {	/* short pause */
      /* expand one more step ahead (the short pause word itself is not expanded) */
      for (arc2 = dfa->st[ns].arc; arc2; arc2 = arc2->next) {
        cate2 = arc2->label;
        ns2 = arc2->to_state;
        for (iw = 0; iw < dfa->term.wnum[cate2]; iw++) {
          /* Check for room BEFORE storing: nw[] is never touched when
             maxnw <= 0, and an exactly full nw[] is not an overflow. */
          if (num >= maxnw) return -1; /* buffer overflow */
          nw[num]->id = dfa->term.tw[cate2][iw];
          nw[num]->next_state = ns2;
          nw[num]->can_insert_sp = TRUE;
          num++;
        }
      }
    } else {			/* not short pause */
      /* all words within the category are expanded */
      for (iw = 0; iw < dfa->term.wnum[cate]; iw++) {
        if (num >= maxnw) return -1; /* buffer overflow (checked before storing) */
        nw[num]->id = dfa->term.tw[cate][iw];
        nw[num]->next_state = ns;
        nw[num]->can_insert_sp = FALSE;
        num++;
      }
    }
  }
  return num;
}

/*
 * dfa_acceptable --- tell whether the hypothesis is in an "acceptable"
 * state of the grammar.
 *
 * Tests the ACCEPT_S flag in the status of the DFA state the hypothesis
 * is currently in (hypo->state).
 *
 * Return value: TRUE if the flag is set, FALSE otherwise.
 *
 * NOISE: by design, control does not come here (translation of the
 * original author's note).
 */
boolean
dfa_acceptable(NODE *hypo, DFA_INFO *dfa)
{
  return (dfa->st[hypo->state].status & ACCEPT_S) ? TRUE : FALSE;
}

/* patch by kashima */

/*
 * Scan the trellis words stored for frame `t' and return the index of the
 * first one whose word ID equals nword->id, or -1 if there is none.
 * The caller must pass a frame index that is valid for bt->num[] / bt->rw[].
 */
static int
dfa_find_on_frame(NEXTWORD *nword, BACKTRELLIS *bt, int t)
{
  int idx;
  WORD_ID w;

  for (idx = 0; idx < bt->num[t]; idx++) {
    w = (bt->rw[t][idx])->wid;
    if (w == nword->id) return idx;
  }
  return -1;
}

/*
 * dfa_look_around --- check whether the given next word candidate `nword'
 * exists in the word trellis around the estimated connection time of `hypo'.
 *
 * If it exists, nword->tre is set to the trellis word that was found.  As
 * the best connection time is determined later, the one found here need
 * not be the optimal one.
 *
 * Frames are visited nearest-to-center first within each direction:
 *   1. backward: from the estimated time down to (estimated time -
 *      lookup_range), stopping at frame 0;
 *   2. forward:  from (estimated time + 1) up to, but not including,
 *      (estimated time + lookup_range), stopping at the last trellis frame
 *      and before hypo->bestt.
 *
 * NOTE(review): the backward pass includes the frame at distance
 * lookup_range while the forward pass stops one frame short of it, so the
 * window is not symmetric.  Kept as is; confirm whether this is intended.
 *
 * Return value: TRUE if found, FALSE if not found (= this word will not be
 * expanded).
 */
boolean
dfa_look_around(NEXTWORD *nword, NODE *hypo, BACKTRELLIS *bt)
{
  int center = hypo->estimated_next_t;	/* estimated connection time */
  int frame;
  int hit;

  /* 1. backward half, starting at the center frame */
  for (frame = center;
       frame >= center - lookup_range && frame >= 0;
       frame--) {
    hit = dfa_find_on_frame(nword, bt, frame);
    if (hit >= 0) {		/* found */
      nword->tre = bt->rw[frame][hit];
      return TRUE;
    }
  }

  /* 2. forward half, never going past the trellis end nor reaching bestt */
  for (frame = center + 1;
       frame < center + lookup_range
	 && frame <= bt->framelen - 1
	 && frame < hypo->bestt;
       frame++) {
    hit = dfa_find_on_frame(nword, bt, frame);
    if (hit >= 0) {		/* found */
      nword->tre = bt->rw[frame][hit];
      return TRUE;
    }
  }

  return FALSE;			/* not found */
}

#endif /* USE_DFA */
