/* Copyright (c) 1991-2002 Doshita Lab. Speech Group, Kyoto University */
/* Copyright (c) 2000-2002 Speech and Acoustics Processing Lab., NAIST */
/*   All rights reserved   */

/* ngram_write_bin.c --- write ngram data in a local JULIUS binary format */

/* $Id: ngram_write_bin.c,v 1.3 2002/09/11 22:01:50 ri Exp $ */

#include <sent/stddefs.h>
#include <sent/ngram2.h>

/*
 * Write `unitnum` items of `unitbyte` bytes each from `buf` to `fp`.
 *
 * The file is assumed to be BIG ENDIAN.  When WORDS_BIGENDIAN is not
 * defined, multi-byte items are byte-swapped in place before the write
 * and swapped back afterwards, so `buf` holds its original contents
 * again once the call completes.
 *
 * A short write is reported through perror() and j_error().
 */
static void
wrt(FILE *fp, void *buf, size_t unitbyte, int unitnum)
{
  size_t written;
#ifndef WORDS_BIGENDIAN
  /* single-byte items have no byte order, so they are never swapped */
  const int multibyte = (unitbyte != 1);

  if (multibyte) swap_bytes((char *)buf, unitbyte, unitnum);
#endif

  written = myfwrite(buf, unitbyte, unitnum, fp);
  if (written < (size_t)unitnum) {
    perror("write_ngram_bin: wrt");
    j_error("write failed\n");
  }

#ifndef WORDS_BIGENDIAN
  /* undo the in-place swap so the caller's data is left as it was */
  if (multibyte) swap_bytes((char *)buf, unitbyte, unitnum);
#endif
}

/*
 * Write the fixed-size header block: BINGRAM_IDSTR, a newline, then the
 * caller-supplied string `str`, NUL-terminated.  Bytes after the
 * terminator are left filled with EOF.  Exactly BINGRAM_HDSIZE bytes are
 * written to `fp`.
 *
 * If the text does not fit (terminator included), a warning is printed
 * and the text is truncated in the output only; `str` itself is never
 * modified, so passing a string literal is safe.
 */
static void
write_header(FILE *fp, char *str)
{
  char buf[BINGRAM_HDSIZE];
  size_t i, totallen;

  /* pre-fill so the unused tail of the header has a known value */
  for (i = 0; i < sizeof(buf); i++) buf[i] = EOF;

  totallen = strlen(BINGRAM_IDSTR) + 1 + strlen(str);
  if (totallen >= sizeof(buf)) {
    /* no room left for the terminating NUL */
    j_printerr("Warning: header too long, will be truncated\n");
  }

  /* bounded: writes at most sizeof(buf)-1 characters plus the NUL */
  snprintf(buf, sizeof(buf), "%s\n%s", BINGRAM_IDSTR, str);

  /* write size is tied to the buffer size rather than a separate literal */
  wrt(fp, buf, 1, (int)sizeof(buf));
}

/*
 * Write N-gram data as bingram (Julius binary format) to a file.
 *
 * Output order, as produced below:
 *   1. header block containing `headerstr` (see write_header)
 *   2. ngram_num[0..MAX_N-1]
 *   3. total byte length of all word names (each counted with its '\0'),
 *      followed by the names themselves, '\0'-terminated
 *   4. 1-gram arrays: p, bo_wt_lr, bo_wt_rl, n2_bgn, n2_num
 *   5. 2-gram arrays: n2tonid, p_lr, p_rl, bo_wt_rrl, n3_bgn, n3_num
 *   6. 3-gram arrays: n3tonid, p_rrl
 *
 * fp        : output stream
 * ndata     : N-gram data to write
 * headerstr : text stored in the header after the format ID string
 *
 * Always returns TRUE; write failures are reported inside wrt().
 */
boolean
ngram_write_bin(FILE *fp, NGRAM_INFO *ndata, char *headerstr)
{
  int i,n,len;

  /* write initial header */
  write_header(fp, headerstr);

  /* write total info */
  for(n=0;n<MAX_N;n++) {
    wrt(fp, &(ndata->ngram_num[n]), sizeof(NNID), 1);
  }
  j_printf("wrote total info\n");
  /* total_ngram_num is now bogus */
  /* unk_*, isopen, max_word_num are set after read, so need not save */

  /* write wname: total length first, then each name */
  len = 0;
  for(i=0;i<ndata->ngram_num[0];i++) {
    len += strlen(ndata->wname[i]) + 1;
  }
  wrt(fp, &len, sizeof(int), 1);
  for(i=0;i<ndata->ngram_num[0];i++) {
    wrt(fp, ndata->wname[i], 1, strlen(ndata->wname[i]) + 1); /* include \0 */
  }
  /* the size expressions below are size_t; cast so they match "%d" */
  j_printf("wrote wnames (%d bytes)\n", (int)(len + sizeof(int)));
  
  /* write 1-gram */
  wrt(fp, ndata->p, sizeof(LOGPROB), ndata->ngram_num[0]);
  wrt(fp, ndata->bo_wt_lr, sizeof(LOGPROB), ndata->ngram_num[0]);
  wrt(fp, ndata->bo_wt_rl, sizeof(LOGPROB), ndata->ngram_num[0]);
  wrt(fp, ndata->n2_bgn, sizeof(NNID), ndata->ngram_num[0]);
  wrt(fp, ndata->n2_num, sizeof(WORD_ID), ndata->ngram_num[0]);
  j_printf("wrote 1-gram (%d KB)\n",
	   (int)(((sizeof(LOGPROB)*3 + sizeof(NNID) + sizeof(WORD_ID)) * ndata->ngram_num[0]) / 1024));
  
  /* write 2-gram*/
  wrt(fp, ndata->n2tonid, sizeof(WORD_ID), ndata->ngram_num[1]);
  wrt(fp, ndata->p_lr, sizeof(LOGPROB), ndata->ngram_num[1]);
  wrt(fp, ndata->p_rl, sizeof(LOGPROB), ndata->ngram_num[1]);
  wrt(fp, ndata->bo_wt_rrl, sizeof(LOGPROB), ndata->ngram_num[1]);
  wrt(fp, ndata->n3_bgn, sizeof(NNID), ndata->ngram_num[1]);
  wrt(fp, ndata->n3_num, sizeof(WORD_ID), ndata->ngram_num[1]);
  j_printf("wrote 2-gram (%d KB)\n",
	   (int)(((sizeof(LOGPROB)*3 + sizeof(NNID) + sizeof(WORD_ID)*2) * ndata->ngram_num[1]) / 1024));
  

  /* write 3-gram*/
  wrt(fp, ndata->n3tonid, sizeof(WORD_ID), ndata->ngram_num[2]);
  wrt(fp, ndata->p_rrl, sizeof(LOGPROB), ndata->ngram_num[2]);
  j_printf("wrote 3-gram (%d KB)\n",
	   (int)(((sizeof(LOGPROB) + sizeof(WORD_ID)) * ndata->ngram_num[2]) / 1024));

  return TRUE;
}
