/*
 * Copyright (c) 1991-2003 Kyoto University
 * Copyright (c) 2000-2003 NAIST
 * All rights reserved
 */

/* init_ngram.c -- read in n-gram file & initialize */

/* $Id: init_ngram.c,v 1.6 2003/09/29 06:01:23 ri Exp $ */

#include <sent/stddefs.h>
#include <sent/ngram2.h>
#include <sent/vocabulary.h>

/*
 * Load N-gram data from a binary N-gram file.
 *
 *   ndata          : N-gram structure handed to ngram_read_bin()
 *   bin_ngram_file : path of the binary N-gram file to open
 *
 * Open, read and close failures are each reported through j_error().
 * NOTE(review): nothing here returns early after j_error(), so it is
 * presumably fatal -- confirm against its definition.
 */
void
init_ngram_bin(NGRAM_INFO *ndata, char *bin_ngram_file)
{
  FILE *bin_fp;

  j_printerr("Reading in word n-gram...");

  /* open the file */
  bin_fp = fopen_readfile(bin_ngram_file);
  if (bin_fp == NULL) {
    j_error("open error for %s\n", bin_ngram_file);
  }

  /* read its contents into ndata */
  if (ngram_read_bin(bin_fp, ndata) == FALSE) {
    j_error("read error for %s\n", bin_ngram_file);
  }

  /* close it again */
  if (fclose_readfile(bin_fp) == -1) {
    j_error("close error\n");
  }

  j_printerr("done\n");
}

/*
 * Load N-gram data from ARPA standard format (with back-off) files.
 *
 * The LR 2-gram file is always read (direction DIR_LR).  The RL 3-gram
 * file is read afterwards (direction DIR_RL) only when ngram_rl_file is
 * not NULL.  ndata->root is reset to NULL before any reading starts.
 *
 * Open, read and close failures are each reported through j_error().
 * NOTE(review): nothing here returns early after j_error(), so it is
 * presumably fatal -- confirm against its definition.
 */
void
init_ngram_arpa(NGRAM_INFO *ndata,
		char *ngram_lr_file, /* filename of LR 2-gram data */
		char *ngram_rl_file) /* filename of RL 3-gram data */
{
  FILE *lr_fp;
  FILE *rl_fp;

  ndata->root = NULL;

  /* --- forward (LR) 2-gram --- */
  j_printerr("Reading in LR 2-gram...\n");
  lr_fp = fopen_readfile(ngram_lr_file);
  if (lr_fp == NULL) {
    j_error("open error for %s\n", ngram_lr_file);
  }
  if (ngram_read_arpa(lr_fp, ndata, DIR_LR) == FALSE) {
    j_error("read error for %s\n", ngram_lr_file);
  }
  if (fclose_readfile(lr_fp) == -1) {
    j_error("close error\n");
  }

  /* no RL file given: the LR data is all there is to load */
  if (ngram_rl_file == NULL) {
    j_printerr("done\n");
    return;
  }

  /* --- backward (RL) 3-gram --- */
  j_printerr("done\nReading in RL 3-gram...\n");
  rl_fp = fopen_readfile(ngram_rl_file);
  if (rl_fp == NULL) {
    j_error("open error for %s\n", ngram_rl_file);
  }
  if (ngram_read_arpa(rl_fp, ndata, DIR_RL) == FALSE) {
    j_error("read error for %s\n", ngram_rl_file);
  }
  if (fclose_readfile(rl_fp) == -1) {
    j_error("close error\n");
  }

  j_printerr("done\n");
}

/*
 * Build the mapping from dictionary words to N-gram entries.
 *
 * For every word index below winfo->num, winfo->wton[] receives the
 * result of make_ngram_ref() for the corresponding winfo->wname[].
 * Words whose stored mapping equals ndata->unk_id are counted in
 * ndata->unk_num, and ndata->unk_num_log is set to log10 of that count
 * (0.0 when the count is zero).
 */
void
make_voca_ref(NGRAM_INFO *ndata, WORD_INFO *winfo)
{
  int w;

  j_printerr("Mapping dictonary words to n-gram entries...");

  ndata->unk_num = 0;
  w = 0;
  while (w < winfo->num) {
    winfo->wton[w] = make_ngram_ref(ndata, winfo->wname[w]);
    /* compare the value as stored in wton[], not the raw return value */
    if (winfo->wton[w] == ndata->unk_id) {
      ndata->unk_num += 1;
    }
    w++;
  }

  if (ndata->unk_num != 0) {
    ndata->unk_num_log = (float)log10(ndata->unk_num);
  } else {
    /* log10(0) is undefined; store 0.0 instead */
    ndata->unk_num_log = 0.0;
  }

  j_printerr("done\n");
}

