/*
 * init.c - initialization of ChaSen
 *
 * Copyright (C) 1996,1997 Nara Institute of Science and Technology
 *
 * Modified by A.Kitauchi <akira-k@is.aist-nara.ac.jp> Sep. 1996
 *
 */

#include "chalib.h"
#include "pat.h"

/*
 * .chasenrc default values.
 * The VGRAM build and the default build use different values.  Within this
 * file these are used as initial or fallback values: the weights below are
 * overwritten when the rc file supplies the corresponding entry, and
 * POS_COST_DEFAULT fills in any POS cost still at 0 after the rc file's
 * POS-cost entry has been processed.
 */
#ifdef VGRAM
#define POS_COST_DEFAULT	1
#define RENSETSU_WEIGHT_DEFAULT	1
#define KEITAISO_WEIGHT_DEFAULT	1
#define COST_WIDTH_DEFAULT	0
#else
#define POS_COST_DEFAULT	10
#define RENSETSU_WEIGHT_DEFAULT	10
#define KEITAISO_WEIGHT_DEFAULT	1
#define COST_WIDTH_DEFAULT	20
#endif
/* NOTE(review): not referenced anywhere in this file - confirm it is still used */
#define UNDEF_WORD_DEFAULT	10000

/* Passed to set_cha_fput() by chasen_init(); never assigned in this file. */
int Cha_server_mode;

/* Connection-cost weight; the rc value is multiplied by MRPH_DEFAULT_WEIGHT. */
int Con_cost_weight = RENSETSU_WEIGHT_DEFAULT * MRPH_DEFAULT_WEIGHT;
/* Set from the default-connection-cost entry of the rc file. */
int Con_cost_undef = 0;
/* Morpheme-cost weight; set from the morpheme-weight entry of the rc file. */
int Mrph_cost_weight = KEITAISO_WEIGHT_DEFAULT;

/*
 * Unknown-word settings collected from the rc file (POS ids and costs), and
 * the number of leading entries of Undef_info[] that chasen_init() processes.
 */
undef_info Undef_info[UNDEF_HINSI_MAX];
int Undef_info_num = 0;

/* BOS / EOS strings; replaced when the rc file has a BOS / EOS string entry. */
char *Bos_string = "";
char *Eos_string = "EOS\n";

/* Defined elsewhere; read here to pick language defaults and to check that
   at least one dictionary has been configured. */
extern int num_sufdic_file;

/***********************************************************************/
/*
 * read_undef_cost_list - store the cost settings given for unknown words
 *
 * costs is the list of cost values that followed the unknown-word keyword.
 * Each element is either an atom (cost, with cost_step forced to 0) or a
 * list whose first two elements are the cost and the cost_step.  At most
 * UNDEF_HINSI_MAX elements are stored into Undef_info[], in order.
 *
 * Undef_info_num is lowered to the number of elements stored here when it
 * is still 0 or is larger than that number.
 */
static void read_undef_cost_list(costs)
    cell_t *costs;
{
    int i;

    for (i = 0; i < UNDEF_HINSI_MAX && !nullp(costs); i++, costs = cdr(costs)) {
        cell_t *entry = car(costs);

        if (atomp(entry)) {
            Undef_info[i].cost = atoi(s_atom(entry));
            Undef_info[i].cost_step = 0;
        } else {
            Undef_info[i].cost = atoi(s_atom(car(entry)));
            Undef_info[i].cost_step = atoi(s_atom(car(cdr(entry))));
        }
    }

    if (Undef_info_num == 0 || Undef_info_num > i)
        Undef_info_num = i;
}

/*
 * read_class_cost - process the POS-cost entry of the chasenrc file
 *
 * cell is the list of (POS-spec cost...) items that followed the keyword.
 * An item whose POS name is one of the unknown-word keywords is handed to
 * read_undef_cost_list(); every other item assigns a cost to the matching
 * part(s) of speech.  After the list has been processed, any cost that is
 * still 0 is set to POS_COST_DEFAULT and entry 0 is forced back to 0.
 *
 * The unknown-word handling is shared by the VGRAM and non-VGRAM builds;
 * only the POS table being filled (Hinsi[] vs. Class[][]) differs.
 */
static void read_class_cost(cell)
    cell_t *cell;
{
    int hinsi, cost;

#ifdef VGRAM
    for (; !nullp(cell); cell = cdr(cell)) {
        cell_t *cell1 = car(car(cell));     /* POS specification */
        cell_t *cell2 = cdr(car(cell));     /* cost value(s) */
        char *s = s_atom(car(cell1));

        if (strmatch3(s, JSTR_UNKNOWN_WORD1, JSTR_UNKNOWN_WORD2, ESTR_UNKNOWN_WORD)) {
            read_undef_cost_list(cell2);
        } else if (!strcmp(s, "*")) {
            /* "*" only fills in entries that have no cost yet */
            cost = atoi(s_atom(car(cell2)));
            for (hinsi = 1; Hinsi[hinsi].name; hinsi++)
                if (Hinsi[hinsi].cost == 0)
                    Hinsi[hinsi].cost = cost;
        } else {
            int match = 0;

            /* an explicit POS overwrites every entry it matches */
            cost = atoi(s_atom(car(cell2)));
            for (hinsi = 1; Hinsi[hinsi].name; hinsi++) {
                if (match_nhinsi(cell1, hinsi)) {
                    Hinsi[hinsi].cost = cost;
                    match = 1;
                }
            }
            if (!match)
                cha_exit_file(1, "invalid hinsi name `%s'\n", s_tostr(cell1));
        }
    }

    /* default */
    for (hinsi = 1; Hinsi[hinsi].name; hinsi++)
        if (Hinsi[hinsi].cost == 0)
            Hinsi[hinsi].cost = POS_COST_DEFAULT;

    /* Entry 0 always costs nothing.  (The original note is mis-encoded in
       this copy; it appears to read "sentence head / sentence tail".) */
    Hinsi[0].cost = 0;
#else
    int bunrui;

    for (; !nullp(cell); cell = cdr(cell)) {
        cell_t *cell1 = car(car(cell));     /* POS specification */
        cell_t *cell2 = cdr(car(cell));     /* cost value(s) */
        char *s = s_atom(car(cell1));

        if (strmatch3(s, JSTR_UNKNOWN_WORD1, JSTR_UNKNOWN_WORD2, ESTR_UNKNOWN_WORD)) {
            read_undef_cost_list(cell2);
        } else {
            cost = atoi(s_atom(car(cell2)));
            get_hinsi_bunrui_id(cell1, &hinsi, &bunrui);
            if (hinsi == 0) {
                /* hinsi id 0: fill in every class that has no cost yet */
                for (hinsi = 1; Class[hinsi][0].id; hinsi++)
                    for (bunrui = 0; Class[hinsi][bunrui].id; bunrui++)
                        if (Class[hinsi][bunrui].cost == 0)
                            Class[hinsi][bunrui].cost = cost;
            } else if (bunrui == 0) {
                /* bunrui id 0: overwrite every bunrui of this hinsi */
                for (bunrui = 0; Class[hinsi][bunrui].id; bunrui++)
                    Class[hinsi][bunrui].cost = cost;
            } else {
                Class[hinsi][bunrui].cost = cost;
            }
        }
    }

    /* default */
    for (hinsi = 1; Class[hinsi][0].id; hinsi++)
        for (bunrui = 0; Class[hinsi][bunrui].id; bunrui++)
            if (Class[hinsi][bunrui].cost == 0)
                Class[hinsi][bunrui].cost = POS_COST_DEFAULT;

    /* Entry [0][0] always costs nothing; added by S.Kurohashi.  (The rest of
       the original note is mis-encoded in this copy; it appears to read
       "sentence head / sentence tail".) */
    Class[0][0].cost = 0;
#endif
}

/***********************************************************************/
/*
 * read_composition - process the composition-POS entry of the chasenrc file
 *
 * cell is the list of POS specifications that followed the keyword.
 *
 * VGRAM build: for each specification, every Hinsi[] entry that matches it
 * gets its comp field set to the id returned by get_nhinsi_id() for that
 * specification.
 *
 * Default build: when the specification resolves to a non-zero bunrui id,
 * that single Class[][] entry gets comp = 2; otherwise every bunrui of the
 * resolved hinsi gets comp = 1.
 */
static void read_composition(cell)
    cell_t *cell;
{
#ifdef VGRAM
    int comp_id;
    int pos_id;

    while (!nullp(cell)) {
        comp_id = get_nhinsi_id(car(cell));

        pos_id = 1;
        while (Hinsi[pos_id].name) {
            if (match_nhinsi(car(cell), pos_id)) {
                Hinsi[pos_id].comp = comp_id;
            }
            pos_id++;
        }

        cell = cdr(cell);
    }
#else
    int hinsi_id;
    int bunrui_id;
    int k;

    while (!nullp(cell)) {
        get_hinsi_bunrui_id(car(cell), &hinsi_id, &bunrui_id);

        if (bunrui_id == 0) {
            /* whole hinsi: mark each of its bunrui entries */
            k = 0;
            while (Class[hinsi_id][k].id) {
                Class[hinsi_id][k].comp = 1;
                k++;
            }
        } else {
            /* one specific bunrui */
            Class[hinsi_id][bunrui_id].comp = 2;
        }

        cell = cdr(cell);
    }
#endif
}

/***********************************************************************/
/*
 * eval_chasenrc_sexp - apply one top-level entry of the chasenrc file
 *
 * cell is a list whose first element is the entry keyword.  The keyword is
 * compared against the known entry names (Japanese and/or English
 * spellings) and the first match decides what is done with the rest of the
 * list.  Entries with an unrecognised keyword are silently ignored.
 * Nothing is done if Cha_errno is set after the keyword and first argument
 * have been extracted.
 */
static void eval_chasenrc_sexp(cell)
    cell_t *cell;
{
    char *key;          /* entry keyword */
    cell_t *arg;        /* first argument after the keyword */

    key = s_atom(car(cell));
    arg = car(cdr(cell));
    if (Cha_errno) {
        return;
    }

    /* dictionary files */
    if (strcmp(key, ESTR_PAT_FILE) == 0) {
        read_patdic(cdr(cell));
        return;
    }
    if (strcmp(key, ESTR_SUF_FILE) == 0) {
        read_sufdic(cdr(cell));
        return;
    }

    /* parts of speech for unknown words */
    if (strmatch3(key, JSTR_UNKNOWN_POS1, JSTR_UNKNOWN_POS2, ESTR_UNKNOWN_POS)) {
        cell_t *rest = cdr(cell);
        int n = 0;

        /* store at most UNDEF_HINSI_MAX POS specifications, in order */
        while (n < UNDEF_HINSI_MAX && !nullp(rest)) {
#ifdef VGRAM
            Undef_info[n].hinsi = get_nhinsi_id(car(rest));
#else
            get_hinsi_bunrui_id(car(rest), &Undef_info[n].hinsi, &Undef_info[n].bunrui);
#endif
            n++;
            rest = cdr(rest);
        }

        /* lower the count when it is still 0 or larger than this list */
        if (Undef_info_num == 0 || Undef_info_num > n) {
            Undef_info_num = n;
        }
        return;
    }

    /* connection cost weight */
    /* modified by S.Kurohashi 93/03/04
       for balancing connect-weight with morph-weight */
    if (strmatch2(key, JSTR_CONN_WEIGHT, ESTR_CONN_WEIGHT)) {
        Con_cost_weight = atoi(s_atom(arg)) * MRPH_DEFAULT_WEIGHT;
        return;
    }

    /* morpheme cost weight */
    if (strmatch2(key, JSTR_MRPH_WEIGHT, ESTR_MRPH_WEIGHT)) {
        Mrph_cost_weight = atoi(s_atom(arg));
        return;
    }

    /* cost width */
    /* modified by S.Kurohashi 93/03/04
       for balancing connect-weight with morph-weight */
    if (strmatch2(key, JSTR_COST_WIDTH, ESTR_COST_WIDTH)) {
        set_cost_width(atoi(s_atom(arg)));
        return;
    }

    /* POS costs */
    if (strmatch2(key, JSTR_POS_COST, ESTR_POS_COST)) {
        read_class_cost(cdr(cell));
        return;
    }

    /* default connection cost */
    if (strmatch2(key, JSTR_DEF_CONN_COST, ESTR_DEF_CONN_COST)) {
        Con_cost_undef = atoi(s_atom(arg));
        return;
    }

    /* composition POS */
    if (strmatch2(key, JSTR_COMPO_POS, ESTR_COMPO_POS)) {
        read_composition(cdr(cell));
        return;
    }

    /* output format */
    if (strmatch2(key, JSTR_OUTPUT_FORMAT, ESTR_OUTPUT_FORMAT)) {
        set_opt_form(s_atom(arg));
        return;
    }

    /* language */
    if (strmatch2(key, JSTR_LANG, ESTR_LANG)) {
        set_language(s_atom(arg));
        return;
    }

    /* BOS string */
    if (strmatch2(key, JSTR_BOS_STR, ESTR_BOS_STR)) {
        Bos_string = convert_escape(cha_strdup(s_atom(arg)), 0);
        return;
    }

    /* EOS string */
    if (strmatch2(key, JSTR_EOS_STR, ESTR_EOS_STR)) {
        Eos_string = convert_escape(cha_strdup(s_atom(arg)), 0);
        return;
    }

    /* delimiter characters */
    if (strmatch2(key, JSTR_DELIMITER, ESTR_DELIMITER)) {
        set_jfgets_delimiter(s_atom(arg));
        return;
    }
}

/***********************************************************************
 * read_chasenrc_fp() - read and apply every entry of an open chasenrc
 *
 * Entries are read from fp with s_read() until s_feof() reports the end
 * of the stream.  An entry is evaluated only while Cha_errno is 0;
 * reading continues regardless.
 *
 * If neither Lang_j nor Lang_e is set once the stream has been consumed,
 * Lang_j is turned on, and Lang_e as well when num_sufdic_file is
 * non-zero.
 ***********************************************************************/
void read_chasenrc_fp(fp)
    FILE *fp;
{
    cell_t *sexp;

    for (;;) {
        if (s_feof(fp)) {
            break;
        }
        sexp = s_read(fp);
        if (Cha_errno) {
            continue;
        }
        eval_chasenrc_sexp(sexp);
    }

    /* a language was chosen explicitly: nothing to default */
    if (Lang_j || Lang_e) {
        return;
    }

    Lang_j = 1;
    if (num_sufdic_file) {
        Lang_e = 1;
    }
}

/***********************************************************************/
/*
 * read_chasenrc - locate, open and process the chasenrc file
 *
 * The path comes from get_chasenrc_path().  After the file has been
 * processed and closed, cha_exit() is called with status 1 when both
 * number_of_tree and num_sufdic_file are still 0, i.e. no dictionary was
 * configured.
 */
static void read_chasenrc()
{
    char *rcpath = get_chasenrc_path();
    FILE *fp = cha_fopen(rcpath, "r", 1);

    read_chasenrc_fp(fp);
    fclose(fp);

    if (number_of_tree || num_sufdic_file) {
        return;
    }

    /* the path is fetched again for the message, as in the original code */
    cha_exit(1, "%s: no patricia dictionary is specified",
             get_chasenrc_path());
}

/*
 * chasen_init - ChaSen's initialization
 *
 * Runs the start-up steps in a fixed order: I/O routine selection, default
 * cost width, grammar files, the chasenrc file, then the conjugation,
 * connection-table and connection-matrix files.  Finally the con_tbl field
 * of each of the first Undef_info_num entries of Undef_info[] is filled in
 * from check_table_for_undef().
 */
void chasen_init()
{
    int idx;

    /* output routine, chosen according to Cha_server_mode */
    set_cha_fput(Cha_server_mode);

    /* input routine */
    set_cha_getc_alone();

    /* default cost width; an rc file entry may replace it below */
    set_cost_width(COST_WIDTH_DEFAULT);

    read_grammar_dir();
    read_grammar(NULL, 1, 1);

    read_chasenrc();

    read_katuyou(NULL, 1);
    read_table(NULL, 1);
    read_matrix(NULL);

    /* resolve the table entry for every configured unknown-word POS */
    idx = 0;
    while (idx < Undef_info_num) {
#ifdef VGRAM
        Undef_info[idx].con_tbl = check_table_for_undef(Undef_info[idx].hinsi);
#else
        Undef_info[idx].con_tbl =
            check_table_for_undef(Undef_info[idx].hinsi, Undef_info[idx].bunrui);
#endif
        idx++;
    }
}

