/*
 * init.c - initialization of ChaSen
 *
 * Copyright (C) 1996,1997 Nara Institute of Science and Technology
 *
 * Modified by A.Kitauchi <akira-k@is.aist-nara.ac.jp> Sep. 1996
 *
 */

#include "chalib.h"
#include "pat.h"

/*
 * .chasenrc default values: used when the rc file does not override
 * the corresponding setting.
 */
/* cost given by read_class_cost() to every POS whose cost is still 0 */
#define POS_COST_DEFAULT	1
/* initial connection-cost weight (multiplied by MRPH_DEFAULT_WEIGHT below) */
#define RENSETSU_WEIGHT_DEFAULT	1
/* initial morpheme-cost weight */
#define KEITAISO_WEIGHT_DEFAULT	1
/* value handed to cha_set_cost_width() by cha_init() before the rc file is read */
#define COST_WIDTH_DEFAULT	0
/* NOTE(review): not referenced in the visible part of this file */
#define UNDEF_WORD_DEFAULT	10000

/* passed to cha_set_fput() by cha_init(); never assigned in this file */
int Cha_server_mode;

/* connection-cost weight; replaced by the CONN_WEIGHT rc entry (also scaled by MRPH_DEFAULT_WEIGHT) */
int Cha_con_cost_weight = RENSETSU_WEIGHT_DEFAULT * MRPH_DEFAULT_WEIGHT;
/* replaced by the DEF_CONN_COST rc entry */
int Cha_con_cost_undef = 0;
/* morpheme-cost weight; replaced by the MRPH_WEIGHT rc entry */
int Cha_mrph_cost_weight = KEITAISO_WEIGHT_DEFAULT;

/* slot 0: .hinsi set by the SPACE_POS rc entry; slots 1..: filled by the ANNOTATION rc entry */
anno_info Cha_anno_info[UNDEF_HINSI_MAX];
/* .hinsi comes from UNKNOWN_POS, .cost/.cost_step from the unknown-word item of POS_COST,
   .con_tbl is filled in by cha_init() */
undef_info Cha_undef_info[UNDEF_HINSI_MAX];
/* number of usable Cha_undef_info slots: each rc entry that fills the table
   lowers it to its own item count (or sets it when it is still 0) */
int Cha_undef_info_num = 0;
/* set to 0 when the OUTPUT_COMPO rc value matches JSTR_SEG/ESTR_SEG, otherwise 1 */
int Cha_output_compo = 1;

/* replaced by the BOS_STR rc entry */
char *Cha_bos_string = "";
/* replaced by the EOS_STR rc entry */
char *Cha_eos_string = "EOS\n";

/* defined elsewhere; read_chasenrc() exits when both are zero after the rc file is read */
extern int Suf_ndicfile;
extern int Pat_ndicfile;

/***********************************************************************/
static void read_class_cost(cell)
    chasen_cell_t *cell;
{
    int hinsi, cost;

    for (; !nullp(cell); cell = cha_cdr(cell)) {
	chasen_cell_t *cell1 = cha_car(cha_car(cell));
	chasen_cell_t *cell2 = cha_cdr(cha_car(cell));
	char *s = cha_s_atom(cha_car(cell1));
	if (strmatch3(s, JSTR_UNKNOWN_WORD1, JSTR_UNKNOWN_WORD2, ESTR_UNKNOWN_WORD)) {
	    int i;
	    for (i=0; i<UNDEF_HINSI_MAX && !nullp(cell2); i++, cell2=cha_cdr(cell2)) {
		chasen_cell_t *cell3 = cha_car(cell2);
		if (atomp(cell3)) {
		    Cha_undef_info[i].cost = atoi(cha_s_atom(cell3));
		    Cha_undef_info[i].cost_step = 0;
		} else {
		    Cha_undef_info[i].cost = atoi(cha_s_atom(cha_car(cell3)));
		    Cha_undef_info[i].cost_step = atoi(cha_s_atom(cha_car(cha_cdr(cell3))));
		}
	    }   
	    if (Cha_undef_info_num == 0 || Cha_undef_info_num > i)
	      Cha_undef_info_num = i;
	} else if (!strcmp(s, "*")) {
	    cost = atoi(cha_s_atom(cha_car(cell2)));
	    for (hinsi = 1; Cha_hinsi[hinsi].name; hinsi++)
	      if (Cha_hinsi[hinsi].cost == 0)
		Cha_hinsi[hinsi].cost = cost;
	} else {
	    int match = 0;
	    cost = atoi(cha_s_atom(cha_car(cell2)));
	    for (hinsi = 1; Cha_hinsi[hinsi].name; hinsi++) {
		if (cha_match_nhinsi(cell1, hinsi)) {
		    Cha_hinsi[hinsi].cost = cost;
		    match = 1;
		}
	    }
	    if (!match)
	      cha_exit_file(1, "invalid hinsi name `%s'\n", cha_s_tostr(cell1));
	}
    }

    /* default */
    for (hinsi = 1; Cha_hinsi[hinsi].name; hinsi++) 
      if (Cha_hinsi[hinsi].cost == 0)
	Cha_hinsi[hinsi].cost = POS_COST_DEFAULT;

    /* ʸƬ ʸ */
    Cha_hinsi[0].cost = 0;
}

/***********************************************************************/
/*
 * read_composition - apply the item list of a composition-POS rc entry.
 *
 * Each element of `cell` is a list of POS patterns.  The id of its
 * first pattern (via cha_get_nhinsi_id()) is stored in the .comp field
 * of every Cha_hinsi entry matched by the remaining patterns.  When the
 * list has only one pattern, that pattern itself is used for matching.
 */
static void read_composition(cell)
    chasen_cell_t *cell;
{
    while (!nullp(cell)) {
        chasen_cell_t *patterns = cha_car(cell);
        int comp_id = cha_get_nhinsi_id(cha_car(patterns));

        /* skip the head pattern unless it is the only one */
        if (!nullp(cha_cdr(patterns))) {
            patterns = cha_cdr(patterns);
        }

        while (!nullp(patterns)) {
            chasen_cell_t *pattern = cha_car(patterns);
            int idx = 1;

            while (Cha_hinsi[idx].name) {
                if (cha_match_nhinsi(pattern, idx)) {
                    Cha_hinsi[idx].comp = comp_id;
                }
                idx++;
            }
            patterns = cha_cdr(patterns);
        }

        cell = cha_cdr(cell);
    }
}

/***********************************************************************/
static void eval_chasenrc_sexp(cell)
    chasen_cell_t *cell;
{
    char *cell1_str;
    chasen_cell_t *cell2;

    cell1_str = cha_s_atom(cha_car(cell));
    cell2 = cha_car(cha_cdr(cell));
    if (Cha_errno)
      return;

    /* ե(patdic, sufdic) */
    if (!strcmp(cell1_str, ESTR_PAT_FILE))
      cha_read_patdic(cha_cdr(cell));
    else if (!strcmp(cell1_str, ESTR_SUF_FILE))
      cha_read_sufdic(cha_cdr(cell));
    /* ʻ(space pos) */
    else if (strmatch2(cell1_str, JSTR_SPACE_POS, ESTR_SPACE_POS)) {
	Cha_anno_info[0].hinsi = cha_get_nhinsi_id(cell2);
    }
    /* (annotation) */
    else if (strmatch2(cell1_str, JSTR_ANNOTATION, ESTR_ANNOTATION)) {
	int i;
	for (i = 1, cell2 = cha_cdr(cell);
	     i < UNDEF_HINSI_MAX && !nullp(cell2);
	     i++, cell2 = cha_cdr(cell2)) {
	    chasen_cell_t *cell3 = cha_car(cell2);
	    chasen_cell_t *cell4;
	    /* str1, len1 */
	    Cha_anno_info[i].str1 = cha_s_atom(cha_car(cha_car(cell3)));
	    Cha_anno_info[i].len1 = strlen(Cha_anno_info[i].str1);
	    cell4 = cha_car(cha_cdr(cha_car(cell3)));
	    /* str2, len2 */
	    Cha_anno_info[i].str2 = nullp(cell4) ? "" : cha_s_atom(cell4);
	    Cha_anno_info[i].len2 = strlen(Cha_anno_info[i].str2);
	    /* hinsi */
	    cell4 = cha_car(cha_cdr(cell3));
	    if (!nullp(cell4)) {
		if (atomp(cell4)) {
		    /* format string */
		    Cha_anno_info[i].format = cha_s_atom(cell4);
		} else {
		    /* pos */
		    Cha_anno_info[i].hinsi = cha_get_nhinsi_id(cell4);
		}
	    }
	}
    }
    /* ̤θʻ */
    else if (strmatch3(cell1_str, JSTR_UNKNOWN_POS1, JSTR_UNKNOWN_POS2, ESTR_UNKNOWN_POS)) {
	int i;
	cell2 = cha_cdr(cell);
	for (i = 0; i < UNDEF_HINSI_MAX && !nullp(cell2);
	     i++, cell2 = cha_cdr(cell2)) {
	    Cha_undef_info[i].hinsi = cha_get_nhinsi_id(cha_car(cell2));
	}
	if (Cha_undef_info_num == 0 || Cha_undef_info_num > i)
	  Cha_undef_info_num = i;
    }
    /* ϢܥȽŤ */
    /* modified by S.Kurohashi 93/03/04 
       for balancing connect-weigh with morph-weight */
    else if (strmatch2(cell1_str, JSTR_CONN_WEIGHT, ESTR_CONN_WEIGHT))
      Cha_con_cost_weight = atoi(cha_s_atom(cell2)) * MRPH_DEFAULT_WEIGHT;
    /* ǥȽŤ */
    else if (strmatch2(cell1_str, JSTR_MRPH_WEIGHT, ESTR_MRPH_WEIGHT))
      Cha_mrph_cost_weight = atoi(cha_s_atom(cell2));
    /*  */
    /* modified by S.Kurohashi 93/03/04 
       for balancing connect-weigh with morph-weight */
    else if (strmatch2(cell1_str, JSTR_COST_WIDTH, ESTR_COST_WIDTH))
      cha_set_cost_width(atoi(cha_s_atom(cell2)));
    /* ʻ쥳 */
    else if (strmatch2(cell1_str, JSTR_POS_COST, ESTR_POS_COST))
      read_class_cost(cha_cdr(cell));
    /* ̤Ϣܥ */
    else if (strmatch2(cell1_str, JSTR_DEF_CONN_COST, ESTR_DEF_CONN_COST))
      Cha_con_cost_undef = (int) atoi(cha_s_atom(cell2));
    /* Ϣʻ */
    else if (strmatch2(cell1_str, JSTR_COMPO_POS, ESTR_COMPO_POS))
      read_composition(cha_cdr(cell));
    /* ʣ */
    else if (strmatch2(cell1_str, JSTR_OUTPUT_COMPO, ESTR_OUTPUT_COMPO))
      Cha_output_compo = strmatch2(cha_s_atom(cell2), JSTR_SEG, ESTR_SEG) ? 0 : 1;
    /* ϥեޥå */
    else if (strmatch2(cell1_str, JSTR_OUTPUT_FORMAT, ESTR_OUTPUT_FORMAT))
      cha_set_opt_form(cha_s_atom(cell2));
    /*  */
    else if (strmatch2(cell1_str, JSTR_LANG, ESTR_LANG))
      cha_set_language(cha_s_atom(cell2));
    /* BOSʸ */
    else if (strmatch2(cell1_str, JSTR_BOS_STR, ESTR_BOS_STR))
      Cha_bos_string = cha_s_atom(cell2);
    /* EOSʸ */
    else if (strmatch2(cell1_str, JSTR_EOS_STR, ESTR_EOS_STR))
      Cha_eos_string = cha_s_atom(cell2);
    /* ڤʸ */
    else if (strmatch2(cell1_str, JSTR_DELIMITER, ESTR_DELIMITER))
      cha_set_jfgets_delimiter(cha_s_atom(cell2));
}

/***********************************************************************
 * cha_read_rcfile_fp() - read and evaluate an rc file from a stream.
 *
 * S-expressions are read from `fp` with cha_s_read() until
 * cha_s_feof() reports the end; each one is handed to
 * eval_chasenrc_sexp() unless Cha_errno is set after the read.
 * If neither Cha_lang_j nor Cha_lang_e is set afterwards, Cha_lang_j
 * is switched on as the default language.  The stream is not closed
 * here.
 ***********************************************************************/
void cha_read_rcfile_fp(fp)
    FILE *fp;
{
    for (;;) {
        chasen_cell_t *sexp;

        if (cha_s_feof(fp)) {
            break;
        }
        sexp = cha_s_read(fp);
        if (Cha_errno) {
            continue;
        }
        eval_chasenrc_sexp(sexp);
    }

    /* a language was selected explicitly: nothing to default */
    if (Cha_lang_j || Cha_lang_e) {
        return;
    }

    /* default language.  (A disabled "#if 0" alternative that also set
       Cha_lang_e when Suf_ndicfile was non-zero was dead code and has
       been dropped.) */
    Cha_lang_j = 1;
}

/***********************************************************************/
/*
 * read_chasenrc - open the rc file named by cha_get_rcpath(), evaluate
 * it with cha_read_rcfile_fp(), and verify the mandatory settings:
 * Cha_undef_info[0].hinsi must be non-zero and at least one of
 * Pat_ndicfile / Suf_ndicfile must be non-zero.  A missing setting is
 * passed to cha_exit(1, ...).
 */
static void read_chasenrc()
{
    FILE *rcfile = cha_fopen(cha_get_rcpath(), "r", 1);

    cha_read_rcfile_fp(rcfile);
    fclose(rcfile);

    /* required options */
    if (!Cha_undef_info[0].hinsi) {
        cha_exit(1, "%s: UNKNOWN_POS/michigo-hinsi is not specified",
                 cha_get_rcpath());
    }

    if (!Pat_ndicfile && !Suf_ndicfile) {
        cha_exit(1, "%s: patricia dictionary is not specified",
                 cha_get_rcpath());
    }
}

/*
 * cha_init - ChaSen's initialization
 */
void cha_init()
{
    int i;

    /* ϴؿؤΥݥ */
    cha_set_fput(Cha_server_mode);

    /* ϴؿؤΥݥ */
    cha_set_getc_alone();

    /* cost width */
    cha_set_cost_width(COST_WIDTH_DEFAULT);

    cha_read_grammar_dir();
    cha_read_grammar(NULL, 1, 1);

    read_chasenrc();

    cha_read_katuyou(NULL, 1);
    cha_read_table(NULL, 1);
    cha_read_matrix(NULL);

    for (i = 0; i < Cha_undef_info_num; i++)
      Cha_undef_info[i].con_tbl = cha_check_table_for_undef(Cha_undef_info[i].hinsi);
}

