/*
==============================================================================
	trans.c
		1990/11/12/Mon Yutaka MYOKI(Nagao Lab., KUEE)
==============================================================================
*/

#include "chadic.h"

/* Upper bound for a morpheme weight; get_midasi_str_weight() clamps to
   this value before casting the weight to unsigned short. */
#define MRPH_WEIGHT_MAX USHRT_MAX /* 65535 */

/*
 * strcmp_tail
 *
 * Compare the shorter of the two strings against the equally long tail
 * of the longer one.  Returns 0 when the shorter string is a suffix of
 * the longer one (or both are equal); otherwise returns the strcmp()
 * result of the aligned comparison.
 */
static int strcmp_tail(s1, s2)
    char *s1, *s2;
{
    int delta = strlen(s1) - strlen(s2);

    if (delta < 0) {
        /* s2 is longer: line its tail up with the whole of s1 */
        return strcmp(s2 + (-delta), s1);
    }

    /* s1 is at least as long: line its tail up with the whole of s2 */
    return strcmp(s1 + delta, s2);
}

/*
 * print_mrph1
 *
 * Write the fields of one morpheme record to fp.  No record terminator
 * is written here; print_mrph_comp() emits the trailing newline.
 *   fp   : output stream
 *   mrph : morpheme whose fields are printed
 */
static void print_mrph1(fp, mrph)
    FILE *fp;
    mrph_t *mrph;
{
#ifdef SJIS
    /* run the text fields through sjis2euc() before printing;
       presumably an in-place conversion -- TODO confirm */
    sjis2euc(mrph->midasi);
    sjis2euc(mrph->yomi);
    sjis2euc(mrph->info);
#endif

#ifdef VGRAM
    /* every field is followed by "%c" with argument 0, i.e. a NUL byte
       is written after each field */
    fprintf(fp, "%s%c%d%c%d%c%d%c%d%c%s%c%s%c%s%c%d%c%s%c",
	    mrph->midasi, 0, mrph->hinsi, 0,
	    mrph->ktype, 0, mrph->kform, 0, mrph->weight, 0,
	    mrph->yomi, 0, mrph->pron, 0, mrph->base, 0,
	    mrph->con_tbl, 0, mrph->info, 0);
#else
    /* headword, a tab, then the remaining fields separated by spaces */
    fprintf(fp, "%s\t%d %d %d %d %d %s %d %s",
	    mrph->midasi, mrph->hinsi, mrph->bunrui,
	    mrph->ktype, mrph->kform, mrph->weight,
	    mrph->yomi, mrph->con_tbl, mrph->info);
#endif
}

/*
 * print_mrph_comp
 *
 * Print every morpheme of the array starting at mrph, stopping at the
 * first element whose hinsi is 0 (the terminator, which is not
 * printed), then finish the output record with a newline.
 */
static void print_mrph_comp(fp, mrph)
    FILE *fp;
    mrph_t *mrph;
{
    mrph_t *cur = mrph;

    while (cur->hinsi) {
        print_mrph1(fp, cur);
        cur++;
    }

    fputc('\n', fp);
}

/*
 * print_mrph
 *
 * Print one complete record: the head morpheme mrph[0] first, then the
 * morphemes that follow it up to the hinsi == 0 terminator, then the
 * newline written by print_mrph_comp().
 */
static void print_mrph(fp, mrph)
    FILE *fp;
    mrph_t *mrph;
{
    mrph_t *components = mrph + 1;

    print_mrph1(fp, mrph);
    print_mrph_comp(fp, components);
}

static void print_mrph_loop(fp, mrph)
    FILE *fp;
    mrph_t *mrph;
{
    int i;

    for (i = 1; Form[mrph->ktype][i].name; i++) {
	if (!Form[mrph->ktype][i].gobi[0])
	  continue;
#ifdef SJIS
	sjis2euc(Form[mrph->ktype][i].gobi);
	sjis2euc(mrph->info);
#endif
#ifdef VGRAM
	fprintf(fp, "%s%c%d%c%d%c%d%c%d%c%s%c%s%c%s%c%d%c%s%c",
		Form[mrph->ktype][i].gobi, 0, mrph->hinsi, 0,
		mrph->ktype, 0, i, 0, mrph->weight, 0,
		Form[mrph->ktype][i].ygobi, 0,
		Form[mrph->ktype][i].pgobi, 0,
		mrph->base, 0,
		mrph->con_tbl + i - 1, 0,
		mrph->info, 0);
#else
	fprintf(fp, "%s\t%d %d %d %d %d %s %d %s",
		Form[mrph->ktype][i].gobi, mrph->hinsi, mrph->bunrui,
		mrph->ktype, i, mrph->weight,
		Form[mrph->ktype][i].ygobi, mrph->con_tbl + i - 1,
		mrph->info);
#endif
	print_mrph_comp(fp, mrph + 1);
    }
}

/*
 * trans_exit
 *
 * Report a problem with a dictionary entry through cha_exit_file().
 * The message has the form "`<entry>' <msg>\n", where <entry> is the
 * printed representation of cell produced by s_tostr().
 *   status : passed unchanged to cha_exit_file()
 *   msg    : text appended after the quoted entry
 *   cell   : the offending S-expression
 */
static void trans_exit(status, msg, cell)
    int status;   /* was left undeclared (implicit int) */
    char *msg;
    cell_t *cell;
{
    cha_exit_file(status, "`%s' %s\n", s_tostr(cell), msg);
}

#if 0
/*
 * midasi  (compiled out by the enclosing #if 0)
 *
 * Older headword lookup: finds the headword list in entry x, passes
 * its first element through hantozen(), and rejects results longer
 * than MIDASI_MAX bytes.
 */
static char *midasi(x)
    cell_t *x;
{
    cell_t *y;
    char *s;

    if (nullp(y = assoc(tmp_atom("Ф"), x)))
      trans_exit(1, "doesn't contain a list for midasi", x);

    s = hantozen(s_atom(car(cdr(y))));

    if (strlen(s) > MIDASI_MAX)
      cha_exit_file(1, "midashi `%s' is too long", s);

    return s;
}
#endif

/*
 * get_midasi_list
 *
 * Return the list of headword specifications of entry x, i.e. the cdr
 * of the field keyed by JSTR_WORD or, failing that, ESTR_WORD.
 * Reports an error through trans_exit() when neither key is present.
 */
static cell_t *get_midasi_list(x)
    cell_t *x;
{
    cell_t *field = assoc(tmp_atom(JSTR_WORD), x);

    if (nullp(field)) {
        field = assoc(tmp_atom(ESTR_WORD), x);
        if (nullp(field))
            trans_exit(1, "doesn't contain midasi field", x);
    }

    return cdr(field);
}

/*
 * get_midasi_str_weight
 *
 * Fill mrph->midasi and mrph->weight from one headword specification.
 *   cell       : either a bare atom (headword only) or a list
 *                (headword [weight])
 *   def_weight : weight used when the specification carries none
 *   mrph       : morpheme receiving the headword string and the weight
 *
 * An explicit weight is parsed with atof() and multiplied by
 * MRPH_DEFAULT_WEIGHT.  Results outside 0..MRPH_WEIGHT_MAX are reported
 * through trans_exit() with status -1 and then clamped (the clamping
 * after the report implies that a negative status is expected to
 * return -- TODO confirm against cha_exit_file()).
 *
 * Headwords longer than MIDASI_MAX bytes are rejected.
 */
static void get_midasi_str_weight(cell, def_weight, mrph)
    cell_t *cell;
    int def_weight;
    mrph_t *mrph;
{
    double scaled;
    int weight_int;
    char *midasi_str;

    if (atomp(cell)) {
        /* bare headword: use the default weight */
        midasi_str = s_atom_val(cell);
        mrph->weight = def_weight;
    } else if (atomp(car(cell))) {
        /* (headword [weight]) */
        midasi_str = s_atom_val(car(cell));
        if (nullp(cdr(cell)))
            mrph->weight = (unsigned short)def_weight;
        else if (!atomp(car(cdr(cell))))
            trans_exit(1, "has illegal form", cell);
        else {
            scaled = atof(s_atom_val(car(cdr(cell)))) * MRPH_DEFAULT_WEIGHT;
            /*
             * Range-check BEFORE converting to int: converting a double
             * whose truncated value does not fit in int (or a NaN) is
             * undefined behaviour.  The open interval below is exactly
             * the set of values that truncate to 0..MRPH_WEIGHT_MAX.
             */
            if (scaled > -1.0 && scaled < MRPH_WEIGHT_MAX + 1.0) {
                weight_int = (int)scaled;
            } else {
                trans_exit(-1, ": weight must be between 0 and 6553.5", cell);
                /* clamp; a NaN compares false and therefore becomes 0 */
                weight_int = (scaled > 0) ? MRPH_WEIGHT_MAX : 0;
            }
            mrph->weight = (unsigned short)weight_int;
        }
    } else {
        trans_exit(1, "has illegal form", cell);
    }

    /* the hantozen() conversion of the headword was disabled on
       980710 (akira-k) */
    if (strlen(midasi_str) > MIDASI_MAX)
        cha_exit_file(1, "midashi `%s' is too long", midasi_str);
    strcpy(mrph->midasi, midasi_str);
}

/*
 * get_yomi
 *
 * Return the reading string of entry x: the first value of the field
 * keyed by JSTR_READING or, failing that, ESTR_READING.  Returns ""
 * when the entry has no reading field.  Readings longer than
 * MIDASI_MAX bytes are rejected through cha_exit_file().
 */
static char *get_yomi(x)
    cell_t *x;
{
    cell_t *field;
    char *reading;

    field = assoc(tmp_atom(JSTR_READING), x);
    if (nullp(field))
        field = assoc(tmp_atom(ESTR_READING), x);
    if (nullp(field))
        return "";

    /* the hantozen() conversion of the reading was disabled on
       980710 (akira-k) */
    reading = s_atom(car(cdr(field)));

    if (strlen(reading) > MIDASI_MAX)
        cha_exit_file(1, "yomi `%s' is too long", reading);

    return reading;
}

#ifdef VGRAM
/*
 * get_hinsi
 *
 * Look up the part-of-speech field of entry x (key JSTR_POS, then
 * ESTR_POS) and return the id that get_nhinsi_id() assigns to its
 * first value.  Returns 0 when the entry has no such field.
 */
static int get_hinsi(x)
    cell_t *x;
{
    cell_t *field;

    field = assoc(tmp_atom(JSTR_POS), x);
    if (nullp(field))
        field = assoc(tmp_atom(ESTR_POS), x);
    if (nullp(field))
        return 0;

    return get_nhinsi_id(car(cdr(field)));
}
#endif

/*
 * get_ktype
 *
 * Return the conjugation-type id of entry x: get_type_id() applied to
 * the first value of the field keyed by JSTR_CTYPE or, failing that,
 * ESTR_CTYPE.  Reports an error through trans_exit() when the entry
 * has neither field.
 */
static int get_ktype(x)
    cell_t *x;
{
    cell_t *field = assoc(tmp_atom(JSTR_CTYPE), x);

    if (nullp(field)) {
        field = assoc(tmp_atom(ESTR_CTYPE), x);
        if (nullp(field))
            trans_exit(1, "doesn't contain a list for conjugation type", x);
    }

    return get_type_id(s_atom(car(cdr(field))));
}

/*
 * get_kform
 *
 * Return the conjugation-form id of entry x within conjugation type
 * ktype: get_form_id() applied to the first value of the field keyed
 * by JSTR_CFORM or, failing that, ESTR_CFORM.  Reports an error through
 * trans_exit() when the entry has neither field.
 */
static int get_kform(x, ktype)
    cell_t *x;
    int ktype;
{
    cell_t *field = assoc(tmp_atom(JSTR_CFORM), x);

    if (nullp(field)) {
        field = assoc(tmp_atom(ESTR_CFORM), x);
        if (nullp(field))
            trans_exit(1, "doesn't contain a list for conjugation form", x);
    }

    return get_form_id(s_atom(car(cdr(field))), ktype);
}

/* for EDRdic '94.Mar */
/*
 * get_edrconnect
 *
 * Return the first value of the field keyed by JSTR_CONN_ATTR in entry
 * x.  The result of assoc() is passed straight to cdr()/car() without
 * a null check; callers test the returned cell with nullp().
 */
static cell_t *get_edrconnect(x)
    cell_t *x;
{
    cell_t *field = assoc(tmp_atom(JSTR_CONN_ATTR), x);
    cell_t *values = cdr(field);

    return car(values);
}

/*
 * get_info
 *
 * Return the info string of entry x: the first value of the field
 * keyed by JSTR_INFO or, failing that, ESTR_INFO.  Returns "" when the
 * entry has no such field.
 */
static char *get_info(x)
    cell_t *x;
{
    cell_t *field;

    field = assoc(tmp_atom(JSTR_INFO), x);
    if (nullp(field))
        field = assoc(tmp_atom(ESTR_INFO), x);
    if (nullp(field))
        return "";

    /* NOTE(review): the original (mis-encoded) comment appears to say
       that JUMAN2.0 returned cdr(y) here -- confirm */
    return s_atom(car(cdr(field)));
}

#ifdef VGRAM
/*
 * get_base
 *
 * Return the first value of the field keyed by JSTR_BASE or, failing
 * that, ESTR_BASE in entry x.  Returns "" when the entry has no such
 * field.
 */
static char *get_base(x)
    cell_t *x;
{
    cell_t *field;

    field = assoc(tmp_atom(JSTR_BASE), x);
    if (nullp(field))
        field = assoc(tmp_atom(ESTR_BASE), x);
    if (nullp(field))
        return "";

    return s_atom(car(cdr(field)));
}

/*
 * get_pron
 *
 * Return the pronunciation string of entry x: the first value of the
 * field keyed by JSTR_PRON or, failing that, ESTR_PRON.  Returns ""
 * when the entry has no such field.  Strings longer than MIDASI_MAX
 * bytes are rejected through cha_exit_file().
 */
static char *get_pron(x)
    cell_t *x;
{
    cell_t *field;
    char *pron;

    field = assoc(tmp_atom(JSTR_PRON), x);
    if (nullp(field))
        field = assoc(tmp_atom(ESTR_PRON), x);
    if (nullp(field))
        return "";

    pron = s_atom(car(cdr(field)));

    if (strlen(pron) > MIDASI_MAX)
        cha_exit_file(1, "pron `%s' is too long", pron);

    return pron;
}
#endif

/*
 * trim_midasi_gobi
 *
 * Strip the basic-form ending (gobi) of mrph's conjugation type from
 * the end of mrph->midasi, leaving the stem in place.  A headword that
 * does not end in that ending is reported through cha_exit_file().
 *
 * strcmp_tail() is symmetric: it also returns 0 when the headword is
 * SHORTER than the ending and merely matches the ending's tail.  In
 * that case strlen(midasi) - strlen(gobi) would wrap around and the
 * terminator would be written outside the buffer, so the length is
 * checked explicitly and such a headword is treated as a conflict.
 */
static void trim_midasi_gobi(mrph)
    mrph_t *mrph;
{
    char *gobi;
    size_t midasi_len, gobi_len;

    gobi = Form[mrph->ktype][Type[mrph->ktype].basic].gobi;
    midasi_len = strlen(mrph->midasi);
    gobi_len = strlen(gobi);

    if (midasi_len < gobi_len || strcmp_tail(mrph->midasi, gobi)) {
        cha_exit_file(1, "midashi `%s' conflicts with katsuyou form", mrph->midasi);
        return; /* never truncate on a conflict, even if the call returns */
    }

    mrph->midasi[midasi_len - gobi_len] = '\0';
}

/*
 * trim_yomi_gobi
 *
 * Strip the basic-form reading ending (ygobi) of mrph's conjugation
 * type from the end of mrph->yomi.  An empty reading is left untouched.
 * A reading that does not end in that ending is reported through
 * cha_exit_file().
 *
 * strcmp_tail() also returns 0 when the reading is shorter than the
 * ending and matches the ending's tail; the length subtraction would
 * then wrap around and write outside the buffer, so the length is
 * checked explicitly and such a reading is treated as a conflict.
 */
static void trim_yomi_gobi(mrph)
    mrph_t *mrph;
{
    char *gobi;
    size_t yomi_len, gobi_len;

    if (!mrph->yomi[0])
        return;

    gobi = Form[mrph->ktype][Type[mrph->ktype].basic].ygobi;
    yomi_len = strlen(mrph->yomi);
    gobi_len = strlen(gobi);

    if (yomi_len < gobi_len || strcmp_tail(mrph->yomi, gobi)) {
        cha_exit_file(1, "yomi `%s' conflicts with katsuyou form", mrph->yomi);
        return; /* never truncate on a conflict, even if the call returns */
    }

    mrph->yomi[yomi_len - gobi_len] = '\0';
}

#ifdef VGRAM
/*
 * trim_pron_gobi
 *
 * Strip the basic-form pronunciation ending (pgobi) of mrph's
 * conjugation type from the end of mrph->pron.  An empty pronunciation
 * is left untouched.  A pronunciation that does not end in that ending
 * is reported through cha_exit_file().
 *
 * strcmp_tail() also returns 0 when the pronunciation is shorter than
 * the ending and matches the ending's tail; the length subtraction
 * would then wrap around and write outside the buffer, so the length
 * is checked explicitly and such a string is treated as a conflict.
 */
static void trim_pron_gobi(mrph)
    mrph_t *mrph;
{
    char *gobi;
    size_t pron_len, gobi_len;

    if (!mrph->pron[0])
        return;

    gobi = Form[mrph->ktype][Type[mrph->ktype].basic].pgobi;
    pron_len = strlen(mrph->pron);
    gobi_len = strlen(gobi);

    if (pron_len < gobi_len || strcmp_tail(mrph->pron, gobi)) {
        cha_exit_file(1, "pron `%s' conflicts with katsuyou form", mrph->pron);
        return; /* never truncate on a conflict, even if the call returns */
    }

    mrph->pron[pron_len - gobi_len] = '\0';
}
#endif /* VGRAM */

#ifdef VGRAM
/*
 * get_mrph
 *
 * Fill *mrph from one positional morpheme description and return the
 * part of the list that follows the consumed fields.
 *
 * Fields are consumed from block in this order:
 *   car(block)  : (pos [conjugation-type [conjugation-form]])
 *   next        : headword specification (see get_midasi_str_weight)
 *   next        : reading
 *   next        : pronunciation
 *   next        : base form
 *   next        : info
 *
 *   def_weight : weight used when the headword carries none
 *   has_kform  : non-zero when the description carries a conjugation
 *                form; when zero, kform is set to 0
 */
static cell_t *get_mrph(block, mrph, def_weight, has_kform)
    cell_t *block;
    mrph_t *mrph;
    int def_weight;
    int has_kform;   /* was left undeclared (implicit int) */
{
#ifdef KOCHA2
    char *midasi_last;
#endif
    int katuyou;
    char *s;

    /* part of speech */
    mrph->hinsi = get_nhinsi_id(car(car(block)));
    /* conjugation type and form; only looked up when the part of
       speech has kt == 1 */
    katuyou = Hinsi[mrph->hinsi].kt;
    if (katuyou != 1)
        mrph->ktype = mrph->kform = 0;
    else {
        mrph->ktype = get_type_id(s_atom(car(cdr(car(block)))));
        /* Always give kform a value: the caller's array is
           uninitialised stack memory and kform is printed later. */
        if (has_kform)
            mrph->kform = get_form_id(s_atom(car(cdr(cdr(car(block))))),
                                      mrph->ktype);
        else
            mrph->kform = 0;
    }
    block = cdr(block);

    /* headword and weight */
    get_midasi_str_weight(car(block), def_weight, mrph);
    /* check_table() on the morpheme (original comment is mis-encoded;
       presumably the POS/conjugation table check -- TODO confirm) */
    check_table(mrph);
    if (katuyou == 1)
        trim_midasi_gobi(mrph);
    block = cdr(block);

    /* reading */
    if (strlen(s = s_atom(car(block))) > MIDASI_MAX)
        cha_exit_file(1, "yomi `%s' is too long", s);
    strcpy(mrph->yomi, s);
    if (katuyou == 1)
        trim_yomi_gobi(mrph);
    block = cdr(block);

    /* pronunciation */
    if (strlen(s = s_atom(car(block))) > MIDASI_MAX)
        cha_exit_file(1, "pron `%s' is too long", s);
    strcpy(mrph->pron, s);
    if (katuyou == 1)
        trim_pron_gobi(mrph);
    block = cdr(block);

    /* base form */
    mrph->base = s_atom(car(block));
    block = cdr(block);
    /* info */
    mrph->info = s_atom(car(block));
    block = cdr(block);

#ifdef KOCHA2
    /* an empty reading falls back to the headword */
    if (*mrph->yomi == '\0')
        strcpy(mrph->yomi, mrph->midasi);
    /* for kt == 2 the conjugation type is chosen from the last two
       bytes of the reading */
    if (katuyou == 2) {
        midasi_last = mrph->yomi + strlen(mrph->yomi) - 2;
        if (is_moeum(midasi_last))
            mrph->ktype = get_type_id("첻");
        else if (!strcmp(midasi_last, ""))
            mrph->ktype = get_type_id("");
        else
            mrph->ktype = get_type_id("Ҳ");
    }
#endif

    return block;
}

/*
 * trans_mrph
 *
 * Translate one positional morpheme entry and print it to fp_out.
 *
 * block starts with the fields of the head morpheme (see get_mrph());
 * whatever follows them is a list of component morphemes making up a
 * compound word.  Components get weight 0, except that the first
 * component inherits the head's weight.  Only the last component is
 * read without a conjugation form, and its conjugation type must match
 * the head's.
 *
 * If the head has a non-empty headword (stem) a single record is
 * printed; otherwise one record per conjugation form is printed.
 */
static void trans_mrph(block, def_weight, fp_out)
    cell_t *block;
    int def_weight;
    FILE *fp_out;
{
    mrph_t *mrph, mrphs[256];
    /* last usable slot: it must stay free for the hinsi == 0 terminator */
    mrph_t *last_slot = mrphs + sizeof(mrphs) / sizeof(mrphs[0]) - 1;
    /* remember the whole entry so diagnostics can show it */
    cell_t *entry = block;

    mrph = mrphs;
    block = get_mrph(block, mrph, def_weight, 0);

    /* !nullp(cdr(block)): only the last component has no conjugation form */
    for (mrph++; !nullp(block); block = cdr(block), mrph++) {
        if (mrph >= last_slot)
            trans_exit(1, ": too many components in the compound word", entry);
        get_mrph(car(block), mrph, def_weight, !nullp(cdr(block)));
        mrph->weight = 0;
    }
    mrph->hinsi = 0;

    if (mrph > mrphs + 1) {
        mrphs[1].weight = mrphs[0].weight;
        /* block is nil once the loop has finished, so the entry itself
           is reported rather than car(block) */
        if (mrphs[0].ktype != mrph[-1].ktype)
            trans_exit(1, ": conjugation type is different from that of the compound word", entry);
    }

    mrph = mrphs;
    /* no stem left: register every conjugation form */
    if (mrph->midasi[0])
        print_mrph(fp_out, mrph);
    else
        print_mrph_loop(fp_out, mrph);
}
#endif /* VGRAM */

/*
 * get_word
 *
 * Fill *mrph from the keyed fields of dictionary entry block.
 *   block       : association list describing one word
 *   mrph        : morpheme to fill; mrph->hinsi (and, without VGRAM,
 *                 mrph->bunrui) must already be set by the caller
 *   def_weight  : weight used when the headword carries none
 *   has_kform   : non-zero when the entry carries a conjugation form;
 *                 when zero, kform is set to 0
 *   gets_midasi : non-zero to also read the first headword, run the
 *                 table check and trim the conjugation ending
 *
 * With VGRAM, a part-of-speech field inside the entry overrides the
 * preset mrph->hinsi, and the base form and pronunciation are read too.
 *
 * The function is declared to return void * but no caller in this file
 * uses the result; NULL is returned so control no longer falls off the
 * end of a non-void function.
 */
static void *get_word(block, mrph, def_weight, has_kform, gets_midasi)
    cell_t *block;
    mrph_t *mrph;
    int def_weight;
    int has_kform;   /* was left undeclared (implicit int) */
    int gets_midasi;
{
    int    katuyou, hinsi;
    cell_t *connect_cell; /* EDRdic '94.Mar */

#ifdef VGRAM
    if ((hinsi = get_hinsi(block)) > 0)
        mrph->hinsi = hinsi;
    katuyou = Hinsi[mrph->hinsi].kt;
    mrph->base = get_base(block);        /* base form */
#else
    katuyou = Class[mrph->hinsi][mrph->bunrui].kt;
#endif
    /* conjugation type and form; only looked up when kt == 1 */
    if (katuyou != 1)
        mrph->ktype = mrph->kform = 0;
    else {
        mrph->ktype = get_ktype(block);
        /* Always give kform a value: compound components live in an
           uninitialised stack array and kform is printed later. */
        if (has_kform)
            mrph->kform = get_kform(block, mrph->ktype);
        else
            mrph->kform = 0;
    }

    if (gets_midasi) {
        get_midasi_str_weight(car(get_midasi_list(block)), def_weight, mrph);
        /* table check: the EDR variant is used when the entry carries a
           connection-attribute field */
        if (nullp(connect_cell = get_edrconnect(block)))
            check_table(mrph);
        else
            check_edrtable(mrph, connect_cell); /* for EDRdic '94.Mar */
        if (katuyou == 1)
            trim_midasi_gobi(mrph);
    }

    strcpy(mrph->yomi, get_yomi(block)); /* reading */
    if (katuyou == 1)
        trim_yomi_gobi(mrph);

#ifdef VGRAM
    strcpy(mrph->pron, get_pron(block)); /* pronunciation */
    if (katuyou == 1)
        trim_pron_gobi(mrph);
#endif

    mrph->info = get_info(block); /* info */

    return NULL;
}

/*
 * trans_main
 *
 * Translate one keyed dictionary entry and print it to fp_out.
 *   block      : association list describing the entry
 *   mrph0      : template morpheme; it is copied, so the caller's
 *                object is not modified
 *   def_weight : weight used when a headword carries none
 *   fp_out     : output stream
 *
 * The shared fields are read once with get_word().  With VGRAM, an
 * optional compound field (JSTR_COMPO / ESTR_COMPO) lists component
 * words, which are appended after the head with weight 0; only the
 * last component is read without a conjugation form, and its
 * conjugation type must match the head's.  After that, one record (or,
 * when no stem is left, one record per conjugation form) is printed for
 * every headword in the entry's headword list.
 */
static void trans_main(block, mrph0, def_weight, fp_out)
    cell_t *block;
    mrph_t *mrph0;
    int def_weight;
    FILE *fp_out;
{
    cell_t *midasi_list;
#ifdef KOCHA2
    char   *midasi_last;
#endif
    mrph_t *mrph, mrphs[256];
    int def_hinsi = mrph0->hinsi;
    int katuyou;
    cell_t *connect_cell; /* EDRdic '94.Mar */
    cell_t *cell1, *block0;

    block0 = block;
    mrph = mrphs;
    memcpy(mrphs, mrph0, sizeof(mrph_t));
    get_word(block, mrph, def_weight, 0, 0);
    mrph++;

#ifdef VGRAM
    if (!nullp(cell1 = assoc(tmp_atom(JSTR_COMPO), block)) ||
        !nullp(cell1 = assoc(tmp_atom(ESTR_COMPO), block))) {
        block = cdr(cell1);
        /* !nullp(cdr(block)): only the last component has no conjugation form */
        for (; !nullp(block); block = cdr(block), mrph++) {
            /* the last slot must stay free for the hinsi == 0 terminator */
            if (mrph >= mrphs + sizeof(mrphs) / sizeof(mrphs[0]) - 1)
                trans_exit(1, ": too many components in the compound word", block0);
            mrph->hinsi = def_hinsi;
            get_word(car(block), mrph, def_weight, !nullp(cdr(block)), 1);
            mrph->weight = 0;
        }
        /* block is nil once the loop has finished, so the entry itself
           is reported rather than car(block) */
        if (mrphs[0].ktype != mrph[-1].ktype)
            trans_exit(1, ": conjugation type is different from that of the compound word", block0);
    }
#endif
    mrph->hinsi = 0;

    mrph = mrphs;
    /* headwords */
#ifdef VGRAM
    katuyou = Hinsi[mrph->hinsi].kt;
#else
    katuyou = Class[mrph->hinsi][mrph->bunrui].kt;
#endif
    block = block0;
    for (midasi_list = get_midasi_list(block);
         !nullp(midasi_list);
         midasi_list = cdr(midasi_list)) {
        get_midasi_str_weight(car(midasi_list), def_weight, mrph);
        /* the first following morpheme inherits the head's weight */
        mrphs[1].weight = mrphs[0].weight;
        /* table check: the EDR variant is used when the entry carries a
           connection-attribute field */
        if (nullp(connect_cell = get_edrconnect(block)))
            check_table(mrph);
        else
            check_edrtable(mrph, connect_cell); /* for EDRdic '94.Mar */
        if (katuyou == 1)
            trim_midasi_gobi(mrph);

#ifdef KOCHA2
        /* an empty reading falls back to the headword */
        if (*mrph->yomi == '\0')
            strcpy(mrph->yomi, mrph->midasi);
        /* for kt == 2 the conjugation type is chosen from the last two
           bytes of the reading */
        if (katuyou == 2) {
            midasi_last = mrph->yomi + strlen(mrph->yomi) - 2;
            if (is_moeum(midasi_last))
                mrph->ktype = get_type_id("첻");
            else if (!strcmp(midasi_last, ""))
                mrph->ktype = get_type_id("");
            else
                mrph->ktype = get_type_id("Ҳ");
        }
#endif

        /* no stem left: register every conjugation form */
        if (mrph->midasi[0])
            print_mrph(fp_out, mrph);
        else
            print_mrph_loop(fp_out, mrph);
    }
}

/*
 * trans
 *
 * Read S-expressions from fp_in until s_feof() reports end of input,
 * translate each dictionary entry and write the result to fp_out.
 *
 * With VGRAM, a top-level expression whose car is an atom is one of:
 *   (POS ...)           sets the part of speech for following entries
 *   (DEF_POS_COST n)    sets the default weight
 *   (MRPH ...)          a positional morpheme entry (trans_mrph)
 *   anything else       old format: nested part-of-speech names
 *                       wrapped around the entry
 * and any other expression is a keyed entry handled by trans_main(),
 * which requires a part of speech to have been set beforehand.
 *
 * Without VGRAM, each expression is (hinsi block...), where a block is
 * either (bunrui entry...) or a single entry.
 */
void trans(fp_in, fp_out)
    FILE *fp_in, *fp_out;
{
    mrph_t mrphs[2], *mrph;
    cell_t *cell, *main_loop, *main_block, *sub_loop, *sub_block;
    int hinsi, weight = MRPH_WEIGHT_MAX;

    mrph = mrphs;
    mrphs[1].hinsi = 0;     /* terminator after the template morpheme */
    mrph->kform = 0;
    hinsi = -1;             /* no part of speech defined yet */

    while (!s_feof(fp_in)) {
        cell = s_read(fp_in);
        if (atomp(cell))
            trans_exit(1, "is not list", cell);

#ifdef VGRAM
        if (atomp(car(cell))) {
            char *s = s_atom_val(car(cell));
            if (strmatch2(s, JSTR_POS, ESTR_POS))
                hinsi = get_nhinsi_id(car(cdr(cell)));
            else if (strmatch2(s, JSTR_DEF_POS_COST, ESTR_DEF_POS_COST))
                weight = atoi(s_atom_val(car(cdr(cell))));
            else if (strmatch2(s, JSTR_MRPH, ESTR_MRPH))
                trans_mrph(cdr(cell), weight, fp_out);
            else {
                /* upper compatible for old format */
                char *hinsi_str[256];
                char **hinsi_end = hinsi_str;
                /*
                 * Descend with a separate cursor so that `cell' keeps
                 * pointing at the expression returned by s_read() and
                 * s_free() below releases all of it, not just the
                 * innermost sub-list.
                 */
                cell_t *body = cell;

                for (; atomp(car(body)); body = car(cdr(body))) {
                    /* keep one slot free for the NULL terminator */
                    if (hinsi_end >= hinsi_str
                        + sizeof(hinsi_str) / sizeof(hinsi_str[0]) - 1)
                        trans_exit(1, ": part of speech is nested too deeply", cell);
                    *hinsi_end++ = s_atom_val(car(body));
                }
                *hinsi_end = NULL;
                mrph->hinsi = get_nhinsi_str_id(hinsi_str);
                trans_main(body, mrph, weight, fp_out);
            }
        } else {
            if (hinsi < 0)
                cha_exit_file(1, "hinsi is not defined");
            mrph->hinsi = hinsi;
            trans_main(cell, mrph, weight, fp_out);
        }
#else /* !VGRAM */
        /* part of speech */
        mrph->hinsi = get_hinsi_id(s_atom(car(cell)));
        for (main_loop = cdr(cell);
             !nullp(main_block = car(main_loop));
             main_loop = cdr(main_loop)) {
            /* block with a sub-classification (bunrui) */
            if (atomp(car(main_block))) {
                mrph->bunrui =
                  get_bunrui_id(s_atom_val(car(main_block)), mrph->hinsi);
                sub_loop = cdr(main_block);
                while (!nullp(sub_block = car(sub_loop))) {
                    trans_main(sub_block, mrph, weight, fp_out);
                    sub_loop = cdr(sub_loop);
                }
            }
            /* block without a sub-classification */
            else {
                mrph->bunrui = 0;
                trans_main(main_block, mrph, weight, fp_out);
            }
        }
#endif /* !VGRAM */
        s_free(cell);
    }
}
