/*
 *	chalib.c
 *
 *	last modified by A.Kitauchi <akira-k@is.aist-nara.ac.jp>, Sep. 1996
 */

#include "chalib.h"
#include "pat.h"

/* Program name printed by cha_version(); chosen at build time by KOCHA. */
#ifdef KOCHA
#define CHA_NAME       "KoCha"
#else
#define CHA_NAME       "ChaSen"
#endif

/*
 * Cost width currently in effect.  In this file it is only assigned from
 * cost_width0, and only while opt_show is not 'b'.
 */
int Cost_width;
/* Language flags: set_language() sets Lang_j for 'j' and Lang_e for 'e'. */
int Lang_j = 0, Lang_e = 0;

/* Value requested via set_cost_width(): cw * MRPH_DEFAULT_WEIGHT. */
static int cost_width0;
/* Dictionary file names, stored without the ".int"/".pat"/".ary" suffix. */
static char patdic_filename[MAX_DIC_NUMBER][CHA_FILENAME_MAX];
static char sufdic_filename[MAX_DIC_NUMBER][CHA_FILENAME_MAX];
char *sufdic_file[MAX_DIC_NUMBER];
int num_sufdic_file = 0;
static int obj_dic_no = 0; /* index of the dictionary targeted by the #a/#s/#f commands (dynamic word registration) */

/*
 * Option state.  opt_show and opt_form hold option letters; opt_ja,
 * opt_cmd and opt_nobk are flags set by -j, -C and -B respectively.
 */
static int opt_show = 'b', opt_form = 'f', opt_ja, opt_cmd, opt_nobk;
/* Output format string currently selected (NULL until set_opt_form() runs). */
static char *opt_form_string;

/*
 *  cha_version() - write the version banner and the grammar-format
 *  notice to fp.  A NULL fp is silently ignored.
 */
void cha_version(fp)
    FILE *fp;
{
    if (fp != NULL) {
	fprintf(fp,
		"%s version %s (c) 1996-1999 Nara Institute of Science and Technology\n",
		CHA_NAME, VERSION);
#ifdef VGRAM
	fputs("Grammar files are in ChaSen's new v-gram format.\n", fp);
#else
	fputs("Grammar files are compatible with JUMAN 2.0.\n", fp);
#endif
    }
}

/*
 * set_opt_form() - select the output format.
 *
 * format:
 *   NULL          - install the built-in format string that belongs to
 *                   the current opt_form letter (one of f, e, c, d, v);
 *                   any other letter leaves opt_form_string untouched.
 *   "-f" .. "-v"  - exactly a dash plus one of "fecdv": set opt_form to
 *                   that letter, then behave as for NULL.
 *   anything else - used as the format string itself.  The pointer is
 *                   stored, not copied, so the caller must keep it alive.
 *                   opt_form becomes 'F' when the string ends with a
 *                   newline and 'W' otherwise (including the empty string).
 *
 * Selecting 'd' or 'v' also forces opt_show to 'm'.
 *
 * The previous opt_form_string is never freed here: it may point at one
 * of the string literals below.
 */
void set_opt_form(format)
    char *format;
{
    size_t len;

    /*
     * -[fecdv]
     * format[1] must be tested against '\0' first: strchr() also matches
     * the terminator of "fecdv", which would let a lone "-" through and
     * read format[2] past the end of the string.
     */
    if (format &&
	format[0] == '-' && format[1] != '\0' &&
	strchr("fecdv", format[1]) && format[2] == '\0') {
	opt_form = format[1];
	format = NULL;
    }

    if (format == NULL) {
	if (opt_form == 'd' || opt_form == 'v')
	  opt_show = 'm';
        switch (opt_form) {
#ifdef VGRAM
	  case 'd':
	    opt_form_string = "morph(%pi,%ps,%pe,%pc,'%m','%U(%y)','%M',%U(%P'),NIL,%T0,%F0,'%I0',%c,[%ppc,],[%ppi,])";
	    break;
	  case 'v':
	    opt_form_string = "%pb%3pi %3ps %3pe %5pc %m\t%U(%y)\t%U(%a)\t%M\t%U(%P-) NIL %T0 %F0 %I0 %c %ppi, %ppc,\n";
	    break;
	  case 'f':
	    opt_form_string = "%m\t%y\t%M\t%U(%P-)\t%T \t%F \n";
	    break;
	  case 'e':
	    opt_form_string = "%m\t%U(%y)\t%M\t%P- %h %T* %t %F* %f\n";
	    break;
	  case 'c':
	    opt_form_string = "%m\t%y\t%M\t%h %t %f\n";
	    break;
#else
	  case 'd':
	    opt_form_string = "morph(%pi,%ps,%pe,%pc,'%m','%U(%y)','%M',%H,%BB,%T0,%F0,'%I0',%c,[%ppc,],[%ppi,])";
	    break;
	  case 'v':
	    opt_form_string = "%3pi %3ps %3pe %5pc %m %U(%y) %M %H %BB %T0 %F0 %I0 %c %ppi, %ppc,\n";
	    break;
	  case 'f':
	    opt_form_string = "%-11m %-11y %-11M %-14U(%BB) %-14T  %F \n";
	    break;
	  case 'e':
	    opt_form_string = "%m %U(%y) %M %H %h %B* %b %T* %t %F* %f\n";
	    break;
	  case 'c':
	    opt_form_string = "%m %y %M %h %b %t %f\n";
	    break;
#endif
	  default:
	    /* e.g. 'F'/'W' from an earlier user format: keep that string */
	    break;
	}
	return;
    }

    /* user-supplied format string */
    opt_form_string = format;

    /* guard len == 0 so that an empty format never reads format[-1] */
    len = strlen(format);
    if (len > 0 && format[len - 1] == '\n')
      opt_form = 'F';
    else
      opt_form = 'W';
}

/*
 * set_language() - set the language flags from a string of letters.
 *
 * Both Lang_j and Lang_e are cleared first; every 'j' in langstr then
 * sets Lang_j and every 'e' sets Lang_e.  Other characters are ignored.
 * A NULL langstr is treated like an empty string (both flags cleared)
 * instead of being dereferenced.
 */
void set_language(langstr)
    char *langstr;
{
    char *s;

    Lang_j = Lang_e = 0;
    if (langstr == NULL)
      return;

    for (s = langstr; *s; s++) {
	if (*s == 'j')
	  Lang_j = 1;
	else if (*s == 'e')
	  Lang_e = 1;
    }
}

/*
 * set_cost_width() - remember the requested cost width.
 *
 * The value is scaled by MRPH_DEFAULT_WEIGHT and kept in cost_width0.
 * It is copied into Cost_width only when opt_show is not 'b'.
 */
void set_cost_width(cw)
    int cw;
{
    cost_width0 = cw * MRPH_DEFAULT_WEIGHT;

    if (opt_show == 'b')
      return;

    Cost_width = cost_width0;
}

/*
 * chasen_getopt_argv - initialize and read options
 *
 * argv is scanned twice with cha_getopt_chasen().  The first pass only
 * honours -r (rc file path) so that it is known before chasen_init()
 * runs; the second pass applies every other option.
 *
 * return value:
 *   0 - ok
 *   1 - error (cha_getopt_chasen() reported '?' during the first pass)
 */
int chasen_getopt_argv(argv, fp)
    char **argv;
    FILE *fp;
{
    int opt;

    /* pass 1: pick up the chasenrc file path only */
    Cha_optind = 0;
    while ((opt = cha_getopt_chasen(argv, fp)) != EOF) {
	if (opt == '?')
	  return 1;
	if (opt == 'r')
	  set_chasenrc_path(Cha_optarg);
    }

    /* initialize if not done */
    if (!Undef_info_num)
      chasen_init();

    /* pass 2: everything else; '?' cannot occur here, pass 1 returned */
    Cha_optind = 0;
    while ((opt = cha_getopt_chasen(argv, fp)) != EOF) {
	if (opt == 'b' || opt == 'm' || opt == 'p') {
	    opt_show = opt;
	} else if (opt == 'd' || opt == 'v' || opt == 'f' ||
		   opt == 'e' || opt == 'c') {
	    opt_form = opt;
	    set_opt_form(NULL);
	} else if (opt == 'F') {
	    set_opt_form(convert_escape(cha_strdup(Cha_optarg), 0));
	} else if (opt == 'L') {
	    set_language(Cha_optarg);
	} else if (opt == 'w') {
	    /* cost width */
	    set_cost_width(atoi(Cha_optarg));
	} else if (opt == 'l') {
	    /* print one of the grammar tables to stdout and exit */
	    set_cha_output(stdout);
	    if (*Cha_optarg == 'p') {
		/* display the list of Hinsi table */
		cha_print_hinsi_table();
		exit(0);
	    } else if (*Cha_optarg == 't') {
		cha_print_ctype_table();
		exit(0);
	    } else if (*Cha_optarg == 'f') {
		cha_print_cform_table();
		exit(0);
	    }
	} else if (opt == 'j') {
	    opt_ja = 1;
	} else if (opt == 'B') {
	    opt_nobk = 1;
	} else if (opt == 'C') {
	    opt_cmd = 1;
	}
    }

    /* Cost_width only takes effect when opt_show is not 'b' */
    if (opt_show != 'b')
      Cost_width = cost_width0;

    return 0;
}

/***********************************************************************
 * command_usage() - print the list of '#' commands to stdout
 ***********************************************************************/
static void command_usage()
{
    static char *help_lines[] = {
	"commands are:\n",
	"#V        print ChaSen version\n",
	"#F format show morpheme with formatted output\n",
	"#w num    change the cost width  ex. #w 500\n",
	"#i        various information\n",
	"#e word   check if the word exists in the dictionary  ex. #e \n",
	"#a        resister the word into the dictionary\n",
	"#f        designate the dictionary which a word resistered\n",
	"#s        save the patricia tree after resistering the words\n",
	"#h        show this help\n",
	"#q        quit\n"
    };
    int count = sizeof(help_lines) / sizeof(help_lines[0]);
    int i;

    /* emit the lines in table order */
    for (i = 0; i < count; i++)
      fputs(help_lines[i], stdout);
}

#if 1
/*
 * chomp a string
 *
 * Removes one trailing '\n' (if present) and then one trailing '\r'
 * (if present) from str, in place.  An empty string is left untouched:
 * every access is guarded by len > 0 so that str[-1] is never read or
 * written.
 */
static void chomp(str)
    char *str;
{
    size_t len;

    len = strlen(str);
    if (len > 0 && str[len - 1] == '\n')
      str[--len] = '\0';
    if (len > 0 && str[len - 1] == '\r')
      str[--len] = '\0';
}
#endif

/***********************************************************************
 * chasen_command() - execute one interactive command
 *
 * comm points just past the leading '#': comm[0] is the command letter
 * and the argument, if any, starts at comm[2].  The line terminator is
 * stripped from comm in place.
 *
 * Every command except 'q' prints "ok" to stdout when it is done.
 *
 * return value:
 *     0 - succeed
 *     1 - quit chasen
 ***********************************************************************/
static int chasen_command(comm)
    char *comm;
{
    char *arg;
    int i;
    char *rslt[256]; /* results of the exact-match dictionary lookup ('e') */
    FILE *of; /* handle used to append the new word to the .int file ('a') */
    long new_word_index;
    char tmpstr[2000];

    /*
     * Strip the line end from the whole command, then locate the
     * argument.  A command shorter than two characters has no argument;
     * point arg at the terminator rather than past it.
     */
    chomp(comm);
    if (comm[0] != '\0' && comm[1] != '\0')
      arg = comm + 2;
    else
      arg = comm + strlen(comm);

    switch(comm[0]) {	/* command */
      case 'V':
	cha_version(stdout);
	break;
      case 'F':
	set_opt_form(convert_escape(cha_strdup(arg), 0));
	break;
      case 'L':
	/* use this command's own argument, not the stale getopt global */
	set_language(arg);
	break;
      case 'w':
	/* cost width */
	set_cost_width(atoi(arg));
	break;
      case 'i':
	/* information */
	cha_version(stdout);
	printf("\n()        %d\n",Cost_width);
	printf("ϢܥȽŤ()  %d\n",Con_cost_weight);
	printf("ǥȽŤ      %d\n",Mrph_cost_weight);
	printf("ϥեޥå      %s",
	       opt_form_string ? opt_form_string : "(ʤ)");
	printf("\nchasenrcե\n\t%s\n",get_chasenrc_path());
	printf("ʸˡե\n\t%s\n",get_grammar_dir());
	printf("ե\n");
	for(i = 0; patdic_filename[i][0]; i++)
	  printf("\t%s\n",patdic_filename[i]);
	printf("ưŪоݤȤʤ뼭\n\t%s\n",
	       patdic_filename[obj_dic_no]);
	break;
      case 'f':
	/* change the target dictionary:  file name -> dic No. */
	for(i = 0; patdic_filename[i][0]; i++){
	    printf("\t%s\n",patdic_filename[i]);
	    if(strcmp(patdic_filename[i], arg) == 0){
		obj_dic_no = i; /* dictionary used by later 'a' and 's' commands */
		printf("dic number = %d\n",obj_dic_no);
		/* TODO (from the original note): report an error if the dictionary is write-protected */
		break;
	    }
	}
	break;
      case 'a':
	/* add a word to the patricia tree */
	if(strlen(arg) < 4){printf("invalid format\n");break;}
	/* current size of the .int file = index of the new entry */
#ifdef NO_MMAP
	{
	    struct stat st;
	    fstat(fileno(dic_file[obj_dic_no]), &st);
	    new_word_index = st.st_size;
	}
#else
	new_word_index = dicinfo[obj_dic_no].size;
#endif
	/* append the new entry, NUL-terminated, to the .int file */
	sprintf(tmpstr,"%s.int",patdic_filename[obj_dic_no]);
	of = cha_fopen(tmpstr,"a",1);
	fputs(arg, of);
	fputc(0, of);
	printf("add [%s] at %ld\n", arg, new_word_index);
	fclose(of);
	/* close and reopen the read handle on the .int file */
	fclose(dic_file[obj_dic_no]);
	dic_file[obj_dic_no] = cha_fopen(tmpstr, "r", 1);
	/* clear the flag; presumably this makes the file get mapped again */
	dicinfo[obj_dic_no].used = 0;
	(void)pat_insert(dic_file[obj_dic_no],
			 arg, new_word_index, /*113, */
			 &tree_top[obj_dic_no], "\t");
	break;
      case 's':
	/* save the tree to the .pat file */
	sprintf(tmpstr,"%s.pat",patdic_filename[obj_dic_no]);
	(void)com_s(tmpstr,&tree_top[obj_dic_no]);
	break;
      case 'e':
	/* look the word up in every dictionary (exact match) */
	for(i = 0; patdic_filename[i][0]; i++){
	    mrph2_t mrph;
	    printf("DIC No. %d   \"%s\"\n",i,patdic_filename[i]);
	    (void)pat_search_exact(dic_file[i],arg, &tree_top[i],rslt);
	    if(!rslt[0]) printf("Not Found.\n");
	    else {
		char **pbuf;
		for (pbuf = rslt; *pbuf; pbuf++) {
		    get_mrph_data(&mrph, *pbuf, arg);
		    if (
#ifdef VGRAM
			Hinsi[mrph.hinsi].kt &&
#else
			Class[mrph.hinsi][mrph.bunrui].kt &&
#endif
			mrph.kform) {
			mrph.base_length = 0;
			mrph.yomi = "";
		    }
		    printf_mrph(0, &mrph, opt_form_string);
		}
	    }
	}
	break;
      case 'q': /* quit */
	return 1;
      case 'h':
	command_usage();
	break;
      default:
	printf("invalid command: %s\n",comm);
    }

    fputs("ok\n", stdout);
    fflush(stdout);

    return 0;
}

/*
 * parse a string and output to fp or str
 *
 * input is split at '\r', '\n' or "\r\n"; each piece is analysed and
 * printed in turn.  The buffer is modified: every line terminator that
 * is reached is overwritten with '\0'.  When opt_cmd is set and the
 * input starts with '#', it is handed to chasen_command() instead.
 *
 * return value:
 *     0 - ok / no result / too many morphs
 *     1 - quit
 */
static int chasen_sparse_main(input, output)
    char *input;
    FILE *output;
{
    /* initialize if not done */
    if (!Undef_info_num)
      chasen_init();
    if (opt_form_string == NULL)
      set_opt_form(NULL);

    set_cha_output(output);

    /* nothing to analyse */
    if (*input == '\0') {
#ifdef VGRAM
	if (!opt_ja)
	  cha_print_bos_eos(opt_form);
#endif
	return 0;
    }

    /* command interpreter */
    if (opt_cmd && input[0] == '#')
      return chasen_command(input + 1);

    /* parse a sentence and print, one line at a time */
    while (*input != '\0') {
	char *eol = strpbrk(input, "\r\n");
	int terminator = '\0';
	int len;

	if (eol != NULL) {
	    len = eol - input;
	    terminator = *eol;
	    *eol = '\0';
	} else {
	    len = strlen(input);
	}
#ifdef SJIS
	sjis2euc(input);
#endif
	if (len > 0 && !chasen_sent(input, len, opt_nobk)) {
	    cha_print_path(opt_show, opt_form, opt_form_string);
	}
#ifdef VGRAM
	else if (!opt_ja)
	  cha_print_bos_eos(opt_form);
#endif
	if (eol == NULL)
	  break;

	/* step over the terminator; "\r\n" counts as a single one */
	input = eol + 1;
	if (terminator == '\r' && *input == '\n')
	  input++;
    }

    return 0;
}

/*
 * read from file/str, parse, and write to file
 * 
 * return value:
 *     0 - ok / no result / too many morphs
 *     1 - quit / eof
 */
/*
 * file -> file
 *
 * Reads one line from fp_in with cha_fgets() and passes it to
 * chasen_sparse_main() with fp_out as the output.
 * Returns 1 when no line could be read.
 */
int chasen_fparse(fp_in, fp_out)
    FILE *fp_in, *fp_out;
{
    char line[CHA_INPUT_SIZE];

    if (cha_fgets(line, sizeof(line), fp_in) != NULL)
      return chasen_sparse_main(line, fp_out);

    return 1;
}
/*
 * string -> file
 *
 * str_in is converted with cha_jistoeuc() into a temporary buffer of
 * strlen(str_in) + 1 bytes, which is analysed and then released.
 */
int chasen_sparse(str_in, fp_out)
    char *str_in;
    FILE *fp_out;
{
    char *converted;
    int status;

    converted = cha_malloc(strlen(str_in) + 1);
    cha_jistoeuc(str_in, converted);

    status = chasen_sparse_main(converted, fp_out);

    free(converted);
    return status;
}

/*
 * read from file/str, parse, and output to string
 * 
 * return value: string
 *     !NULL - ok / no result / too many morphs
 *     NULL - quit / eof
 */

/*
 * file -> string
 *
 * Reads one line from fp_in and analyses it with a NULL output stream.
 * Returns get_cha_output() on success, NULL when no line could be read
 * or chasen_sparse_main() returned non-zero.
 */
char *chasen_fparse_tostr(fp_in)
    FILE *fp_in;
{
    char line[CHA_INPUT_SIZE];

    if (cha_fgets(line, sizeof(line), fp_in) != NULL
	&& chasen_sparse_main(line, NULL) == 0)
      return get_cha_output();

    return NULL;
}

/*
 * string -> string
 *
 * str_in is converted with cha_jistoeuc() into a temporary buffer and
 * analysed with a NULL output stream.  The temporary buffer is released
 * on every path, including when chasen_sparse_main() returns non-zero.
 *
 * return value:
 *     get_cha_output() - chasen_sparse_main() returned 0
 *     NULL             - chasen_sparse_main() returned non-zero
 */
char *chasen_sparse_tostr(str_in)
    char *str_in;
{
    char *euc_str;
    int rc;

    euc_str = cha_malloc(strlen(str_in) + 1);
    cha_jistoeuc(str_in, euc_str);

    rc = chasen_sparse_main(euc_str, NULL);
    free(euc_str);

    if (rc)
      return NULL;

    return get_cha_output();
}

/*
 * cha_fgets() - read one input line into s (at most n bytes) from fp.
 * Delegates to jfgets() when opt_ja is set, to fget_line() otherwise,
 * and returns whatever the chosen reader returns.
 */
char *cha_fgets(s, n, fp)
    char *s;
    int n;
    FILE *fp;
{
    if (!opt_ja)
      return fget_line(s, n, fp);

    return jfgets(s, n, fp);
}

/*
 * set_dic_filename() - store the dictionary path for s in filename.
 *
 * An absolute s is copied unchanged; anything else is prefixed with
 * get_grammar_dir().  "Absolute" means a leading '/' or, on _WIN32,
 * a leading '\\' or a ':' in the second position.
 * No bounds checking is done: filename must be large enough.
 */
static void set_dic_filename(filename, s)
    char *filename, *s;
{
    int absolute;

#ifdef _WIN32
    absolute = (*s == '\\' || (*s && s[1] == ':'));
#else
    absolute = (*s == '/');
#endif

    if (!absolute) {
	sprintf(filename, "%s%s", get_grammar_dir(), s);
	return;
    }

    strcpy(filename, s);
}

/*
 * read_patdic - read patricia dictionaries
 *
 * cell is a list of dictionary names.  For each entry the base path is
 * stored in patdic_filename[], "<base>.int" is opened into dic_file[]
 * and the tree is loaded from "<base>.pat" into tree_top[].
 * number_of_tree receives the number of entries processed.
 * Does nothing when patdic_filename[0] is already filled in.
 *
 * The error paths rely on cha_exit_file()/cha_exit_perror() not
 * returning, as the existing MAX_DIC_NUMBER check already does.
 */
void read_patdic(cell)
    cell_t *cell;
{
    int  num;
    char filename[CHA_FILENAME_MAX];

    /* return if already read */
    if (patdic_filename[0][0])
      return;

    for (num = 0; !nullp(cell); num++, cell = cdr(cell)) {
	if (num >= MAX_DIC_NUMBER)
	  cha_exit_file(1, "too many patricia dictionary files");
	/* pass the row itself (char *), not a pointer to the whole row */
	set_dic_filename(patdic_filename[num], s_atom(car(cell)));

	/* ".int" and ".pat" are the same length: one check covers both */
	if (strlen(patdic_filename[num]) + sizeof(".int") > sizeof(filename))
	  cha_exit_file(1, "patricia dictionary file name is too long");

	/* open the dictionary */
	sprintf(filename, "%s.int", patdic_filename[num]);
	/*
	 * NOTE(review): plain fopen() is deliberate here; the original
	 * (garbled) note appears to say cha_fopen() cannot be used while
	 * the chasenrc file is still being read - confirm.
	 */
	if ((dic_file[num] = fopen(filename, "r")) == NULL)
	  cha_exit_perror(filename);

	sprintf(filename, "%s.pat", patdic_filename[num]);
	(void)pat_init_tree_top(&tree_top[num]);
	(void)com_l(filename, &tree_top[num]);
    }
    number_of_tree = num;
}

/*
 * read_sufdic - read SUFARY dictionaries
 *
 * cell is a list of dictionary names.  For each entry the base path is
 * stored in sufdic_filename[] and "<base>.int" / "<base>.ary" are opened
 * with sa_openfiles(); the result is kept in sufdic_file[].
 * num_sufdic_file receives the number of entries processed.
 * Does nothing when sufdic_filename[0] is already filled in.
 *
 * The error paths rely on cha_exit_file() not returning, as the
 * existing MAX_DIC_NUMBER check already does.
 */
void read_sufdic(cell)
    cell_t *cell;
{
    int  num;
    char filename[CHA_FILENAME_MAX];
    char ary_filename[CHA_FILENAME_MAX];

    /* return if already read */
    if (sufdic_filename[0][0])
      return;

    for (num = 0; !nullp(cell); num++, cell = cdr(cell)) {
	if (num >= MAX_DIC_NUMBER)
	  cha_exit_file(1, "too many SUFARY dictionary files");
	/* pass the row itself (char *), not a pointer to the whole row */
	set_dic_filename(sufdic_filename[num], s_atom(car(cell)));

	/* ".int" and ".ary" are the same length: one check covers both */
	if (strlen(sufdic_filename[num]) + sizeof(".int") > sizeof(filename))
	  cha_exit_file(1, "SUFARY dictionary file name is too long");

	/* open the dictionary */
	sprintf(filename, "%s.int", sufdic_filename[num]);
	sprintf(ary_filename, "%s.ary", sufdic_filename[num]);
	sufdic_file[num] = (char *)sa_openfiles(filename, ary_filename);
    }
    num_sufdic_file = num;
}

