/*
 * parse.c - parse a sentence
 *
 * Copyright (C) 1996,1997 Nara Institute of Science and Technology
 *
 * Modified by: A.Kitauchi <akira-k@is.aist-nara.ac.jp>, Oct. 1996
 */

#include "chalib.h"
#include "pat.h"
#include "sufary.h"

/* Number of mrph2_t entries in one allocation block of the Mrph array. */
#define MRPH_NUM	        1024
/* Capacity of the per-position work arrays (path_idx, pcost, path). */
#define PATH1_NUM		256

/*
 * Character-class codes.  In this file they are referenced only by
 * check_code() and undef_mrph_len(), both of which are currently
 * compiled out with #if 0.  check_code() builds a code as
 * first_byte * 256 + second_byte and compares it against these values;
 * presumably they are EUC-JP code ranges -- TODO confirm.
 */
#define HANKAKU            	0x80
#define PRIOD            	0xa1a5
#define CHOON            	0xa1bc
#define KIGOU            	0xa3b0
#define SUJI           	        0xa3c0
#define ALPH            	0xa4a0
#define HIRAGANA                0xa5a0
#define KATAKANA                0xa6a0
#define GR                      0xb0a0
#define KANJI                   0xffff
#define ILLEGAL                 1

/* Non-zero when c is one of the two blank characters: space or tab. */
#define is_spc(c)    ((c)==' '||(c)=='\t')

/* Morpheme buffer; allocated and grown through malloc_free_mrph(). */
mrph2_t *Mrph = NULL;
/* Path buffer; allocated and grown through malloc_free_path(). */
path_t *Path = NULL;
/* Number of entries of Path currently in use. */
int Path_num;

/* Defined elsewhere; each element is handed to the sa_*() search calls. */
extern char *sufdic_file[MAX_DIC_NUMBER];

/***********************************************************************
 * malloc_chars - arena allocator for small, short-lived objects
 *
 * malloc_chars(size, nitems) with nitems > 0 returns storage for nitems
 * objects of `size' bytes, carved out of CHA_MALLOC_SIZE-byte buffers.
 * The wrapper macros below only use size 1, 2 or 4; the alignment step
 * assumes size is a power of two.
 *
 * Objects are never released one by one.  free_chars() (nitems == 0)
 * frees every buffer except the first one and rewinds the arena, which
 * invalidates every pointer handed out before.
 *
 * A request that is negative, or that does not fit into one buffer,
 * and exhaustion of the buffer table are reported through cha_exit().
 * NOTE(review): cha_exit() presumably terminates the process; NULL is
 * returned in case it does not.
 ***********************************************************************/
#define CHA_MALLOC_SIZE (1024 * 64)
#define malloc_char(n)     malloc_chars(1, n)
#define malloc_short(n)    malloc_chars(2, n)
#define malloc_int(n)      malloc_chars(4, n)
#define free_chars()       malloc_chars(0, 0)
static void *malloc_chars(size, nitems)
    int nitems, size;
{
    enum { BUFFER_MAX = 128 };		/* capacity of the buffer table */
    static char *buffer_ptr[BUFFER_MAX];
    static int buffer_ptr_num = 0;
    /* "full" on purpose so that the first request allocates a buffer */
    static int buffer_idx = CHA_MALLOC_SIZE;

    if (nitems == 0) {
        /* free: keep the first buffer for reuse, drop all the others */
        if (buffer_ptr_num > 0) {
            while (buffer_ptr_num > 1)
                free(buffer_ptr[--buffer_ptr_num]);
            buffer_idx = 0;
        }
        return NULL;
    }

    /*
     * Reject requests that can never fit into a single buffer; handing
     * out a pointer for them would overrun the buffer.
     */
    if (nitems < 0 || nitems > CHA_MALLOC_SIZE / (size > 1 ? size : 1)) {
        cha_exit(1, "Can't allocate memory");
        return NULL;
    }

    if (size > 1) {
        /* round the offset up to a multiple of size (no-op if aligned) */
        buffer_idx = (buffer_idx + size - 1) & ~(size - 1);
        nitems *= size;		/* from here on nitems is a byte count */
    }

    if (buffer_idx + nitems > CHA_MALLOC_SIZE) {
        /* current buffer exhausted: start a new one */
        if (buffer_ptr_num == BUFFER_MAX) {
            cha_exit(1, "Can't allocate memory");
            return NULL;
        }
        buffer_ptr[buffer_ptr_num++] = cha_malloc(CHA_MALLOC_SIZE);
        buffer_idx = 0;
    }

    buffer_idx += nitems;
    return buffer_ptr[buffer_ptr_num - 1] + buffer_idx - nitems;
}

/*
 * malloc_free_block - manage a heap array that grows in whole blocks
 *
 *   ptr      the current array (not looked at while *nblockp == 0)
 *   nblockp  in/out: number of `size'-byte blocks held by ptr
 *   size     size of one block in bytes
 *   do_free  non-zero: shrink to exactly one block.  An array of more
 *                      than one block is freed and a fresh, single
 *                      block is allocated (contents are lost); an
 *                      array of exactly one block is returned as is.
 *            zero:     grow the array by one block; the existing
 *                      contents are preserved but the array may move.
 *
 * Returns the array, or NULL when the allocation fails.  On failure the
 * old array is released and *nblockp is reset to 0: the callers store
 * the result over their only copy of the pointer, so keeping the old
 * block would leak it, and a zero count makes the next "free" call
 * allocate a fresh block instead of leaving a NULL array behind.
 */
static void *malloc_free_block(ptr, nblockp, size, do_free)
    void *ptr;
    int *nblockp, size, do_free;
{
    void *grown;

    if (do_free) {
        if (*nblockp > 1) {
            free(ptr);
            ptr = NULL;
            *nblockp = 0;
        }
        if (*nblockp != 0)
            return ptr;		/* exactly one block: reuse it */
        /* nothing allocated: fall through and allocate one block */
    }

    if (*nblockp == 0) {
        /* ptr is stale or NULL here, so it must not be freed */
        grown = malloc((size_t)size);
    } else {
        /* do the multiplication in size_t to avoid int overflow */
        grown = realloc(ptr, (size_t)size * ((size_t)*nblockp + 1));
        if (grown == NULL)
            free(ptr);		/* realloc leaves the old block allocated */
    }

    if (grown == NULL) {
        *nblockp = 0;
        return NULL;
    }

    ++*nblockp;
    return grown;
}

/*
 * malloc_path() grows the global Path array by one block of PATH_NUM
 * entries; free_path() brings it back to a single block.  Both evaluate
 * to non-zero when Path is NULL afterwards, i.e. when the allocation
 * failed.
 */
#define malloc_path()  malloc_free_path(0)
#define free_path()    malloc_free_path(1)
static int malloc_free_path(do_free)
    int do_free;
{
    static int nblock = 0;	/* number of blocks currently held by Path */
    /*
     * malloc_free_block() is defined without a prototype, so no
     * conversion happens at the call: the argument has to be an int
     * already, not the size_t that sizeof yields.
     */
    int block_bytes = (int)(sizeof(path_t) * PATH_NUM);

    Path = malloc_free_block((void *)Path, &nblock, block_bytes, do_free);

    return Path == NULL;
}

/*
 * malloc_mrph() grows the global Mrph array by one block of MRPH_NUM
 * entries; free_mrph() brings it back to a single block.  Both evaluate
 * to non-zero when Mrph is NULL afterwards, i.e. when the allocation
 * failed.
 */
#define malloc_mrph()  malloc_free_mrph(0)
#define free_mrph()    malloc_free_mrph(1)
static int malloc_free_mrph(do_free)
    int do_free;
{
    static int nblock = 0;	/* number of blocks currently held by Mrph */
    /*
     * malloc_free_block() is defined without a prototype, so no
     * conversion happens at the call: the argument has to be an int
     * already, not the size_t that sizeof yields.
     */
    int block_bytes = (int)(sizeof(mrph2_t) * MRPH_NUM);

    Mrph = malloc_free_block((void *)Mrph, &nblock, block_bytes, do_free);

    return Mrph == NULL;
}

#if 0
/***********************************************************************
 * check_code() - classify the character at the start of str
 *
 * Compiled out.  Returns 0 for end of string, space, CR or LF,
 * HANKAKU for a byte below 0x80, ILLEGAL when a byte >= 0x80 is
 * followed by a byte below 0x80, and otherwise one of the class codes
 * selected by comparing first_byte * 256 + second_byte against the
 * class constants.
 ***********************************************************************/
static int check_code(str)
    char *str;
{
    int	code;
    unsigned char *s = (unsigned char *)str;

    /* allow half-width spaces inside the input, by T.U. '96.01.10 */
#if 1
    if (*s == '\0' || *s == ' ' || *s == '\r' || *s == '\n')
      return 0;
#else
    if (*s == '\0')
      return 0;
#endif
    else if (*s < HANKAKU)
      return HANKAKU;
    else if (*(s+1) < HANKAKU)
      return ILLEGAL;

    /* two-byte character: combine both bytes into one code */
    code = *s * 256 + *(s + 1);

    if (code == PRIOD)        return PRIOD;
    else if (code == CHOON)   return CHOON;
    else if (code < KIGOU)    return KIGOU;
#if 0
    else if (code < SUJI)     return SUJI;
#endif
    else if (code < ALPH)     return ALPH;
    else if (code < HIRAGANA) return HIRAGANA;
    else if (code < KATAKANA) return KATAKANA;
    else if (code < GR)       return GR;
    else return KANJI;
}
#endif

#if 0
/***********************************************************************
 * undef_mrph_len - determine the byte length of an undefined word
 *
 * Compiled out.  A character classified as HIRAGANA or KANJI forms a
 * word on its own (2 bytes).  Any other class extends over the run of
 * following characters of the same class; a KATAKANA run also absorbs
 * CHOON and an ALPH run also absorbs PRIOD.
 ************************************************************************/
static int undef_mrph_len(target)
    char *target;
{
    int code, next_code;
    int len = 0;

    code = check_code(target);

    if (code == HIRAGANA || code == KANJI)
      return 2;

    do {
	/* one byte for HANKAKU/ILLEGAL, two bytes for everything else */
	if (code == HANKAKU || code == ILLEGAL)
	  len++;
	else
	  len += 2;
	next_code = check_code(target + len);
    } while (next_code == code
	     || (code == KATAKANA && next_code == CHOON)
	     || (code == ALPH     && next_code == PRIOD));

    return len;
}
#endif

/***********************************************************************
 * register_undef_mrph1 - ̤Хåեɲ
 ***********************************************************************/
static int register_undef_mrph1(target, mrph_idx, undef_len, no)
    char *target;
    int mrph_idx, undef_len, no;
{
#if 0
    int undef_len;
#endif
    mrph2_t *mrph = &Mrph[mrph_idx];

#if 0
    undef_len = undef_mrph_len(target);
#endif

#if 0
    mrph->midasi = (char *)malloc_char(undef_len + 1);
    memcpy(mrph->midasi, target, undef_len);
    mrph->midasi[undef_len] = '\0';
#else
    mrph->midasi = target;
#endif
    mrph->yomi = "";
    mrph->base_length = mrph->length = undef_len;
#ifdef VGRAM
    mrph->base = "";
    mrph->pron = "";
    mrph->comp = "\n";
#endif

    mrph->hinsi = Undef_info[no].hinsi;
#ifndef VGRAM
    mrph->bunrui = Undef_info[no].bunrui;
#endif
    mrph->con_tbl = Undef_info[no].con_tbl;
    mrph->ktype = 0;
    mrph->kform = 0;
    mrph->is_undef = no + 1; /* ̤ */
    mrph->weight = MRPH_DEFAULT_WEIGHT;
    mrph->info = ""; /* ̣϶ʸȤ롥 */

    if (++mrph_idx % MRPH_NUM == 0 && malloc_mrph())
      return FALSE;

    return TRUE;
}

#if 0
/*
 * register_undef_mrph - compiled out.  Registers one undefined-word
 * candidate per Undef_info entry in consecutive Mrph slots starting at
 * mrph_idx; returns FALSE as soon as one registration fails.
 */
static int register_undef_mrph(target, mrph_idx, undef_len)
    char *target;
    int mrph_idx, undef_len;
{
    int no;

    for (no = 0; no < Undef_info_num; no++)
      if (register_undef_mrph1(target, mrph_idx+no, undef_len, no) == FALSE)
	return FALSE;

    return TRUE;
}
#endif

/***********************************************************************
 * get_mrph_data - decode one dictionary record into *mrph
 *
 * pbuf holds the record: the headword terminated by DELI_MIDASI,
 * followed by fields that each end in DELI_YOMI, in this order:
 *   part of speech, [sub-class, non-VGRAM only], conjugation type,
 *   conjugation form, weight, reading, [pronunciation and base form,
 *   VGRAM only], connection table number, info, [compound, VGRAM only].
 * The string members of *mrph point into pbuf; nothing is copied.
 * midasi is set to target and only the headword length is taken from
 * the record.
 ***********************************************************************/
#ifdef VGRAM
#define DELI_MIDASI '\0'
#define DELI_YOMI '\0'
#else
#define DELI_MIDASI '\t'
#define DELI_YOMI ' '
#endif

/*
 * Reads the decimal field at *pp.  The first character is always taken
 * as a digit; further characters are taken until DELI_YOMI is met or
 * max_digits digits have been read.  *pp is then moved past the digits
 * and past the one character that follows them.
 */
static int get_mrph_number(pp, max_digits)
    char **pp;
    int max_digits;
{
    char *s = *pp;
    int value = s[0] - '0';
    int n = 1;

    while (n < max_digits && s[n] != DELI_YOMI) {
        value = value * 10 + (s[n] - '0');
        n++;
    }
    *pp = s + n + 1;

    return value;
}

void get_mrph_data(mrph, pbuf, target)
    mrph2_t *mrph;
    char *pbuf, *target;
{
    char *p = pbuf;
    int  n;

    mrph->midasi = target;
    mrph->is_undef = 0;

    /* length of the headword */
    while (*p != DELI_MIDASI)
        p++;
    n = p - pbuf;
    p++;
    mrph->base_length = mrph->length = n;

    /* part of speech number */
    mrph->hinsi = get_mrph_number(&p, 3);
#ifndef VGRAM
    /* part of speech sub-class number: any number of digits */
    n = 0;
    while (*p != DELI_YOMI) {
        n = n * 10 + *p - '0';
        p++;
    }
    mrph->bunrui = n;
    p++;
#endif
    /* conjugation type number */
    mrph->ktype = get_mrph_number(&p, 3);
    /* conjugation form number */
    mrph->kform = get_mrph_number(&p, 3);

    /* weight */
#ifndef VGRAM
    if (*p < '0' || *p > '9')
        cha_exit(1, "The format of the patricia dictionary is invalid.");
#endif
    mrph->weight = get_mrph_number(&p, 5);

    /* reading */
    mrph->yomi = p;
    p = strchr(p, DELI_YOMI) + 1;
#ifdef VGRAM
    /* pronunciation */
    mrph->pron = p;
    p += strlen(p) + 1;
    /* base form */
    mrph->base = p;
    p += strlen(p) + 1;
    /* connection table number must start with a digit */
    if (*p < '0' || *p > '9')
        cha_exit(1, "The format of the patricia dictionary is invalid.");
#endif
    mrph->con_tbl = get_mrph_number(&p, 5);

    /* semantic information */
    mrph->info = p;

#ifdef VGRAM
    p = strchr(p, DELI_YOMI) + 1;
    /* compound word information */
    mrph->comp = p;
#endif
}

/***********************************************************************
 * compare_top_str1 - test whether s1 is a prefix of s2
 *
 * Returns TRUE when every character of s1 equals the character at the
 * same position of s2 (an empty s1 always matches), FALSE otherwise.
 ***********************************************************************/
static int compare_top_str1(s1, s2)
    char *s1, *s2;
{
    while (*s1 != '\0') {
        if (*s1 != *s2)
            return FALSE;
        s1++;
        s2++;
    }
    return TRUE;
}

/***********************************************************************
 * katuyou_process - find all forms which match the follow string
 ***********************************************************************/
static int *katuyou_process(follows, ktype)
    char *follows;
    int ktype;
{
    static int formstr[FORM_NO];
    int *fs = formstr;
    int f;

    for (f = 1; Form[ktype][f].name; f++)
      if (compare_top_str1(Form[ktype][f].gobi, follows))
	*fs++ = f;
    *fs = 0;

    return formstr;
}

/***********************************************************************
 * register_mrph - finish the entry at Mrph[mrph_idx], expanding
 *                 conjugations
 *
 * The entry must already have been filled in by get_mrph_data().
 *  - Conjugating word stored as a stem (kform == 0): one entry is
 *    produced for every conjugation form whose ending matches the text
 *    that follows the stem; this can be zero entries.
 *  - Conjugating word stored in a fixed form: base length, reading
 *    (and pronunciation under VGRAM) are cleared and the entry is kept.
 *  - Non-conjugating word: the entry is kept unchanged.
 *
 * nmrphp: receives the number of entries added to the buffer.
 * Returns FALSE when the Mrph array cannot be grown (in that case
 * *nmrphp is not written), TRUE otherwise.
 ************************************************************************/
static int register_mrph(mrph_idx, nmrphp)
    int mrph_idx, *nmrphp;
{
    int next_idx = mrph_idx;
    mrph2_t *cur = &Mrph[mrph_idx];
    int conjugates;

#ifdef VGRAM
    conjugates = (Hinsi[cur->hinsi].kt == 1);
#else
    conjugates = (Class[cur->hinsi][cur->bunrui].kt != 0);
#endif

    if (conjugates && cur->kform == 0) {
        /* stem entry: try every form of its conjugation type */
        int ktype = cur->ktype;
        int stem_len = cur->length;
        int con_tbl = cur->con_tbl;
        char *follows = cur->midasi + cur->base_length;
        int *f = katuyou_process(follows, ktype);

        while (*f != 0) {
            /* every entry after the first starts as a copy of the first */
            if (next_idx != mrph_idx)
                *cur = Mrph[mrph_idx];
            cur->kform = *f;
            cur->length = stem_len + strlen(Form[ktype][*f].gobi);
            cur->con_tbl = con_tbl + *f - 1;
            next_idx++;
            if (next_idx % MRPH_NUM == 0 && malloc_mrph())
                return FALSE;
            /* Mrph may have moved, so take the address again */
            cur = &Mrph[next_idx];
            f++;
        }
    } else {
        if (conjugates) {
            /* conjugating word without a stem */
            cur->base_length = 0;
            cur->yomi = "";
#ifdef VGRAM
            cur->pron = "";
#endif
        }
        next_idx++;
        if (next_idx % MRPH_NUM == 0 && malloc_mrph())
            return FALSE;
    }

    *nmrphp = next_idx - mrph_idx;

    return TRUE;
}

/***********************************************************************
 * convert_pat_mrphs - add the results of a patricia-tree search to Mrph
 *
 * dic_buffer is a NULL-terminated array of dictionary records.  Each
 * record is decoded into the next free Mrph slot and then expanded by
 * register_mrph().
 *
 * mrph_nump: receives the number of entries added to the buffer.
 * Returns FALSE when the Mrph array cannot be grown, TRUE otherwise.
 ***********************************************************************/
static int convert_pat_mrphs(target, dic_buffer, mrph_idx, mrph_nump)
    char *target;
    char **dic_buffer;
    int mrph_idx, *mrph_nump;
{
    int next_idx = mrph_idx;
    int added;
    int k = 0;

    while (dic_buffer[k] != NULL) {
        get_mrph_data(&Mrph[next_idx], dic_buffer[k], target);
        if (register_mrph(next_idx, &added) == FALSE)
            return FALSE;
        next_idx += added;
        k++;
    }
    *mrph_nump = next_idx - mrph_idx;

    return TRUE;
}

/***********************************************************************
 * convert_suf_mrphs - ѤĴ٤ʤǤХåեɲ
 *
 * mrph_nump: ХåեɲäǤο
 ***********************************************************************/
static int convert_suf_mrphs(target, dic_buffer, file, mrph_idx, mrph_nump)
    char *target;
    char *file;
    long *dic_buffer;
    int mrph_idx, *mrph_nump;
{
    int nmrph;
    int new_mrph_idx = mrph_idx;
    int i, nsuf = (int)dic_buffer[0];

    for (i = 1; i <= nsuf; i++) {
#if 0
	char *line = (char *)sa_getline(file, dic_buffer[i]);
#endif
	char *line = (char *)(((SUFARY *)file)->txtmap + dic_buffer[i]);
	if (0) {
	  char s[1024];
	  memset(s,0,1024);
	  memccpy(s,line,'\n',256);
	  printf("line: %s",s);fflush(stdout);
	}
	get_mrph_data(&Mrph[new_mrph_idx], line, target);
	if (register_mrph(new_mrph_idx, &nmrph) == FALSE)
	  return FALSE;
	new_mrph_idx += nmrph;
    }
    *mrph_nump = new_mrph_idx - mrph_idx;

    return TRUE;
}

#if 0
/*
------------------------------------------------------------------------------
  PROCEDURE: <trim_space> skip spaces (changed to skip half-width spaces
                          only, by T.U '96.01.11)

  Compiled out.  Returns the index of the first character at or after
  pos that is not a half-width space.
------------------------------------------------------------------------------
*/
static int trim_space(str, pos)
    char *str;
    int pos;
{
    for (;;) {
	/* half-width space */
	if (str[pos] == ' ')
	  pos++;
#if 0
	/* full-width space */
	else if ((unsigned char)str[pos  ] == 0xA1 &&
		 (unsigned char)str[pos+1] == 0xA1)
	  pos += 2;
#endif
	else
	  break;
    }
    return pos;
}
#endif

/*
------------------------------------------------------------------------------
  PROCEDURE: <pos_match_process>
------------------------------------------------------------------------------
*/
static int pos_match_process(pos, p_idx)
    int pos, *p_idx;
{
    static int p_start;
    int i, j;

    j = 0;
    if (pos == 0) {
	/* new sentence */
	p_idx[j++] = 0;
	p_start = 1;
    } else {
	for (i = p_start; i < Path_num; i++) {
	    if (Path[i].end <= pos) {
		if (i == p_start)
		  p_start++;
		if (Path[i].end == pos)
		  p_idx[j++] = i;
	    }
	}
    }
    p_idx[j] = -1;

    return j;
}

/*
------------------------------------------------------------------------------
  PROCEDURE: <check_connect>

  Tries to connect morpheme Mrph[m_num], which starts at byte offset
  pos, to every path listed in p_idx (terminated by -1).  The connectable
  predecessors are grouped by the automaton state they lead to.  For
  every group one new Path entry is appended; it records the cheapest
  predecessor first, followed by the other predecessors whose cost lies
  within Cost_width of the cheapest one.

  Returns FALSE when the Path array cannot be grown, TRUE otherwise
  (also when nothing could be connected).

  The work arrays hold PATH1_NUM elements.  Predecessors that would not
  fit (a new state group when all groups are in use, or one more member
  for a full group) are skipped instead of being written out of bounds.
------------------------------------------------------------------------------
*/
static int check_connect(pos, m_num, p_idx)
    int pos, m_num, *p_idx;
{
    /* groups the predecessor paths by resulting state */
    typedef struct _path_cost_t {
        int min_cost;
        short min_cost_no;
        short state;
        short num;
        int   cost[PATH1_NUM];
        int   pno[PATH1_NUM];
    } path_cost_t;

    static path_cost_t pcost[PATH1_NUM];
    int pcost_num;
    int path[PATH1_NUM], *new_path;
    mrph2_t *new_mrph;
    int i, pno, pcostno, npath;
    int haba_cost, con_cost, cost, mrph_cost;
    int con_tbl, next_state;

#ifdef DEBUG
    printf("[m:%d] ", m_num);
#endif
    new_mrph = &Mrph[m_num];
    con_tbl = new_mrph->con_tbl;

    pcost[0].state = -1;
    pcost_num = 0;

    for (i = 0; (pno = p_idx[i]) >= 0; i++) {
        /* ask the automaton for the next state and the connection cost */
        next_state = check_automaton
          (Path[pno].state,
#ifdef KOCHA
           Path[Path[pno].path[0]].state,
           Mrph[Path[pno].mrph_p].midasi,
           Mrph[Path[pno].mrph_p].is_undef,
#endif
           con_tbl, Con_cost_undef, &con_cost);

#ifdef DEBUG
        printf("[m1:%d,m2:%d,cost:%d,state:%d]\n",
               Path[pno].mrph_p,m_num,con_cost,next_state);
#endif
        /* this value of con_cost means "cannot connect" */
#ifdef VGRAM
        if (con_cost == -1) continue;
#else
        if (con_cost == 0) continue;
#endif

        /* compute the cost */
        cost = Path[pno].cost + con_cost * Con_cost_weight;

        /* find the group this predecessor belongs to */
        for (pcostno = 0; pcostno < pcost_num; pcostno++)
            if (next_state == pcost[pcostno].state)
                break;
        if (pcostno < pcost_num) {
            if (cost - pcost[pcostno].min_cost > Cost_width)
                continue;
            /* group full: one element of path[] is kept for the -1 */
            if (pcost[pcostno].num >= PATH1_NUM - 1)
                continue;
        } else {
            /* all groups in use: the predecessor cannot be recorded */
            if (pcost_num >= PATH1_NUM)
                continue;
            /* create a new group */
            pcost_num++;
            pcost[pcostno].num = 0;
            pcost[pcostno].state = next_state;
            pcost[pcostno].min_cost = INT_MAX;
        }

        /* register the predecessor in its group */
        pcost[pcostno].cost[pcost[pcostno].num] = cost;
        pcost[pcostno].pno[pcost[pcostno].num] = pno;
        if (cost < pcost[pcostno].min_cost) {
            pcost[pcostno].min_cost = cost;
            pcost[pcostno].min_cost_no = pcost[pcostno].num;
        }
        pcost[pcostno].num++;
    }

    if (pcost_num == 0)
        return TRUE;

    /* cost of the morpheme itself */
    if (new_mrph->is_undef) {
        mrph_cost = Undef_info[new_mrph->is_undef-1].cost
          + Undef_info[new_mrph->is_undef-1].cost_step * new_mrph->length / 2;
    } else {
#ifdef VGRAM
        mrph_cost = Hinsi[new_mrph->hinsi].cost;
#else
        mrph_cost = Class[new_mrph->hinsi][new_mrph->bunrui].cost;
#endif
    }
    mrph_cost *= new_mrph->weight * Mrph_cost_weight;

#ifdef KOCHA
    /* headword starting with the bytes 0xA1 0xA1: one path per predecessor */
    if (new_mrph->midasi &&
        (unsigned char)new_mrph->midasi[0] == 0xA1 &&
        (unsigned char)new_mrph->midasi[1] == 0xA1) {
        for (i = 0; i < pcost[0].num; i++) {
            new_path = malloc_int(2);
            new_path[0] = pcost[0].pno[i];
            new_path[1] = -1;

            Path[Path_num].cost = pcost[0].cost[i] + mrph_cost;
            Path[Path_num].mrph_p = m_num;
            Path[Path_num].state = pcost[0].state;
            Path[Path_num].start = pos;
            Path[Path_num].end = pos + new_mrph->length;
            Path[Path_num].path = new_path;
#ifdef KOCHA_DEBUG
            printf("[Sp:%d,prev:%d,m:%d,c:%d,pc:%d]\n",
                   Path_num,Path[Path_num].path[0],m_num,pcost[0].cost[i],Path[Path_num].cost);
#endif
            if (++Path_num % PATH_NUM == 0 && malloc_path())
                return FALSE;
        }
    } else {
#endif /* KOCHA */
        for (pcostno = 0; pcostno < pcost_num; pcostno++) {
            /* pick the predecessors that lie within the cost width */
            haba_cost = pcost[pcostno].min_cost + Cost_width;
            npath = 0;
            /* the cheapest predecessor always comes first */
            path[npath++] = pcost[pcostno].pno[pcost[pcostno].min_cost_no];
            for (i = 0; i < pcost[pcostno].num; i++)
                if (pcost[pcostno].cost[i] <= haba_cost && i != pcost[pcostno].min_cost_no)
                    path[npath++] = pcost[pcostno].pno[i];
            path[npath++] = -1;

            new_path = malloc_int(npath);
            memcpy(new_path, path, sizeof(int) * npath);

            /* register the new path */
            Path[Path_num].cost = pcost[pcostno].min_cost + mrph_cost;
            Path[Path_num].mrph_p = m_num;
            Path[Path_num].state = pcost[pcostno].state;
            Path[Path_num].start = pos;
            Path[Path_num].end = pos + new_mrph->length;
            Path[Path_num].path = new_path;
#ifdef DEBUG
            /* "c" is the cost of this group's cheapest predecessor */
            printf("%3d %3d %5d [p:%d,prev:%d,m:%d,c:%d,pc:%d]\n",
                   Path[Path_num].start, Path[Path_num].end,
                   Path[Path_num].state,
                   Path_num,Path[Path_num].path[0],m_num,
                   pcost[pcostno].min_cost,Path[Path_num].cost);
#endif
            if (++Path_num % PATH_NUM == 0 && malloc_path())
                return FALSE;
        }
#ifdef KOCHA
    }
#endif
    return TRUE;
}

/*
 * set_mrph_end - fill *mrph with the morpheme that marks a sentence
 * boundary.  chasen_sent() stores it at Mrph[0] and again after the
 * last morpheme of the sentence.  All strings are empty (comp is "\n"
 * under VGRAM), both lengths are 3, and part of speech, connection
 * table, conjugation type and conjugation form are 0.
 */
static void set_mrph_end(mrph)
    mrph2_t *mrph;
{
    /* grammatical attributes */
    mrph->hinsi = 0;
#ifndef VGRAM
    mrph->bunrui = 0;
#endif
    mrph->con_tbl = 0;
    mrph->ktype = 0;
    mrph->kform = 0;
    mrph->is_undef = 0;
    mrph->weight = MRPH_DEFAULT_WEIGHT;

    /* lengths */
    mrph->length = 3;
    mrph->base_length = mrph->length;

    /* strings */
    mrph->info = "";
    mrph->yomi = mrph->info;
    mrph->midasi = mrph->yomi;
#ifdef VGRAM
    mrph->pron = "";
    mrph->base = mrph->pron;
    mrph->comp = "\n";
#endif
}

#ifdef VGRAM
/*
 * set_mrph_bkugiri - install the delimiter morphemes in Mrph[1..n]
 *
 * One entry is created for every part of speech whose Hinsi[].bkugiri
 * string is set; that string becomes its headword, reading, base form
 * and pronunciation.  Returns n, the number of such entries.
 *
 * The table is treated as already built when Mrph[1].midasi is
 * non-NULL.  When it is rebuilt the counter starts again from 0, so
 * that the entries are stored from Mrph[1] on and not behind the count
 * of an earlier build.
 *
 * NOTE(review): the "already built" test reads a slot that may be
 * uninitialised after Mrph was reallocated, or that may hold an
 * ordinary morpheme after a sentence analysed with opt_nobk set --
 * confirm whether the table should simply be rebuilt on every call.
 */
static int set_mrph_bkugiri()
{
    static int bkugiri_num;
    int h;
    mrph2_t *mrph;

    if (Mrph[1].midasi)
        return bkugiri_num;

    bkugiri_num = 0;
    for (h = 0; Hinsi[h].name; h++) {
        if (!Hinsi[h].bkugiri)
            continue;
        mrph = &Mrph[++bkugiri_num];
        /* zero the members that are not assigned below */
        memset(mrph, 0, sizeof(mrph2_t));

        mrph->hinsi = h;
        mrph->con_tbl = check_table_for_undef(h);
        mrph->midasi = mrph->yomi = mrph->base = Hinsi[h].bkugiri;
        mrph->pron = mrph->midasi;
        mrph->info = "";
    }
    return bkugiri_num;
}
#endif

/*
 * strcmp_anno - check whether target begins with an annotation opener
 *
 * Compares target with the opening string (str1, len1 bytes) of every
 * Anno_info entry from index 1 up to the first entry whose str1 is
 * NULL.  Returns -i for the first matching entry i, or 0 when none
 * matches.
 */
static int strcmp_anno(target)
    char *target;
{
    int idx = 1;

    while (Anno_info[idx].str1) {
        if (memcmp(target, Anno_info[idx].str1, Anno_info[idx].len1) == 0)
            return -idx;
        idx++;
    }
    return 0;
}

/*
 * check_undefword_len()
 *
 * Splits target[0..target_len) into segments of equal character class
 * and records the result in two arrays indexed by byte offset.
 *
 * undefword_len (target_len elements are cleared):
 *   at the first byte of a segment: the segment length in bytes;
 *   0 everywhere else.
 *
 * char_type (target_len + 1 elements are written):
 *   1:  default for every byte
 *   0:  blank (space or tab)
 *   <0: first byte of an annotation; the value is minus the index of
 *       the matching Anno_info entry
 *   2:  first byte of a two-byte character
 *
 * Classes (stat): 0 blank, <0 annotation, 1 single-byte text (letters
 * only when Lang_e is set, otherwise any byte below 0x80), 2 two-byte
 * katakana and the long-vowel mark, 3 two-byte alphabet, 4 anything
 * else.  Class 4 characters always form a segment of their own.
 *
 * NOTE(review): annotation types are stored as negative values in a
 * plain char, which assumes that char is signed -- confirm for every
 * target platform.
 */
static void check_undefword_len(target, undefword_len, char_type, target_len)
    char *target, *char_type;
    short *undefword_len;
    int target_len;
{
    unsigned char *t;
    short *ulen0, *ulen;
    char *type;
    int stat0 = 1, stat = 1;
    anno_info *anno = NULL;	/* set before stat0 can become negative */

    memset(undefword_len, 0, target_len * sizeof(short));
    memset(char_type, 1, target_len + 1);

    t = (unsigned char *)target;
    ulen0 = ulen = undefword_len;	/* ulen0: start of current segment */
    type = char_type;

    while ((char *)t < target + target_len) {
        /*
         * Inside an annotation: it ends when it has no closing string
         * or when the closing string lies directly before t.
         */
        if (stat0 < 0 &&
            (anno->len2 == 0 ||
             ((char *)t - target > anno->len2 &&
              !memcmp(t - anno->len2, anno->str2, anno->len2))))
            stat0 = 99;
        if (stat0 < 0) {
            /* still inside the annotation: stat keeps its value */
            ;
        } else if (is_spc(*t)) {
            *type = stat = 0;
        } else if ((stat = strcmp_anno((char *)t)) < 0) {
            *type = stat;
            anno = &Anno_info[-stat];
        } else if (Lang_e
                   ? ((*t>='a' && *t<='z') || (*t>='A' && *t<='Z'))
                   : !(*t & 0x80)) {
            /* [a-zA-Z] when Lang_e is set, any 7-bit byte otherwise */
            stat = 1;
        } else if ((t[0] == 0xa5 && t[1] >= 0xa1) ||
                   (t[0] == 0xa1 && t[1] == 0xbc)) {
            /* 0xa5a1-0xa5ff: zenkaku katakana */
            /* 0xa1bc: zenkaku chou-on */
            stat = 2;
        } else if (t[0] == 0xa3 && t[1] >= 0xc1) {
            /* 0xa3c1-0xa3ff: zenkaku alphabet */
            stat = 3;
        } else {
            stat = 4;
            stat0 = 99;		/* force a segment boundary */
        }

        if (stat != stat0) {
            /* class changed: close the previous segment */
            *ulen0 = ulen - ulen0;
            ulen0 = ulen;
        }

        if (stat < 0 && stat != stat0) {
            /* annotation start: skip opener and closer length at once */
            int len = anno->len1 + anno->len2;
            if (len > target_len - ((char *)t - target))
                len = target_len - ((char *)t - target);
            t += len;
            ulen += len;
            type += len;
        } else if ((t[0] & 0x80) &&
                   (char *)t + 1 < target + target_len &&
                   (t[1] & 0x80)) {
            /* two-byte character that lies completely inside the input */
            *type = 2;
            t += 2; ulen += 2, type += 2;
        } else {
            t++; ulen++, type++;
        }
        stat0 = stat;
    }

    /* close the last segment */
    *ulen0 = ulen - ulen0;
}

/***********************************************************************
 * chasen_sent() - morphological analysis of one sentence
 *
 * Builds the morpheme buffer (Mrph) and the path lattice (Path,
 * Path_num) for target[0..target_len).  At every position the
 * dictionaries are searched, undefined-word candidates are added, and
 * every candidate is connected to the paths that end at that position.
 *
 * opt_nobk: when non-zero, no delimiter morphemes are inserted (only
 *           used under VGRAM).
 *
 * target_len must be smaller than CHA_INPUT_SIZE, the size of the
 * static per-byte work arrays.
 *
 * return value:
 *     0 - ok
 *     1 - no result / too many morphs; also returned for an input of
 *         invalid length and when memory cannot be allocated
 ***********************************************************************/
int chasen_sent(target, target_len, opt_nobk)
    char *target;
    int target_len; /* should be >0 */
    int opt_nobk;
{
    extern int num_sufdic_file;
    int  i, dic_no;
    int  pos, pos_end;
    int  path_idx[PATH1_NUM], path_idx_num;
    int  mrph_idx, new_mrph_idx;
    int  undef_len;
    int  bkugiri_num = 0, bk;
    static int path0 = -1;
    long *sufdic_buffer;
    char *patdic_buffer[256];
    static short undefword_len[CHA_INPUT_SIZE];
    static char char_type[CHA_INPUT_SIZE];

    /* char_type receives target_len + 1 bytes */
    if (target_len < 0 || target_len >= CHA_INPUT_SIZE)
        goto error_end;

    check_undefword_len(target, undefword_len, char_type, target_len);
    cha_set_sentence(target, undefword_len, char_type);

    free_chars();
    /* Path and Mrph are dereferenced right below */
    if (free_path() || free_mrph())
        goto error_end;

    /* head of the sentence */
    Path[0].start = Path[0].end = 0;
    Path[0].path = &path0;
    Path[0].cost = 0;
    Path[0].mrph_p = 0;
    Path[0].state = 0;

    Path_num = 1;
    set_mrph_end(&Mrph[0]);
#ifdef VGRAM
    if (!opt_nobk)
        bkugiri_num = set_mrph_bkugiri();
#endif
    new_mrph_idx = mrph_idx = bkugiri_num + 1;

    for (pos = pos_end = 0; pos < target_len;
         pos += char_type[pos] == 2 ? 2 : undefword_len[pos],
         pos_end = pos) {

        /* skip blanks and annotations; pos_end stays where it was */
        while (pos < target_len && char_type[pos] <= 0)
            pos += undefword_len[pos];
        if (pos >= target_len)
            break;

        path_idx_num = pos_match_process(pos_end, path_idx);
        if (path_idx_num == 0)
            continue;

        for (bk = 0; bk < bkugiri_num; bk++) {
            int path_num0 = Path_num;

            /* add a delimiter morpheme */
            if (check_connect(pos, bk + 1, path_idx) == FALSE)
                goto error_end;
            /* append the paths just created, keeping room for the -1 */
            for (; path_num0 < Path_num && path_idx_num < PATH1_NUM - 1;
                 path_num0++)
                path_idx[path_idx_num++] = path_num0;
            path_idx[path_idx_num] = -1;
        }

        /* the patricia trees are searched for two-byte characters only */
        if (char_type[pos] == 2) {
            for (dic_no = 0; dic_no < number_of_tree; dic_no++) {
                int nmrph;
                /* look up morphemes in the patricia tree */
                pat_search(dic_file[dic_no], target + pos,
                           &tree_top[dic_no], patdic_buffer);

                /* add them, with their conjugated forms, to Mrph */
                if (convert_pat_mrphs(target + pos, patdic_buffer,
                                      new_mrph_idx, &nmrph) == FALSE)
                    goto error_end;
                new_mrph_idx += nmrph;
            }
        }

        for (dic_no = 0; dic_no < num_sufdic_file; dic_no++) {
            int nmrph;
            /* look up morphemes in the SUFARY file */
            sa_reset(sufdic_file[dic_no]);
            sufdic_buffer =
              (long *)sa_common_prefix_search(sufdic_file[dic_no],
                                              target + pos, '\0');
            /* add them, with their conjugated forms, to Mrph */
            if (convert_suf_mrphs(target + pos, sufdic_buffer,
                                  sufdic_file[dic_no], new_mrph_idx, &nmrph)
                == FALSE) {
                /* the result buffer is ours on the error path as well */
                free(sufdic_buffer);
                goto error_end;
            }
            free(sufdic_buffer);
            new_mrph_idx += nmrph;
        }

        /* undefined word */
        undef_len = undefword_len[pos];

        /* check the connection with the preceding paths */
        for (i = mrph_idx; i < new_mrph_idx; i++) {
            /*
             * no undefined word is added when the dictionary has a
             * word of the same length
             */
            if (Con_cost_undef > 0 && Mrph[i].length == undef_len)
                undef_len = 0;
            if (check_connect(pos, i, path_idx) == FALSE)
                goto error_end;
        }

        /* add the undefined words */
        if (undef_len > 0) {
            int no;
            for (no = 0; no < Undef_info_num; no++, new_mrph_idx++) {
                if (register_undef_mrph1(target + pos, new_mrph_idx,
                                         undef_len, no) == FALSE)
                    goto error_end;
                if (check_connect(pos, new_mrph_idx, path_idx) == FALSE)
                    goto error_end;
            }
        }

        mrph_idx = new_mrph_idx;
    }

    /* end of the sentence */
    set_mrph_end(&Mrph[mrph_idx]);
    if (++mrph_idx % MRPH_NUM == 0 && malloc_mrph())
        goto error_end;

    pos_match_process(pos_end, path_idx);
    if (check_connect(pos, mrph_idx - 1, path_idx) == FALSE)
        goto error_end;

#ifdef DEBUG
    for (i = 1; i < mrph_idx - 1; i++) {
        printf("%4d: %4d ", i, Mrph[i].con_tbl);
        print_mrph(0, &Mrph[i], 'F', "%-11m %-11y %-11P3- %-14T  %F \n");
    }
#endif

    return 0;

  error_end:
    printf("Error: Too many morphs: %s\n", target);
    return 1;
}

