/* $Id: translator.c,v 1.1.1.1 2004/04/28 08:57:31 makigura Exp $ */
/*
 * xbabylon translator, the translator on X Window System.
 * Copyright (c) 2001 Shigeki Kaneko, all right reserved.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "xbabylon.h"

/* When defined, this is meant to strip the unprintable tail from a word
   taken from the cut buffer. Such garbage often appears with Mozilla (a bug?) */
#define REMOVE_UNPRINTABLE_GARBAGE

/* When ADDITIONAL_SEARCH is defined, these macros enable searches for
   possible modified forms of the target word. */
#ifdef ADDITIONAL_SEARCH
# define ADDITIONAL_SEARCH_FOR_REMOVE_BLANK
# define ADDITIONAL_SEARCH_FOR_CAPITALIZE
# define ADDITIONAL_SEARCH_FOR_ADDITIONAL_S
# define ADDITIONAL_SEARCH_FOR_ING
# define ADDITIONAL_SEARCH_FOR_UPPERCASE
#endif

/*
 * Matching function: compares the first `length' characters of the searched
 * word against a dictionary line.
 *    0:	all `length' characters match and a tab follows in the dict line.
 *   +1:	all `length' characters match, but the dict line has more chars.
 *   -1:	mismatch where the dict character is outside 'A'..'z'
 *		(this includes the tab that ends a shorter dict entry).
 *   +2:	mismatch where the dict character is inside 'A'..'z' and is
 *		greater than the word's character.
 *   -2:	mismatch where the dict character is inside 'A'..'z' and is
 *		smaller than the word's character.
 */
static int  CompareWords (char *sword, char *dict, int length)
{
    int  pos = 0;
    char  d;

    // walk over the common prefix
    while (pos < length && sword[pos] == dict[pos]) {
	pos++;
    }

    // no difference within the length of the searched word
    if (pos >= length) {
	return (dict[pos] == '\t') ? 0 : 1;
    }

    // a difference was found at pos
    d = dict[pos];
    if (d < 'A' || d > 'z') {
	// tab (dict entry is shorter) or something such as . , ...
	return -1;
    }

    // the differing dict char lies in the 'A'..'z' range
    return (d - sword[pos] > 0) ? 2 : -2;
}

/*
 * GetTranslationBuffer
 *
 * Allocates a Translation node plus a text buffer of `size' bytes for it.
 * On success the node's `next' is NULL and `buf' points to the new buffer;
 * no other field is initialised here.
 * Returns NULL if either allocation fails (reported through perror and
 * Dprint). Nothing in this function keeps a reference to the returned node,
 * so releasing the node and its buffer is left to the caller.
 */
Translation * GetTranslationBuffer (int size)
{
    Translation *t;

    t = malloc(sizeof *t);
    if (!t) {
	perror("malloc");
	Dprint(("malloc error in GetTranslationBuffer\n"));
	return NULL;
    }
    t->buf = malloc(size);
    if (!t->buf) {
	// report first: free() is allowed to disturb errno
	perror("malloc");
	Dprint(("malloc error in GetTranslationBuffer\n"));
	// do not leak the half-built node
	free(t);
	return NULL;
    }
    t->next = NULL;
    return t;
}

/*
 * core of the search engine
 *
 * Reads `fp' line by line from its current position and compares each line
 * with the first `length' characters of `word' using CompareWords():
 *   - negative result: the line is skipped;
 *   - result >= 2:     the scan stops;
 *   - result 0:        the line is appended to the result list and the
 *                      scan continues;
 *   - result 1:        the scan stops; the line is appended first only when
 *                      `partial_match' is set and no exact match has been
 *                      collected so far.
 * Each node stores a copy of the whole line, its strlen in `length' and
 * `length' of the searched word in `offset'.
 *
 * Returns the head of the collected list, or NULL if nothing was collected.
 * If a node cannot be allocated, whatever was collected so far is returned.
 */
static Translation * SearchWordInFilestream (FILE *fp, char *word, int length, Boolean partial_match)
{
    Translation thead, *ttail, *trs;
    char  buf[XBUFSIZ];
    int  rcmp;
    size_t  linelen;
    Boolean  exact_match = FALSE;

    Dprint(("SearchWordInFileStream(%s, %d)\n", word, length));

    thead.next = NULL;
    ttail = &thead;

    while (fgets(buf, sizeof(buf), fp)) {
	rcmp = CompareWords(word, buf, length);
	// Dprint(("(%d)|%s|%s|%d\n", rcmp, word, buf, length));
	if (rcmp < 0) {
	    continue;
	}
	if (rcmp >= 2) {
	    break;
	}
	if (rcmp == 1 && (!partial_match || exact_match)) {
	    // a longer entry, and it is not wanted
	    break;
	}

	// exact matching (0) or a wanted partial matching (1): keep the line
	linelen = strlen(buf);
	trs = GetTranslationBuffer((int)(linelen + 1));
	if (!trs) {
	    Dprint(("GetTranslationBuffer error in SearchWordInFileStream\n"));
	    break;
	}
	// the buffer holds linelen + 1 bytes, so the terminator is copied too
	memcpy(trs->buf, buf, linelen + 1);
	trs->length = linelen;
	trs->offset = length;
	ttail = ttail->next = trs;

	if (rcmp == 1) {
	    // only the first partial match is reported
	    break;
	}
	exact_match = TRUE;
    }

    // Also reached at end of file: matches found on the last lines of the
    // dictionary must be handed back, not dropped.
    return thead.next;
}

#ifdef ADDITIONAL_SEARCH
/*
 * Rewind the dictionary to its beginning and run one more lookup on it.
 * Returns NULL without searching if the stream cannot be rewound.
 */
static Translation * SearchWordFromTop (FILE *fp, char *word, int length, Boolean partial_match)
{
    if (fseek(fp, 0L, SEEK_SET) != 0) {
	perror("fseek");
	return NULL;
    }
    return SearchWordInFilestream(fp, word, length, partial_match);
}
#endif

/*
 * Search word in the dictionary
 *
 *   dict:    path of the dictionary file; NULL selects DICT_FILE.
 *   word:    the word to look up.
 *   length:  number of characters of `word' to use.
 *
 * The word is first looked up as it is. When ADDITIONAL_SEARCH is defined
 * and that fails, modified forms of a private copy of the word are tried in
 * turn (each one only if its ADDITIONAL_SEARCH_FOR_* macro is defined):
 * blanks/separators trimmed, first letter lowered, all letters lowered,
 * a trailing "s"/"d"/"es"/"ed" removed, a trailing "ing" removed, and finally
 * a lookup that also accepts the first entry starting with the word.
 *
 * Returns the list built by SearchWordInFilestream(), or NULL if nothing
 * was found. The process exits with ERR if the dictionary cannot be opened.
 */
Translation * SearchWord (char *dict, char *word, int length)
{
    FILE  *fp;
    Translation  *found;
#ifdef ADDITIONAL_SEARCH
    char  *work = NULL;		/* modifiable copy of word, freed before returning */
#endif

    Dprint(("SearchWord(%s, %d)\n", word, length));
    if (!dict) {
	dict = DICT_FILE;
	Dprint(("SearchWord: Using the default dictionary %s\n", dict));
    }

#ifdef DEBUG
    {
	int i;
	for (i = 0; i < length; i++)
	    fprintf(stderr, "(%x)", (unsigned int)(unsigned char)word[i]);
	fprintf(stderr, "\n");
    }
#endif

    fp = fopen(dict, "r");
    if(!fp) {
	perror("fopen");
	Dprint(("%s\n", dict));
	exit(ERR);
    }

    Dprint(("%d -- %s\n", length, word));

    /* The translation is found if the word is equal to the index */
    found = SearchWordInFilestream(fp, word, length, FALSE);

    if (found)  goto I_find_out_the_word;

#ifdef ADDITIONAL_SEARCH
    /* The steps below index the last characters of the word, so there is
       nothing to try for an empty word. */
    if (length > 0) {
	char *buf;
	int  len = length;

	/* Since the string must be modified, we prepare a new string. */
	work = malloc((size_t)len + 1);
	if (!work) {
	    perror("malloc");
	    goto I_find_out_the_word;
	}
	memcpy(work, word, (size_t)len);
	work[len] = '\0';
	buf = work;

# ifdef ADDITIONAL_SEARCH_FOR_REMOVE_BLANK
	/* If there are spaces in the tail of the word, removing it.
	   Stops at the first character that is neither a blank nor a
	   separator, and never touches the first character. */
	{
	    int  before = len;

	    while (len > 1 && (IsSpace(buf[len - 1]) || IsSeparator(buf[len - 1]))) {
		buf[len - 1] = '\0';
		len--;
	    }
	    if (len < before) {
		found = SearchWordFromTop(fp, buf, len, FALSE);
		if (found)  goto I_find_out_the_word;
	    }
	}
	/* If there are spaces in the head of the word, removing it.
	   buf itself is advanced, so that buf and len keep describing the
	   same string for all the following steps. */
	{
	    int  skipped = 0;

	    while (skipped < len && (IsSpace(buf[skipped]) || IsSeparator(buf[skipped]))) {
		skipped++;
	    }
	    if (skipped > 0) {
		buf += skipped;
		len -= skipped;
		if (len <= 0) {
		    /* nothing but blanks: there is no word to look up */
		    goto I_find_out_the_word;
		}
		found = SearchWordFromTop(fp, buf, len, FALSE);
		if (found)  goto I_find_out_the_word;
	    }
	}
# endif //  ADDITIONAL_SEARCH_FOR_REMOVE_BLANK

# ifdef ADDITIONAL_SEARCH_FOR_CAPITALIZE
	/* If the word begins with upper case char, we try to search the
	   word starting with lower case */
	if (IsUpperCase(*buf)) {
	    ToLowerCase(buf[0]);
	    found = SearchWordFromTop(fp, buf, len, FALSE);
	    if (found)  goto I_find_out_the_word;
	}
# endif // ADDITIONAL_SEARCH_FOR_CAPITALIZE

# ifdef ADDITIONAL_SEARCH_FOR_UPPERCASE
	/* If the word is written with upper case letters, e.g. all
	   characters are uppercase, try it in lower case. */
	{
	    Boolean  is_changed = FALSE;
	    int  i;

	    for (i = 0; i < len; i++) {
		if (IsUpperCase(buf[i])) {
		    ToLowerCase(buf[i]);
		    is_changed = TRUE;
		}
	    }
	    if (is_changed) {
		found = SearchWordFromTop(fp, buf, len, FALSE);
		if (found)  goto I_find_out_the_word;
	    }
	}
# endif // ADDITIONAL_SEARCH_FOR_UPPERCASE

# ifdef ADDITIONAL_SEARCH_FOR_ADDITIONAL_S
	/* If there is a 's' or 'd' at the tail, removing it ("es"/"ed" are
	   removed as a whole, and "ies"/"ied" become "y").
	   A one-letter word is left alone: stripping it would leave an
	   empty word. */
	if (len > 1 && (buf[len - 1] == 's' || buf[len - 1] == 'd')) {
	    if (len > 2 && buf[len - 2] == 'e') {
		if (len > 3 && buf[len - 3] == 'i')
		    buf[len - 3] = 'y';
		buf[len - 2] = '\0';
		len -= 2;
	    } else {
		buf[len - 1] = '\0';
		len -= 1;
	    }

	    found = SearchWordFromTop(fp, buf, len, FALSE);
	    if (found)  goto I_find_out_the_word;
	}
# endif // ADDITIONAL_SEARCH_FOR_ADDITIONAL_S

# ifdef ADDITIONAL_SEARCH_FOR_ING
	/* If there is "ing" at the tail, removing it. */
	if (len > 3 && strncmp(&buf[len - 3], "ing", 3) == 0) {
	    if (len > 5 && buf[len - 4] == buf[len - 5]) {
		/* like cutting: drop the doubled letter as well */
		buf[len - 4] = '\0';
		len -= 4;
	    } else {
		buf[len - 3] = '\0';
		len -= 3;
	    }

	    found = SearchWordFromTop(fp, buf, len, FALSE);
	    if (found)  goto I_find_out_the_word;
	}
# endif // ADDITIONAL_SEARCH_FOR_ING

	/* If there is no word exactly matching but a word headed with the search word,
	 * such as exactly for exact, printing the first one. */
	found = SearchWordFromTop(fp, buf, len, TRUE);
    }
#endif /* ADDITIONAL_SEARCH */

    /* Every path ends up here, whether or not the word was found. */
  I_find_out_the_word:

    // checking is done, then closing the dictionary.
    fclose(fp);

#ifdef ADDITIONAL_SEARCH
    // The results hold their own copies of the dictionary lines, so the
    // working copy of the word is not needed any more.
    free(work);
#endif

    // If we can't find the word in the dictionary, giving up.
    if (! found) return NULL;

    /* NOTE(review): the block below names i_got_it and buf, neither of which
       is declared at this point of the function - check before defining DEBUG_. */
#ifdef DEBUG_
    Dprint(("(%c) [%s] [%s] [%s]\n", i_got_it ? 'o' : 'x', word, buf, buf));
    {
	char *c;
	for (c = buf; *c; c++)
	    Dprint(("[%c:%x]", *c, *c));
    }
#endif

    /* NOTE(review): this file defines REMOVE_UNPRINTABLE_GARBAGE, not
       REMOVE_UNPRINTABLE, so the block below is compiled only if
       REMOVE_UNPRINTABLE comes from somewhere else - confirm which is meant. */
#ifdef REMOVE_UNPRINTABLE
    /* remove line feed and tab */
    {
	Translation *tr;
	char *c;
	for (tr = found; tr; tr = tr->next) {
	    for (c = tr->buf; *c; c++) {
		if (!IsPrintable(*c)) {
		    *c = ' ';
		}
	    }
	}
    }
#endif

    return found;
}

// eof

