/* $Id: translator.c,v 1.4 2004/06/26 15:31:11 makigura Exp $ */
/*
 * xbabylon translator, the translator on X Window System.
 * Copyright (c) 2001 Shigeki Kaneko, all right reserved.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "xbabylon.h"

/* When defined, unprintable garbage at the tail of a word taken from the
   cut buffer is removed. Such garbage often occurs with Mozilla (bug?). */
#define REMOVE_UNPRINTABLE_GARBAGE

/* When ADDITIONAL_SEARCH is defined, these macros enable searching for
   possible variants derived from the target word. */
#ifdef ADDITIONAL_SEARCH
# define ADDITIONAL_SEARCH_FOR_REMOVE_BLANK
# define ADDITIONAL_SEARCH_FOR_CAPITALIZE
# define ADDITIONAL_SEARCH_FOR_ADDITIONAL_ER
# define ADDITIONAL_SEARCH_FOR_ADDITIONAL_S
# define ADDITIONAL_SEARCH_FOR_ING
# define ADDITIONAL_SEARCH_FOR_UPPERCASE
#endif

/* #ifdef REMOVE_UNPRINTABLE_GARBAGE */
/* # define REMOVE_0A_20 */
/* #endif */

/*
 * Matching function: compare the search word with the head of one
 * dictionary line.
 *
 * Only the first `length` characters of `sword` are examined.  A TAB in
 * `dict` marks the end of the dictionary headword.
 *
 * Returns:
 *   -2:  first mismatch is on a dict character in 'A'..'z' that does not
 *        sort after the search word's character (word not reached yet).
 *   -1:  first mismatch is on a TAB (the dict headword is shorter) or on
 *        any other character outside 'A'..'z', such as '.' or ','.
 *    0:  all `length` characters match and the dict headword ends there.
 *   +1:  all `length` characters match but the dict headword has more chars.
 *   +2:  first mismatch is on a dict character in 'A'..'z' that sorts
 *        after the search word's character (gone past the word).
 *
 * Note that the range 'A'..'z' also contains the punctuation characters
 * lying between 'Z' and 'a' in ASCII.
 */
static int  CompareWords (char *sword, char *dict, int length)
{
    int  pos = 0;

    /* walk over the common prefix, at most `length` characters */
    while (pos < length && sword[pos] == dict[pos]) {
	pos++;
    }

    if (pos >= length) {
	/* identical up to the length of the searched word */
	if (dict[pos] == '\t')
	    return 0;
	return 1;
    }

    /* a difference was found at pos */
    if (dict[pos] < 'A' || dict[pos] > 'z') {
	/* TAB (dict headword is shorter) or a character such as . , ... */
	return -1;
    }

    /* the differing dict character is in the alphabetic range */
    if (dict[pos] - sword[pos] > 0)
	return 2;
    return -2;
}

/*
 * Create the list of candidate search words from the word specified by
 * the user.
 *
 * When REMOVE_UNPRINTABLE_GARBAGE is defined, the word in `srchword` is
 * first cleaned up in place.  When ADDITIONAL_SEARCH is defined, derived
 * candidates (blanks stripped, lower-cased, suffixes removed, and finally
 * a partial-match entry) are chained behind it through `next`.  Each
 * candidate is derived from the most recently appended one, not from the
 * original word.
 *
 * Returns `srchword` itself, which is the head of the list.  If a
 * candidate buffer cannot be obtained, the list built so far is returned.
 *
 * NOTE(review): this code relies on GetSearchwordBuffer(n) returning an
 * entry whose `length` is n, whose `word` can hold n + 1 bytes and whose
 * `next` is NULL -- confirm against its definition.
 */
Searchword * ExpandSearchword (Searchword *srchword)
{
#ifdef REMOVE_UNPRINTABLE_GARBAGE
    {
        int  i;
# ifdef REMOVE_0A_20
        for (i = 0; i < srchword->length - 1; i++) {
            char *b = srchword->word;
            if (b[i] == 0xa && b[i + 1] == 0x20) {
                int j;
                for (j = 0; i + 2 + j < srchword->length; j++) {
                    if (b[i + 2 + j] != 0x20 && i + 2 + j + 1 < srchword->length && b[i + 3 + j] != 0xa)
                        break;
                    b[j] = b[i + 2 + j];
                }
                b[j] = '\0';
                srchword->length = j;
                break;
            }
        }
# endif // REMOVE_0A_20
        /* Drop the unprintable tail.  Stop at the first printable
           character so that `length` always agrees with the string. */
        for (i = srchword->length - 1; i > 0; i--) {
            if (IsPrintable(srchword->word[i]))
                break;
            srchword->word[i] = '\0';
            srchword->length--;
        }
        /* Drop the unprintable head.  This also covers index 0, which the
           loop above never examines. */
        for (i = 0; i < srchword->length; i++) {
            if (IsPrintable(srchword->word[i]))
                break;
        }
        if (i > 0) {
            srchword->length -= i;
            memmove(srchword->word, srchword->word + i, srchword->length);
            srchword->word[srchword->length] = '\0';
        }
    }
#endif // REMOVE_UNPRINTABLE_GARBAGE

#ifdef ADDITIONAL_SEARCH
    {
        Searchword  *swd, *swdlast = srchword;
        int  i;

# ifdef ADDITIONAL_SEARCH_FOR_REMOVE_BLANK
        /* If there are spaces in the tail of the word, removing it. */
        {
            swd = GetSearchwordBuffer(swdlast->length);
            if (!swd) return srchword;
            memcpy(swd->word, swdlast->word, swdlast->length + 1);

            /* Only the trailing run is removed; a blank inside the word
               must not truncate it. */
            for (i = swd->length - 1; i > 0; i--) {
                if (!IsSpace(swd->word[i]) && !IsSeparator(swd->word[i]))
                    break;
                swd->word[i] = '\0';
                swd->length--;
            }
            if (swd->length < swdlast->length) {
                swdlast = swdlast->next = swd;
            } else {
                FreeSearchwordBuffer(swd);
            }
        }
        /* If there are spaces in the head of the word, removing it. */
        {
            int  skip = 0;

            swd = GetSearchwordBuffer(swdlast->length);
            if (!swd) return srchword;
            memcpy(swd->word, swdlast->word, swdlast->length + 1);

            /* Count the leading blanks against the unmodified length. */
            while (skip < swd->length
                   && (IsSpace(swd->word[skip]) || IsSeparator(swd->word[skip]))) {
                skip++;
            }
            if (skip > 0) {
                /* Shift the rest (including the terminator) to the front
                   in place; no second allocation is needed. */
                memmove(swd->word, swd->word + skip, swd->length - skip + 1);
                swd->length -= skip;
                swdlast = swdlast->next = swd;
            } else {
                FreeSearchwordBuffer(swd);
            }
        }
# endif //  ADDITIONAL_SEARCH_FOR_REMOVE_BLANK

# ifdef ADDITIONAL_SEARCH_FOR_CAPITALIZE
        {
            swd = GetSearchwordBuffer(swdlast->length);
            if (!swd) return srchword;
            memcpy(swd->word, swdlast->word, swdlast->length + 1);

            /* If the word begins with upper case char, we try to search the
               word starting with lower case */
            if (IsUpperCase(swd->word[0])) {
                ToLowerCase(swd->word[0]);
                swdlast = swdlast->next = swd;
            } else {
                FreeSearchwordBuffer(swd);
            }
        }
# endif // ADDITIONAL_SEARCH_FOR_CAPITALIZE

# ifdef ADDITIONAL_SEARCH_FOR_UPPERCASE
        {
            /* If the word contains upper case characters (e.g. it is
               written entirely in upper case), try it in lower case. */
            Boolean  is_changed = FALSE;

            swd = GetSearchwordBuffer(swdlast->length);
            if (!swd) return srchword;
            memcpy(swd->word, swdlast->word, swdlast->length + 1);

            for (i = 0; i < swd->length; i++) {
                if (IsUpperCase(swd->word[i])) {
                    ToLowerCase(swd->word[i]);
                    is_changed = TRUE;
                }
            }
            if (is_changed) {
                swdlast = swdlast->next = swd;
            } else {
                FreeSearchwordBuffer(swd);
            }
        }
# endif // ADDITIONAL_SEARCH_FOR_UPPERCASE

# ifdef ADDITIONAL_SEARCH_FOR_ADDITIONAL_ER
        {
            int  len;

            swd = GetSearchwordBuffer(swdlast->length);
            if (!swd) return srchword;
            memcpy(swd->word, swdlast->word, swdlast->length + 1);
            len = swd->length;

            /* If the word ends with 'er' / 'or' or with 'est', removing it.
               The length is tested first so that a short or empty word
               is never indexed before its start. */
            if (len > 2 && swd->word[len - 1] == 'r'
                && (swd->word[len - 2] == 'e' || swd->word[len - 2] == 'o')) {
                if (len > 4 && swd->word[len - 3] == swd->word[len - 4])
                    i = 3;          // e.g. 'bigger'
                else
                    i = 2;          // e.g. 'stronger'
                swd->word[len - i] = '\0';
                swd->length -= i;
                swdlast = swdlast->next = swd;
            } else if (len > 3 && swd->word[len - 1] == 't'
                       && swd->word[len - 2] == 's' && swd->word[len - 3] == 'e') {
                if (len > 5 && swd->word[len - 4] == swd->word[len - 5])
                    i = 4;          // e.g. 'biggest'
                else
                    i = 3;          // e.g. 'strongest'
                swd->word[len - i] = '\0';
                swd->length -= i;
                swdlast = swdlast->next = swd;
            } else {
                FreeSearchwordBuffer(swd);
            }
        }
# endif // ADDITIONAL_SEARCH_FOR_ADDITIONAL_ER

# ifdef ADDITIONAL_SEARCH_FOR_ADDITIONAL_S
        {
            int  len;

            swd = GetSearchwordBuffer(swdlast->length);
            if (!swd) return srchword;
            memcpy(swd->word, swdlast->word, swdlast->length + 1);
            len = swd->length;

            /* If the word ends with 's' or 'd', removing it ('es' / 'ed'
               lose both letters and 'ies' / 'ied' become 'y').  Words of
               a single letter are left alone: stripping them would yield
               an empty candidate. */
            if (len > 1 && (swd->word[len - 1] == 's' || swd->word[len - 1] == 'd')) {
                if (len > 2 && swd->word[len - 2] == 'e') {
                    if (len > 3 && swd->word[len - 3] == 'i')
                        swd->word[len - 3] = 'y';
                    i = 2;
                } else {
                    i = 1;
                }
                swd->word[len - i] = '\0';
                swd->length -= i;
                swdlast = swdlast->next = swd;
            } else {
                FreeSearchwordBuffer(swd);
            }
        }
# endif // ADDITIONAL_SEARCH_FOR_ADDITIONAL_S

# ifdef ADDITIONAL_SEARCH_FOR_ING
        {
            swd = GetSearchwordBuffer(swdlast->length);
            if (!swd) return srchword;
            memcpy(swd->word, swdlast->word, swdlast->length + 1);

            /* If the word ends with 'ing', removing it. */
            if (swd->length > 3 && strncmp(&swd->word[swd->length - 3], "ing", 3) == 0) {
                if (swd->length > 5 && swd->word[swd->length - 4] == swd->word[swd->length - 5])
                    i = 4;          /* like cutting */
                else
                    i = 3;

                swd->word[swd->length - i] = '\0';
                swd->length -= i;
                swdlast = swdlast->next = swd;
            } else {
                FreeSearchwordBuffer(swd);
            }
        }
# endif // ADDITIONAL_SEARCH_FOR_ING
        {
            // Last resort: the latest candidate again, as a partial match.
            swd = GetSearchwordBuffer(swdlast->length);
            if (!swd) return srchword;
            memcpy(swd->word, swdlast->word, swdlast->length + 1);
            swd->exact_match = FALSE;
            swdlast = swdlast->next = swd;
        }
    }
#endif /* ADDITIONAL_SEARCH */
#ifdef DEBUG
    {
        /* Own iterator: the one used above is out of scope here. */
        Searchword  *dbg;

        for (dbg = srchword; dbg; dbg = dbg->next) {
            Dprint(("ExpandSearchword: [%d][%s]\n", dbg->length, dbg->word));
        }
    }
#endif
    return srchword;
}
    

/*
 * Core of the search engine: scan the dictionary stream for `word`.
 *
 * Reads `fp` line by line from its current position and classifies each
 * line with CompareWords(), using the first `length` characters of `word`:
 *   - lines that compare below the word are skipped;
 *   - every line whose headword equals the word is collected;
 *   - the first line whose headword merely starts with the word ends the
 *     scan; it is collected as a partial match only if `exact_match` is
 *     FALSE and no exact match has been collected;
 *   - the first line that compares beyond the word ends the scan.
 * The early stop presumably relies on the dictionary being sorted --
 * confirm against the dictionary format.
 *
 * Returns the collected translations linked through `next` (each one
 * holds a copy of the whole line, with `offset` set to `length`), or NULL
 * if nothing was collected.  Reaching end of file returns whatever has
 * been collected so far, so a match on the last dictionary entry is not
 * lost.
 *
 * NOTE(review): a line longer than XBUFSIZ - 1 characters is delivered by
 * fgets() in several pieces and every piece is compared as if it were a
 * separate entry.
 */
static Translation * SearchWordInFilestream (FILE *fp, char *word, int length, Boolean exact_match)
{
    Translation thead, *ttail, *trs;
    char  buf[XBUFSIZ];
    size_t  linelen;
    int  rcmp;
    Boolean  complete_match = FALSE;

    Dprint(("SearchWordInFileStream(%s, %d)\n", word, length));

    /* thead is a dummy head, so appending never needs a special case */
    thead.next = NULL;
    ttail = &thead;

    while (fgets(buf, sizeof(buf), fp)) {
	rcmp = CompareWords(word, buf, length);
	// Dprint(("(%d)|%s|%s|%d\n", rcmp, word, buf, length));
	if (rcmp < 0) {
	    // the word is not reached yet
	    continue;
	}
	if (rcmp >= 2) {
	    // gone past the word
	    break;
	}

	if (rcmp == 0) {
	    // exact matching
	    Dprint(("SearchwordInFileStream: exact match(%s)(%s)\n", word, buf));
	} else {
	    // partial matching: only wanted when nothing better exists
	    if (exact_match || complete_match) {
		break;
	    }
	    Dprint(("SearchwordInFileStream: partial match(%s)(%s)\n", word, buf));
	}

	linelen = strlen(buf);
	trs = GetTranslationBuffer(linelen + 1);
	if (!trs) {
	    Dprint(("GetTranslationBuffer error in SearchWordInFileStream\n"));
	    break;
	}

	ttail = ttail->next = trs;
	trs->length = linelen;
	trs->offset = length;
	memcpy(trs->buf, buf, linelen + 1);	/* includes the terminator */

	if (rcmp != 0) {
	    // a partial match always ends the scan
	    break;
	}
	complete_match = TRUE;
    }

    /* Also reached at end of file: hand back what has been collected. */
    return thead.next;
}

/*
 * Search a word in the dictionary.
 *
 * `dict` is the path of the dictionary file; when it is NULL, DICT_FILE
 * is used.  `srchword` is expanded with ExpandSearchword() and every
 * candidate of the resulting list is looked up in turn, rewinding the
 * file between candidates, until one of them yields a result.
 *
 * Returns the translations found for the first successful candidate, or
 * NULL if no candidate is found.  If the dictionary cannot be opened the
 * process is terminated with exit(ERR).
 */
Translation * SearchWord (char *dict, Searchword *srchword)
{
    Translation  *result = NULL;
    Searchword  *cand;
    FILE  *dictfp;

    Dprint(("SearchWord(%s, %d)\n", srchword->word, srchword->length));

    /* build the list of candidates so that "not exactly" words match too */
    srchword = ExpandSearchword(srchword);

    /* fall back to the default dictionary */
    if (dict == NULL) {
	dict = DICT_FILE;
	Dprint(("SearchWord: Using the default dictionary %s", dict));
    }

#ifdef DEBUG
    {
	/* dump the (cleaned up) search word byte by byte */
	int  k = 0;
	while (k < srchword->length) {
	    Dprint(("(%x)",srchword-> word[k]));
	    k++;
	}
	Dprint(("\n"));
    }
#endif

    dictfp = fopen(dict, "r");
    if (dictfp == NULL) {
	perror("fopen");
	Dprint(("%s\n", dict));
	exit(ERR);
    }

    cand = srchword;
    while (cand != NULL) {
        Dprint(("Searchword: %d -- %s\n", cand->length, cand->word));

        /* look this candidate up from the current file position */
        result = SearchWordInFilestream(dictfp, cand->word, cand->length, cand->exact_match);
        if (result != NULL) {
            /* found in the dictionary */
            break;
        }

        /* not found: start over from the top for the next candidate */
        fseek(dictfp, 0L, SEEK_SET);
        cand = cand->next;
    }

    // all lookups are done, the dictionary is no longer needed.
    fclose(dictfp);

    // None of the candidates is in the dictionary: giving up.
    if (result == NULL) {
	return NULL;
    }

#ifdef DEBUG
    {
        Translation  *item = result;
        while (item != NULL) {
            Dprint(("Searchword: (%d) [%s]\n", item->length, item->buf));
            item = item->next;
        }
    }
#endif

#ifdef REMOVE_UNPRINTABLE
    /* replace unprintable characters (line feed, tab) with a blank */
    /* NOTE(review): this file defines REMOVE_UNPRINTABLE_GARBAGE, not
       REMOVE_UNPRINTABLE -- confirm which macro is meant here. */
    {
	Translation  *item;
	char  *p;
	for (item = result; item != NULL; item = item->next) {
	    p = item->buf;
	    while (*p != '\0') {
		if (!IsPrintable(*p))
		    *p = ' ';
		p++;
	    }
	}
    }
#endif
    return result;
}

// eof

