/*
 * pat.c - library for patricia tree
 *
 * Copyright (C) 1996, 1997, 2000, 2001, 
 *                            Nara Institute of Science and Technology
 *                           
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Nara Institute of 
 *      Science and Technology.
 * 4. The name Nara Institute of Science and Technology may not be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *    
 *
 * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE Nara Institute
 * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $Id: pat.c,v 1.11 2001/02/14 00:20:53 masayu-a Exp $
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "pat.h"

/*
 * strcpy_tonl --- copy bytes from src to dst up to and including the
 * first '\n'.
 *
 * The terminator is the newline, not '\0': NUL bytes met before the
 * newline are copied like any other byte, and no '\0' is appended after
 * the newline.  No bound is applied; dst must be large enough.
 */
static void strcpy_tonl(char *dst, char *src)
{
    char c;

    do {
        c = *src++;
        *dst++ = c;
    } while (c != '\n');
}

/*
 * strcmp_tonl --- compare two newline-terminated records.
 *
 * Walks both buffers in step until s1 reaches '\n' or the bytes differ,
 * then returns the difference of the two bytes at that position
 * (0 when the records are equal up to and including s1's newline).
 * '\0' is not a terminator here; it is compared like any other byte.
 */
static int strcmp_tonl(char *s1, char *s2)
{
    size_t i = 0;

    while (s1[i] != '\n' && s1[i] == s2[i])
        i++;

    return (int)(s1[i] - s2[i]);
}

/****************************************************
* pat_bits --- return the bit at a given position of a string
*
* Parameters
*   string --- the byte string
*   cbit --- bit position; the string is viewed as one long bit sequence
*            and positions are counted 0,1,2,3... from the most
*            significant bit of the first byte
*   len --- length of the string in bytes (passed in so that strlen is
*           not recomputed on every call)
*
* Return value
*   0 / not 0
*     - 0 when the byte holding the bit lies at or beyond len
*     - 1 when cbit is negative (and the length test above did not fire)
*     - otherwise the selected bit, left in place (e.g. 0x40 for bit 1)
****************************************************/
static int pat_bits(char *string, int cbit, int len)
{
    int byte_pos = cbit / 8;  /* index of the byte containing the bit */

    /* The length test comes first, so a zero-length string yields 0
       even for a negative cbit. */
    if (byte_pos >= len)
        return 0;

    /* Negative bit position: always answer 1. */
    if (cbit < 0)
        return 1;

    /* 0x80 >> k selects the k-th bit counted from the MSB. */
    return string[byte_pos] & (0x80 >> (cbit % 8));
}

/*
 * pat_memcmp --- equality-oriented byte comparison.
 *
 * Returns 0 when the first n bytes of s1 and s2 are equal and non-zero
 * otherwise.  The n == 2 case is special-cased and yields exactly 0 or 1;
 * for every other n the result is whatever memcmp() returns.
 */
static int pat_memcmp(unsigned char *s1, unsigned char *s2, int n)
{
    if (n != 2)
        return memcmp(s1, s2, n);

    return (s1[0] == s2[0] && s1[1] == s2[1]) ? 0 : 1;
}

/*
 * get_next_node --- choose which child of `node` to follow.
 * Tests bit number `checkbit` of `key` with pat_bits() (`key_length` is
 * the key's length in bytes): a non-zero bit selects node->right, a zero
 * bit selects node->left.
 */
#define get_next_node(node, key, checkbit, key_length) \
((pat_bits((key), (checkbit), (key_length))) ? (node)->right : (node)->left)

/****************************************************
* pat_search --- ѥȥꥷڤ򸡺
* 
* ѥ᡼
*   key --- 
*   result --- ̤롥
* 
* ֤
*   λ(ݥ)
*
****************************************************/
pat_node *pat_search(pat_t *pat, char *key, char **result)
{
    pat_node *top_ptr = pat->root;
    pat_node *tmp_ptr = NULL;
    pat_node *ptr = pat->root->right;
    pat_index_list *list;
    int checkbit;
    int key_length = strlen(key); /* ʸƤ */
    int match_len = 0; /* ǥޥåPrefixʸ */
    int result_last = 0;

    do {
	checkbit = ptr->checkbit;
	/* ߵӥåȤʤ */
	if(checkbit % SIKII_BIT == 0 && checkbit){ /* ñõ */
	    tmp_ptr = ptr->left;
#ifdef DEBUG
	    printf("\n[%d,%02x%02x]", checkbit, key[0], key[1]);
#endif
      /* ƬΡָФʬǥޥå󥰤Ԥʤ */
	    if (!pat_memcmp(key + match_len,
			    pat_get_text(pat,
					 (tmp_ptr->il).index)
			    + match_len, checkbit / 8 - match_len)) {
	/* Ĥ */
		match_len = checkbit / 8; /* ǥޥåPrefixʸ */
		list = &(tmp_ptr->il); /* ꥹǤμФ */
		while (list != NULL) {
		    result[result_last++] = pat_get_text(pat,list->index);
		    list = list->next;
		}
	    } else { /* ǼԤȯ */
		result[result_last] = NULL;
		return ptr;
	    }
	}

	/* key  checkbitӥåܤǺ˿ʬ */
	ptr = get_next_node(ptr, key, checkbit, key_length);
    } while (checkbit < ptr->checkbit);

    if (ptr != tmp_ptr || ptr == top_ptr) { /* λΡɤå */
	char *line = pat_get_text(pat,(ptr->il).index);
	/* bufferƬΡָФʬǥޥå󥰤Ԥʤ */
	/* ɤޤñPrefixå */
	if (!pat_memcmp(key + match_len,
			line + match_len, strlen(line) - match_len)) {
	    /* оñ줫ݤΥå */
	    if (match_len != key_length) {
		list = &(ptr->il); /* ꥹǤμФ */
		while(list != NULL){
		    result[result_last++] = pat_get_text(pat, list->index);
		    list = list->next;
		}
	    }
	}
    }
    result[result_last++] = NULL;

    return ptr;
}


/****************************************************
* pat_search_exact --- search the patricia tree (exact match)
*
* Parameters
*   pat --- the tree (the walk starts at pat->root)
*   key --- NUL-terminated search key
*   result --- output array; receives the text pointers of every entry
*              stored under the key, followed by a NULL terminator.
*              No bound check is performed.
*
* Return value
*   the node at which the search stopped (whether or not it matched)
****************************************************/
pat_node *pat_search_exact(pat_t *pat, char *key, char **result)
{
    int key_length = strlen(key);
    int count = 0;
    pat_node *parent;
    pat_node *node = pat->root;
    pat_index_list *entry;

    /* Descend until a link no longer leads to a larger checkbit. */
    for (;;) {
        parent = node;
        node = get_next_node(parent, key, parent->checkbit, key_length);
        if (!(parent->checkbit < node->checkbit))
            break;
    }

    /* The walk only inspects selected bits, so confirm with a full
       string comparison before reporting anything. */
    if (strcmp(key, pat_get_text(pat, (node->il).index)) == 0) {
        for (entry = &(node->il); entry != NULL; entry = entry->next)
            result[count++] = pat_get_text(pat, entry->index);
    }
    result[count] = NULL;

    return node;
}

/****************************************************
* pat_search4insert --- tree walk used by insertion
*
* Parameters
*   key --- NUL-terminated key
*   node --- node at which the walk starts
*
* Return value
*   the node reached when a link no longer leads to a node with a
*   larger checkbit
****************************************************/
static pat_node *pat_search4insert(char *key, pat_node *node)
{
    int key_length = strlen(key);
    int prev_bit;

    do {
        prev_bit = node->checkbit;
        node = get_next_node(node, key, prev_bit, key_length);
    } while (prev_bit < node->checkbit);

    return node;
}


/****************************************************
* pat_insert --- insert an entry into the patricia tree
*
* Parameters
*   pat --- the tree
*   line --- the record to insert.  The key is the NUL-terminated prefix
*            of line (its length is taken with strlen); duplicate
*            detection compares the record up to its first '\n' with
*            strcmp_tonl(), so the record is expected to end with '\n'.
*   index --- index of the record, as understood by pat_get_text()
*
* Return value
*   none
*
* Behaviour
*   - If the node reached for the key already stores text that the key
*     matches, the record is appended to that node's index list, unless
*     an identical record is already there (then nothing happens).
*   - Otherwise a new node is created, with checkbit set to the first bit
*     at which the key and the stored key differ.
*
* The key and the stored text are only read, so they are accessed in
* place instead of being copied into fixed-size stack buffers.
****************************************************/
void pat_insert(pat_t *pat, char *line, long index)
{
    pat_node *x_ptr = pat->root;
    pat_node *t_ptr, *p_ptr, *new_ptr;
    pat_index_list *list, *last, *new_l_ptr;
    char *key = line;       /* key = NUL-terminated prefix of line */
    char *stored = "";      /* key text of the node found; empty if none */
    int key_length = strlen(key);
    int stored_length;
    int max_bits;
    int diff_bit;

    /* Find the node the key leads to. */
    t_ptr = pat_search4insert(key, x_ptr);

    if ((t_ptr->il).index >= 0) {
        stored = pat_get_text(pat, (t_ptr->il).index);

        /* NOTE(review): this only checks that key is a prefix of the
           stored text, so a key that is a strict prefix of the stored key
           is treated as the same key.  pat_search_exact() uses a full
           strcmp(); confirm whether strcmp() is intended here too. */
        if (strncmp(key, stored, key_length) == 0) {
            /* Same key: scan the index list for an identical record. */
            last = NULL;
            for (list = &(t_ptr->il); list != NULL; list = list->next) {
                if (strcmp_tonl(pat_get_text(pat, list->index), line) == 0)
                    return;   /* identical record already present */
                last = list;
            }

            /* Append a new index entry after the last list element
               (the list always has at least the node's own entry). */
            new_l_ptr = pat_malloc_index_list();
            new_l_ptr->index = index;
            new_l_ptr->next = NULL;
            last->next = new_l_ptr;

            return;
        }
    }
    /* else: the node carries no data yet (index < 0); `stored` stays
       empty, so the key is compared against an all-zero bit string. */

    /* Find the first bit at which key and the stored key differ.  Beyond
       the longer of the two strings both sides read as 0, so the scan is
       bounded there; without a bound it would never terminate when no
       differing bit exists (e.g. an empty key in an empty tree). */
    stored_length = strlen(stored);
    max_bits = 8 * (key_length > stored_length ? key_length : stored_length);
    for (diff_bit = 0; diff_bit < max_bits; diff_bit++) {
        if (!pat_bits(key, diff_bit, key_length)
            != !pat_bits(stored, diff_bit, stored_length))
            break;
    }
    if (diff_bit >= max_bits)
        return;   /* no distinguishing bit: nothing can be inserted */

    /* Walk down again to find the link where the new node belongs:
       stop at the first node whose checkbit is not below diff_bit, or
       when a link stops leading to a larger checkbit. */
    do {
        p_ptr = x_ptr;
        x_ptr = get_next_node(x_ptr, key, x_ptr->checkbit, key_length);
    } while ((x_ptr->checkbit < diff_bit)
             && (p_ptr->checkbit < x_ptr->checkbit));

    /* Create the new node. */
    new_ptr = pat_malloc_node();
    new_ptr->checkbit = diff_bit;
    (new_ptr->il).index = index;
    (new_ptr->il).next = NULL;

    /* The link on the key's own side of the new check bit points back to
       the new node itself; the other link takes over the old subtree. */
    if (pat_bits(key, new_ptr->checkbit, key_length)) {
        new_ptr->right = new_ptr;
        new_ptr->left = x_ptr;
    } else {
        new_ptr->left = new_ptr;
        new_ptr->right = x_ptr;
    }

    /* Hook the new node under its parent on the side the key selects. */
    if (pat_bits(key, p_ptr->checkbit, key_length))
        p_ptr->right = new_ptr;
    else
        p_ptr->left = new_ptr;

    return;
}

/****************************************************
* pat_show_patfile --- dump the patricia tree (not implemented)
*
* Parameters
*   pat --- the tree
*   out_to --- output stream
*   prefix --- prefix string
*
* Return value
*   none
*
* This function is broken: it has no working implementation and
* performs no action.  It is kept only so that existing callers keep
* linking.
****************************************************/
void pat_show_patfile(pat_t *pat, FILE *out_to, char *prefix)
{
    /* Intentionally a no-op; the arguments are unused. */
    (void)pat;
    (void)out_to;
    (void)prefix;
}
