/**************************************************************************/
/*  Mana : A kana(romaji)-kanji conversion engine using ChaSen algorithm.    */
/*  Copyright (C) 2003, 2004  Yamagata Yoriyuki                           */
/*                                                                        */
/*  This program is free software; you can redistribute it and/or modify  */
/*  it under the terms of the GNU General Public License as published by  */
/*  the Free Software Foundation; either version 2 of the License, or (at  */
/*  your option) any later version.                                       */
/*                                                                        */
/*  This program is distributed in the hope that it will be useful, but   */
/*  WITHOUT ANY WARRANTY; without even the implied warranty of            */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     */
/*  General Public License for more details.                              */
/**************************************************************************/

/* $Id: darts.cpp 71 2005-05-01 19:59:33Z yori $ */

#include <string>
#include <vector>
#include <map>
#include <iostream>
#include <darts.h>
extern "C" {
#include <caml/memory.h>
#include <caml/mlvalues.h>
#include <caml/alloc.h>
#include "chalib.h"
#include "dartsdic.h"
#include "tools.h"
}

/* Double-array type used for every dictionary in this file: the Darts
 * implementation instantiated with char / unsigned char and
 * long / unsigned long.  Search results are therefore handled as `long`
 * (see index_buffer in darts_lookup_prefix). */
typedef Darts::DoubleArrayImpl<char, unsigned char, long, unsigned long>
DoubleArrayL;

/* One opened dictionary.  Pointers to this struct are handed to OCaml cast
 * to `value` by darts_open / get_darts and cast back in the lookup stubs. */
struct _darts_t {
    DoubleArrayL *da;       /* double array searched by the lookup stubs */
    cha_mmap_t *da_mmap;    /* presumably the mapping backing `da` -- TODO confirm in dartsdic */
    cha_mmap_t *lex_mmap;   /* presumably the mapped lexicon file -- TODO confirm in dartsdic */
    cha_mmap_t *dat_mmap;   /* presumably the mapped data file -- TODO confirm in dartsdic */
};

/* Number of entries in the fixed-size index and lexeme buffers used by the
 * lookup functions below. */
#define DIC_BUFSIZ 256

/*
 * darts_open dname lexname datname
 *
 * Calls da_open with the three OCaml strings and returns the resulting
 * darts_t pointer to OCaml, cast directly to `value` (no boxing is done).
 * The pointer returned by da_open is passed through unchecked.
 */
extern "C" value darts_open (value dname, value lexname, value datname){
  CAMLparam3(dname, lexname, datname);

  darts_t *const dic = da_open (String_val(dname),
				String_val(lexname),
				String_val(datname));

  CAMLreturn ((value)dic);
}

/*
 * get_darts n
 *
 * Returns entry number `n` of the global Da_dicfile table, cast to `value`.
 * The index is used as given; no range check against Da_ndicfile is made.
 */
extern "C" value get_darts (value v){
  CAMLparam1 (v);

  const int slot = Long_val(v);
  darts_t *const dic = Da_dicfile[slot];

  CAMLreturn ((value)dic);
}

/*
 * ndicfile ()
 *
 * Returns the global Da_ndicfile counter as an OCaml integer.  The unit
 * argument is ignored.
 */
extern "C" value ndicfile (value unit){
  CAMLparam1(unit);

  const value n_dics = Val_long(Da_ndicfile);

  CAMLreturn (n_dics);
}

/*
 * darts_lookup da string pos len
 *
 * Exact-match search: looks up the bytes of `string` starting at offset
 * `pos`, with `len` passed as the key length, in the double array of `da`.
 *
 * Returns an OCaml array with one element per lexical entry reported by
 * da_get_lex for the match.  Each element is a malloc'ed mrph_t pointer cast
 * to `value`.  The array is empty when the search yields a negative index or
 * da_get_lex reports no entries.
 *
 * NOTE(review): Darts appears to treat a key length of 0 as "measure the
 * NUL-terminated key yourself" -- confirm callers never pass len = 0
 * expecting an empty key.
 * NOTE(review): da_get_lex is given no capacity for lex_data; this code
 * assumes it never reports more than DIC_BUFSIZ entries -- confirm.
 */
extern "C" value darts_lookup (value da, value string, value pos, value len){
  CAMLparam4(da, string, pos, len);
  CAMLlocal1(mrph_array);
  darts_t *dic = (darts_t *)da;
  da_lex_t lex_data[DIC_BUFSIZ];
  /* DoubleArrayL is instantiated with `long`; an `int` here would truncate
     the search result on LP64 targets. */
  long index;
  int nlex, j, keyword_len;

  index = dic->da
    ->exactMatchSearch(String_val(string) + Long_val(pos),
		       Long_val(len));

  if(index < 0){
    CAMLreturn (alloc(0, 0));
  }

  nlex = da_get_lex(dic,
		    index,
		    lex_data, &keyword_len);
  if (nlex < 0) {
    nlex = 0;
  }

  /* The result size is known up front, so the entries are stored straight
     into the OCaml array instead of going through a stack buffer.
     `mrph_array` and `string` are registered roots, and String_val is
     re-evaluated on every use. */
  mrph_array = alloc(nlex, 0);

  for (j = 0; j < nlex; j++) {
    mrph_t *new_mrph;
    char *keyword;
    keyword = copy_new_string(String_val(string)+Long_val(pos), 
			      keyword_len);

    /* NOTE(review): the malloc result is not checked for NULL. */
    new_mrph = (mrph_t *)malloc (sizeof (mrph_t));
    /* Copy the lexeme data first so the explicit field assignments below can
       never be overwritten by it.  Assumes mrph_t starts with the fields of
       da_lex_t -- confirm against chalib.h. */
    memcpy(new_mrph, lex_data + j, sizeof(da_lex_t));
    new_mrph->keyword = keyword;
    new_mrph->keyword_len = keyword_len;
    new_mrph->is_undef = 0;
    new_mrph->darts = dic;

    Store_field (mrph_array, j, (value)new_mrph);
  }

  CAMLreturn (mrph_array);

}

/*
 * darts_lookup_prefix da string pos len
 *
 * Common-prefix search: runs commonPrefixSearch on the double array of `da`
 * over the bytes of `string` starting at offset `pos`, with `len` passed as
 * the key length, and expands every hit into its lexical entries with
 * da_get_lex.
 *
 * Returns an OCaml array of pairs (mrph, keyword_len).  `mrph` is a
 * malloc'ed mrph_t pointer cast to `value`; keyword_len is the length
 * da_get_lex reported for the hit the entry came from.  The array is empty
 * when nothing matches.  At most DIC_BUFSIZ hits are expanded.
 *
 * NOTE(review): Darts appears to treat a key length of 0 as "measure the
 * NUL-terminated key yourself" -- confirm callers never pass len = 0
 * expecting an empty key.
 * NOTE(review): da_get_lex is given no capacity for lex_data; this code
 * assumes it never reports more than DIC_BUFSIZ entries per hit -- confirm.
 */
extern "C" value darts_lookup_prefix (value da, 
				      value string, 
				      value pos, 
				      value len){
  CAMLparam4(da, string, pos, len);
  CAMLlocal2(pair, mrph_array);
  darts_t *dic = (darts_t *)da;
  long index_buffer[DIC_BUFSIZ];
  da_lex_t lex_data[DIC_BUFSIZ];
  /* Growable result buffers: the number of entries per hit is not bounded
     here, so fixed-size stack arrays could be overrun. */
  std::vector<mrph_t *> mrphs;
  std::vector<int> mrph_lens;
  size_t i, num;

  num = dic->da
    ->commonPrefixSearch(String_val(string) + Long_val(pos),
			 index_buffer, 
			 DIC_BUFSIZ, 
			 Long_val(len));

  /* Darts reports the total number of matches even when it exceeds the
     capacity it was given; only the first DIC_BUFSIZ slots of index_buffer
     are filled, so never read past them. */
  if (num > DIC_BUFSIZ) {
    num = DIC_BUFSIZ;
  }

  for (i = 0; i < num; i++) {
    int nlex, j, keyword_len;
    nlex = da_get_lex(dic,
		      index_buffer[i],
		      lex_data, &keyword_len);

    for (j = 0; j < nlex; j++) {
      mrph_t *new_mrph;
      char *keyword;
      keyword = copy_new_string(String_val(string)+Long_val(pos), 
				keyword_len);

      /* NOTE(review): the malloc result is not checked for NULL. */
      new_mrph = (mrph_t *)malloc (sizeof (mrph_t));
      /* Copy the lexeme data first so the explicit field assignments below
         can never be overwritten by it.  Assumes mrph_t starts with the
         fields of da_lex_t -- confirm against chalib.h. */
      memcpy(new_mrph, lex_data + j, sizeof(da_lex_t));
      new_mrph->keyword = keyword;
      new_mrph->keyword_len = keyword_len;
      new_mrph->is_undef = 0;
      new_mrph->darts = dic;

      mrphs.push_back(new_mrph);
      mrph_lens.push_back(keyword_len);
    }
  }

  /* Build the OCaml result only after all C-side work is done.  `pair` and
     `mrph_array` are registered roots, so the allocation of each pair cannot
     invalidate the array being filled. */
  mrph_array = alloc(mrphs.size(), 0);
  for (i = 0; i < mrphs.size(); i++){
    pair = alloc(2, 0);
    Store_field (pair, 0, (value)mrphs[i]);
    Store_field (pair, 1, Val_long(mrph_lens[i]));
    Store_field (mrph_array, i, pair);
  }

  CAMLreturn (mrph_array);

}
