/*
 * SKK is a simple Japanese input method
 *
 * Many many things are to be implemented!
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

#include "context.h"

extern LISP sym_t;


/*
 * Candidate list for one okuri (trailing kana) variant of a dictionary line.
 *
 * |C0|C1| .. |Cnr_real_cands| ..              |Cnr_cands|
 * <-------should be saved --><-- cache of master dict -->
 *
 * The first nr_real_cands entries are the ones parsed from a dictionary
 * line; the diagram reserves the tail of the array for cached master
 * dictionary candidates.  NOTE(review): in the code visible in this file
 * both counters are always incremented together, so the tail is empty.
 */
struct skk_cand_array {
  /* okuri string this list belongs to; NULL for the okuri-less list */
  char *okuri;
  int nr_cands;/* number of entries in the cands array */
  int nr_real_cands;/* number of leading entries that were parsed from a line */
  /* heap array of heap-allocated candidate strings */
  char **cands;
};

/*
 * One parsed dictionary line, kept in the singly linked cache that hangs
 * off struct dic_info.
 */
struct skk_line {
  /* heap copy of the headword this line was composed for */
  char *head;
  /* first letter of the okuri, or 0 for an okuri-less entry */
  char okuri_head;
  /* number of elements in cands (at least 1 once composed) */
  int nr_cand_array;
  /* cands[0] is the okuri-less list; cands[1..] are per-okuri lists */
  struct skk_cand_array *cands;
  /* next cached line, or NULL at the end of the list */
  struct skk_line *next;
};

/*
 * State of the opened master dictionary.  skk_dic is the single instance
 * used by the LISP entry points; it stays NULL until a dictionary is opened.
 */
static struct dic_info {
  /* start of the read-only mapping of the dictionary file */
  void *addr;
  /* byte offset of the first non-comment line that is not an okuri entry */
  int border;
  /* size of the mapped file in bytes */
  int size;
  /* dummy list head; head.next is the first cached skk_line */
  struct skk_line head;
} *skk_dic;

/*
 * Return the number of bytes in the line that starts at s, not counting
 * the line terminator.
 *
 * The scan stops at '\n' and also at a NUL byte, so a buffer whose last
 * line has no trailing newline no longer makes the loop run past the
 * terminator.
 */
static int
calc_line_len(char *s)
{
  int i = 0;
  while (s[i] != '\n' && s[i] != '\0') {
    i++;
  }
  return i;
}


/*
 * Return 1 when the byte just before the first space in str is an
 * alphabetic character (the okuri letter of an entry key), 0 otherwise.
 *
 * Returns 0 when there is no space, or when the space is the very first
 * byte (there is no preceding byte to inspect).
 */
static int
is_okuri(char *str)
{
  char *sp = strchr(str, ' ');
  if (!sp || sp == str) {
    return 0;
  }
  /* <ctype.h> functions need an unsigned char value; dictionary text
   * contains bytes with the high bit set, which are negative as plain char */
  if (isalpha((unsigned char)sp[-1])) {
    return 1;
  }
  return 0;
}

/*
 * Find the byte offset of the first line that is neither a ';' comment nor
 * an okuri entry (as judged by is_okuri).
 *
 * The walk is bounded by di->size, which must be set before calling.  When
 * every line is a comment or an okuri entry, di->size is returned so the
 * okuri-less region [border, size) is empty.
 */
static int
find_border(struct dic_info *di)
{
  char *s = di->addr;
  int off = 0;
  while (off < di->size) {
    int l = calc_line_len(&s[off]);
    if (s[off] != ';' && !is_okuri(&s[off])) {
      return off;
    }
    /* skip the line and its terminator */
    off += l + 1;
  }
  return di->size;
}

/*
 * Map the dictionary file fn read-only and build a dic_info for it.
 *
 * Returns a heap-allocated dic_info, or NULL when the file cannot be
 * opened, is empty, cannot be mapped, or memory runs out.  The file
 * descriptor is closed before returning; the mapping stays alive.
 */
static struct dic_info *
open_dic(const char *fn)
{
  struct dic_info *di;
  struct stat st;
  int fd;
  void *addr;

  fd = open(fn, O_RDONLY);
  if (fd == -1) {
    return NULL;
  }
  /* fstat the opened descriptor: lstat on the path would report the size
   * of a symbolic link itself instead of the file that open() followed */
  if (fstat(fd, &st) == -1 || st.st_size <= 0) {
    close(fd);
    return NULL;
  }
  addr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  close(fd);
  if (addr == MAP_FAILED) {
    return NULL;
  }
  di = malloc(sizeof(struct dic_info));
  if (!di) {
    munmap(addr, st.st_size);
    return NULL;
  }
  di->addr = addr;
  di->size = st.st_size;
  /* find_border reads addr, so it has to run after the fields above */
  di->border = find_border(di);
  di->head.next = NULL;
  return di;
}

/*
 * Return a pointer to the start of a line near byte offset off.
 *
 * Walks backwards from off to the nearest '\n' at a positive offset and
 * returns the byte after it; if the walk reaches offset 0 the start of the
 * mapping is returned.  When off itself is on a '\n', the result is the
 * start of the following line.
 */
static char *
find_line(struct dic_info *di, int off)
{
  char *base = di->addr;
  int pos = off;

  for (; pos > 0; pos--) {
    if (base[pos] == '\n') {
      break;
    }
  }
  return (pos == 0) ? base : base + pos + 1;
}

/*
 * Copy the key (the text up to the first space) of the line containing
 * offset off into buf.
 *
 * buf has room for len bytes; at most len - 1 key bytes are copied so the
 * terminating NUL always fits.  Returns buf, or NULL when the line is a
 * ';' comment or len leaves no room for the terminator.
 */
static char *
extract_entry(struct dic_info *di, int off, char *buf, int len)
{
  char *p = find_line(di, off);
  int i;
  if (len <= 0 || p[0] == ';') {
    return NULL;
  }
  for (i = 0; i < len - 1 && p[i] != ' '; i++) {
    buf[i] = p[i];
  }
  buf[i] = 0;
  return buf;
}

/*
 * Binary-search the mapped dictionary between byte offsets min and max for
 * a line whose key equals s.
 *
 * d gives the sort direction of the region: the search moves towards max
 * when strcmp(s, key) * d is positive and towards min otherwise.  Returns
 * the probed offset inside the matching line, or -1 when the range shrinks
 * below 4 bytes or the probe lands on a comment line.
 */
static int
do_search(struct dic_info *di, char *s, int min,
	  int max, int d)
{
  char key[256];
  int lo = min;
  int hi = max;

  while (abs(hi - lo) >= 4) {
    int mid = (lo + hi) / 2;
    char *entry = extract_entry(di, mid, key, 256);
    int cmp;

    if (!entry) {
      return -1;
    }
    cmp = strcmp(s, entry);
    if (cmp == 0) {
      return mid;
    }
    if (cmp * d > 0) {
      lo = mid;
    } else {
      hi = mid;
    }
  }
  return -1;
}

/*
 * Look up the key s (with okuri_head appended when it is non-zero) in the
 * mapped dictionary and return a heap copy of the whole matching line,
 * without its newline.
 *
 * Okuri keys are searched in [0, border) with direction -1, okuri-less keys
 * in [border, size) with direction 1.  Returns NULL when di is NULL, the
 * key is not found, or memory runs out; the caller owns the returned
 * string.
 */
static char *
skk_search_line_from_file(struct dic_info *di, char *s, char okuri_head)
{
  int n;
  char *p;
  int len;
  char *line;
  char *idx;

  if (!di) {
    /* a non-NULL result means "found", so report nothing here */
    return NULL;
  }
  /* room for s, the okuri letter and the terminator */
  idx = alloca(strlen(s) + 2);
  sprintf(idx, "%s%c",s, okuri_head);
  /* NOTE(review): debug trace on stdout, kept as-is */
  printf("INDEX=%s\n", idx);
  if (okuri_head) {
    n = do_search(di, idx, 0, di->border - 1, -1);
  } else {
    n = do_search(di, idx, di->border, di->size - 1, 1);
  }
  if (n == -1) {
    printf("not found\n");
    return NULL;
  }
  p = find_line(di, n);
  len = calc_line_len(p);
  line = malloc(len + 1);
  if (!line) {
    return NULL;
  }
  memcpy(line, p, len);
  line[len] = 0;
  return line;
}

/*
 * Return a pointer to the first '/' in str that is not inside a [...]
 * group, or to the terminating NUL when there is none.
 */
char *next_slash(char *str)
{
  int in_bracket = 0;

  for (; *str != '\0'; str++) {
    if (!in_bracket && *str == '/') {
      break;
    }
    if (*str == '[') {
      in_bracket = 1;
    } else if (*str == ']') {
      in_bracket = 0;
    }
  }
  return str;
}

/*
 * Return a heap copy of the nth '/'-separated field after the first
 * top-level slash of str (nth == 0 is the first candidate of a line).
 *
 * With nth == -1 no slash is skipped, so the text before the first slash
 * is returned.  A [...] group counts as a single field.  The result is an
 * empty string when there is no such field, and NULL when memory runs out;
 * the caller owns the returned string.
 */
static char *
nth_candidate(char *str, int nth)
{
  char *p , *term;
  int i;
  for (i = 0; i <= nth; i++) {
    str = next_slash(str);
    if (*str == '/') {
      str++;
    }
  }
  /* step over one more slash if the field starts on one */
  if (*str == '/') {
    str++;
  }
  p = strdup(str);
  if (!p) {
    return NULL;
  }
  /* cut the copy at the end of the field */
  term = next_slash(p);
  *term = 0;
  return p;
}

/*
 * LISP entry point: open the master dictionary named by fn unless one is
 * already open.  Always returns NIL.
 */
static LISP
skk_dic_open(LISP fn)
{
  char *path = uim_get_c_string(fn);

  if (skk_dic == NULL) {
    skk_dic = open_dic(path);
  }
  free(path);
  return NIL;
}

/*
 * Return the candidate list of sl for the given okuri, creating an empty
 * one when it does not exist yet.
 *
 * A NULL okuri selects sl->cands[0].  Creating a list reallocates
 * sl->cands, so pointers returned by earlier calls become invalid.
 * Returns NULL when memory runs out; sl is left unchanged in that case.
 */
static struct skk_cand_array *
find_candidate_array(struct skk_line *sl, char *okuri)
{
  int i;
  char *okuri_copy;
  struct skk_cand_array *grown;
  struct skk_cand_array *ca;
  if (!okuri) {
    return &sl->cands[0];
  }
  /* slot 0 has no okuri, so start comparing at 1 */
  for (i = 1; i < sl->nr_cand_array; i++) {
    if (!strcmp(okuri, sl->cands[i].okuri)) {
      return &sl->cands[i];
    }
  }
  /* allocate now; commit nothing until both allocations succeeded */
  okuri_copy = strdup(okuri);
  if (!okuri_copy) {
    return NULL;
  }
  grown = realloc(sl->cands,
                  sizeof(struct skk_cand_array) * (sl->nr_cand_array + 1));
  if (!grown) {
    free(okuri_copy);
    return NULL;
  }
  sl->cands = grown;
  ca = &sl->cands[sl->nr_cand_array];
  sl->nr_cand_array ++;
  ca->cands = 0;
  ca->nr_cands = 0;
  ca->nr_real_cands = 0;
  ca->okuri = okuri_copy;
  return ca;
}

/*
 * Append cand to the end of ca, growing the array by one slot.  The
 * pointer is stored as given, not copied.
 */
static void
push_back_candidate_to_array(struct skk_cand_array *ca, char *cand)
{
  int slot = ca->nr_cands;

  ca->nr_cands = slot + 1;
  ca->cands = realloc(ca->cands, sizeof(char *) * ca->nr_cands);
  ca->cands[slot] = cand;
}

/*
 * Parse the '/'-separated candidates of line into the candidate list of sl
 * that belongs to okuri (NULL for the okuri-less list).
 *
 * A field of the form "[okuri/cand/.../]" is parsed recursively into the
 * list for that okuri.  Plain candidates are appended to the list, which
 * takes ownership of the strings; every other temporary is freed here.
 * Parsing stops at the first empty field.
 */
static void
compose_line_parts(struct dic_info *di, struct skk_line *sl,
                   char *okuri, char *line)
{
  int nth;
  char *tmp;
  struct skk_cand_array *ca = find_candidate_array(sl, okuri);

  for (nth = 0; ca; nth++) {
    tmp = nth_candidate(line, nth);
    if (!tmp || !tmp[0]) {
      free(tmp);
      break;
    }
    if (tmp[0] == '[') {
      /* the text before the first slash inside the bracket is the okuri */
      char *sub_okuri = nth_candidate(tmp + 1, -1);
      if (sub_okuri) {
        compose_line_parts(di, sl, sub_okuri, tmp + 1);
      }
      free(sub_okuri);
      free(tmp);
      /* the recursion may have reallocated sl->cands, which would leave
       * ca dangling, so look the list up again */
      ca = find_candidate_array(sl, okuri);
    } else if (tmp[0] != ']') {
      push_back_candidate_to_array(ca, tmp);
      ca->nr_real_cands++;
    } else {
      /* closing bracket of a nested group: not a candidate */
      free(tmp);
    }
  }
}

/*
 * Compose skk line
 *
 * Build a skk_line for word/okuri_head from the dictionary text in entry,
 * and link it at the front of di's cache list.  word is copied; entry is
 * only read while parsing.  Returns the new line, or NULL when memory
 * runs out (nothing is linked in that case).
 */
static struct skk_line *
compose_line(struct dic_info *di, char *word, char okuri_head, char *entry)
{
  struct skk_line *sl;

  sl = malloc(sizeof(struct skk_line));
  if (!sl) {
    return NULL;
  }
  sl->head = strdup(word);
  sl->cands = malloc(sizeof(struct skk_cand_array));
  if (!sl->head || !sl->cands) {
    free(sl->head);
    free(sl->cands);
    free(sl);
    return NULL;
  }
  sl->okuri_head = okuri_head;
  sl->nr_cand_array = 1;
  sl->cands[0].okuri = NULL;
  sl->cands[0].cands = NULL;
  sl->cands[0].nr_cands = 0;
  /* the parser increments this counter, so it must start from zero */
  sl->cands[0].nr_real_cands = 0;

  /* parse */
  compose_line_parts(di, sl, NULL, entry);

  /* link */
  sl->next = di->head.next;
  di->head.next = sl;
  return sl;
}

/*
 * Return the cached line for headword s and okuri_head, loading it from
 * the mapped dictionary into the cache on a miss.
 *
 * Returns NULL when di is NULL or the entry is not in the dictionary.
 * The okuri argument is not used for the lookup.
 */
static struct skk_line *
skk_search_line(struct dic_info *di, char *s, char okuri_head, char *okuri)
{
  struct skk_line *sl;
  char *res;

  if (!di) {
    return NULL;
  }
  /* search from cache */
  for (sl = di->head.next; sl; sl = sl->next) {
    if (!strcmp(sl->head, s) &&
        sl->okuri_head == okuri_head) {
      return sl;
    }
  }
  res = skk_search_line_from_file(di, s, okuri_head);
  if (!res) {
    return NULL;
  }
  sl = compose_line(di, s, okuri_head, res);
  /* composing copies every string it keeps, so the raw line can go */
  free(res);
  return sl;
}

/*
 * LISP entry point: report whether the dictionary has an entry for head_
 * with the given okuri head.  Returns sym_t when found, NIL otherwise.
 *
 * okuri_head_ and okuri_ may be NIL; only the first character of
 * okuri_head_ is used.
 */
static LISP
skk_get_entry(LISP head_, LISP okuri_head_, LISP okuri_)
{
  char *head_str, o = 0;
  char *okuri_str = NULL;
  struct skk_line *sl;

  if (okuri_head_ != NIL) {
    char *os = uim_get_c_string(okuri_head_);
    /* NOTE(review): guards a NULL result; confirm whether
     * uim_get_c_string can return NULL */
    if (os) {
      o = os[0];
    }
    free(os);
  }

  head_str = uim_get_c_string(head_);
  /* compare against NIL like the other entry points in this file */
  if (okuri_ != NIL) {
    okuri_str = uim_get_c_string(okuri_);
  }

  sl = skk_search_line(skk_dic, head_str, o, okuri_str);
  free(head_str);
  free(okuri_str);

  if (!sl) {
    return NIL;
  }

  return sym_t;
}

/*
 * LISP entry point: return the nth_ candidate of the okuri-less list of
 * the entry for head_ as a string, or NIL when the entry does not exist or
 * nth_ is out of range (negative or past the last candidate).
 */
static LISP
skk_get_nth_candidate(LISP nth_, LISP head_,
                      LISP okuri_head_, LISP okuri_)
{
  int n;
  char o;
  char *hs;
  char *okuri = NULL;
  struct skk_line *sl;

  hs = get_c_string(head_);
  n = get_c_long(nth_);
  if (okuri_ != NIL) {
    okuri = uim_get_c_string(okuri_);
  }
  if (okuri_head_ == NIL) {
    o = 0;
  } else {
    char *os= get_c_string(okuri_head_);
    o = os[0];
  }

  sl = skk_search_line(skk_dic, hs, o, okuri);
  free(okuri);

  /* a negative index would read before the start of the array */
  if (sl && n >= 0 && sl->cands[0].nr_cands > n) {
    char *str = sl->cands[0].cands[n];
    return strcons(strlen(str), str);
  }
  return NIL;
}

/*
 * LISP entry point: return the number of candidates in the okuri-less list
 * of the entry for head_.  Returns 0 when there is no such entry (or no
 * dictionary is open).
 */
static LISP
skk_get_nr_candidates(LISP head_, LISP okuri_head_, LISP okuri_)
{
  char o;
  char *hs;
  char *okuri = NULL;
  struct skk_line *sl;

  hs = get_c_string(head_);
  if (okuri_ != NIL ) {
    okuri = uim_get_c_string(okuri_);
  }
  if (okuri_head_ == NIL) {
    o = 0;
  } else {
    char *os= get_c_string(okuri_head_);
    o = os[0];
  }

  sl = skk_search_line(skk_dic, hs, o, okuri);
  free(okuri);
  /* the search returns NULL for unknown entries */
  if (!sl) {
    return flocons(0);
  }
  return flocons(sl->cands[0].nr_cands);
}

/*
 * LISP entry point: move the nth_ candidate of the okuri-less list of the
 * entry for head_ to the front, shifting the candidates before it back by
 * one.  Does nothing when the entry does not exist or nth_ is out of
 * range.  Always returns NIL.
 */
static LISP
skk_commit_candidate(LISP head_, LISP okuri_head_,
                     LISP okuri_, LISP nth_)
{
  int nth;
  char o;
  char *head;
  char *okuri = NULL;
  struct skk_line *sl;

  head = get_c_string(head_);
  if (okuri_ != NIL) {
    okuri = uim_get_c_string(okuri_);
  }
  nth = get_c_long(nth_);
  if (okuri_head_ == NIL) {
    o = 0;
  } else {
    char *os = get_c_string(okuri_head_);
    o = os[0];
  }

  sl = skk_search_line(skk_dic, head, o, okuri);
  free(okuri);

  /* nth == 0 is already at the front; a negative nth would index before
   * the array */
  if (sl && nth > 0 && sl->cands[0].nr_cands > nth) {
    char **cands = sl->cands[0].cands;
    char *chosen = cands[nth];
    /* shift cands[0..nth-1] up by one slot */
    memmove(&cands[1], &cands[0], sizeof(char *) * nth);
    cands[0] = chosen;
  }
  return NIL;
}

/*
 * Parse one "key /cand/cand/.../" dictionary line and add it to the cache
 * of the open dictionary.  line must not contain the trailing newline.
 * Lines without a key or without a space are ignored, as is everything
 * when no dictionary is open.
 *
 * For an okuri entry the trailing lower-case letter of the key is split
 * off and passed as okuri_head, so the cached headword has the same form
 * as the one used for cache lookups (headword without the letter).
 * NOTE(review): a key is treated as an okuri entry only when its first
 * byte has the high bit set, on the assumption that okuri readings are
 * written in multibyte kana; a plain ASCII key such as "cat" is kept
 * whole.  Confirm against the dictionary format in use.
 */
void
parse_dic_line(char *line)
{
  char *buf, *sep;
  char okuri_head = 0;
  if (!skk_dic) {
    return ;
  }

  /* work on a copy: the key is cut out of it, line is parsed untouched */
  buf = alloca(strlen(line)+1);
  strcpy(buf, line);
  sep = strchr(buf, ' ');
  if (!sep) {
    return ;
  }
  if (sep == buf) {
    return ;
  }
  *sep = 0;
  /* <ctype.h> functions need an unsigned char value */
  if (((unsigned char)buf[0] & 0x80) && islower((unsigned char)sep[-1])) {
    okuri_head = sep[-1];
    sep[-1] = 0;
  }
  compose_line(skk_dic, buf, okuri_head, line);
}

/*
 * LISP entry point: read the personal dictionary file fn_ line by line and
 * add every non-comment line to the cache.
 *
 * Lines that do not fit into the read buffer are skipped entirely.  A last
 * line without a trailing newline is still parsed.  Returns sym_t on
 * success and NIL when the file cannot be opened.
 */
static LISP
skk_lib_read_personal_dictionary(LISP fn_)
{
  char *fn = get_c_string(fn_);
  FILE *fp = fopen(fn, "r");
  char buf[4096];
  int err_flag = 0;

  if (!fp) {
    return NIL;
  }
  while (fgets(buf, sizeof(buf), fp)) {
    size_t len = strlen(buf);
    int has_nl = (len > 0 && buf[len - 1] == '\n');

    if (!has_nl && len == sizeof(buf) - 1) {
      /* buffer filled without reaching the newline: over-long line */
      err_flag = 1;
      continue;
    }
    if (err_flag) {
      /* erroneous line ends here */
      err_flag = 0;
      continue;
    }
    if (has_nl) {
      buf[len - 1] = 0;
    }
    if (buf[0] != 0 && buf[0] != ';') {
      parse_dic_line(buf);
    }
  }
  fclose(fp);
  return sym_t;
}

/*
 * Register the SKK dictionary functions of this file under their LISP
 * names.  The number in each init_subr_N call matches the number of LISP
 * arguments the C function takes.
 */
void uim_init_skk_dic()
{
  /* opening the master dictionary */
  init_subr_1("skk-lib-dic-open", skk_dic_open);
  /* lookups */
  init_subr_3("skk-lib-get-entry", skk_get_entry);
  init_subr_4("skk-lib-get-nth-candidate", skk_get_nth_candidate);
  init_subr_3("skk-lib-get-nr-candidates", skk_get_nr_candidates);
  /* reordering after a candidate was chosen */
  init_subr_4("skk-lib-commit-candidate", skk_commit_candidate);
  /* loading the personal dictionary into the cache */
  init_subr_1("skk-lib-read-personal-dictionary", skk_lib_read_personal_dictionary);
}
