/*
 * SKK is a simple Japanese input method
 *
 * Many things remain to be implemented.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

#include "context.h"

extern LISP sym_t;


/*
 * Candidate list for one okurigana variant of a dictionary line.
 *
 * Layout of cands[]:
 *
 * |C0|C1| .. |Cnr_real_cands| ..              |Cnr_cands|
 * <-------should be saved --><-- cache of master dict -->
 *
 * Only the first nr_real_cands entries are written out by
 * write_out_array(); entries behind them were appended by
 * merge_candidate_array() and are kept in memory only.
 */
struct skk_cand_array {
  /* okurigana this array belongs to; NULL for the array at index 0 of a line */
  char *okuri;
  int nr_cands;/* number of entries currently stored in cands[] */
  int nr_real_cands;/* leading entries that were parsed from a line or promoted by a commit */
  /* heap array of heap-allocated candidate strings */
  char **cands;

  /* set to 1 by find_candidate_array() once the array has been merged */
  int is_used;
  /* back pointer to the line that owns this array */
  struct skk_line *line;
};

/*
 * One dictionary entry: a head word plus its candidate arrays.
 */
struct skk_line {
  /* heap copy of the head word, without the okuri head letter */
  char *head;
  /* letter that followed the head word in the key, or 0 when there is none */
  char okuri_head;
  /* number of elements in cands[]; index 0 is the array without okurigana */
  int nr_cand_array;
  struct skk_cand_array *cands;
  /* set to 1 when the line was read from the personal dictionary or changed by a commit */
  int need_save;
  /* next line in the cache list anchored at dic_info.head */
  struct skk_line *next;
};

/*
 * State of the opened dictionary file and the in-memory line cache.
 * skk_dic points to the single instance created by skk_dic_open().
 */
static struct dic_info {
  /* start of the mmap()ed dictionary file */
  void *addr;
  /* offset of the first line that does not start with ';' (see find_first_line) */
  int first;
  /* offset of the first non-comment line that is not an okuri entry (see find_border) */
  int border;
  /* size of the mapped file in bytes */
  int size;
  /* dummy list head; head.next is the first cached line */
  struct skk_line head;
} *skk_dic;

/*
 * Return the number of bytes in s that precede the end of the line.
 *
 * The line ends at the first '\n'.  The scan also stops at a NUL byte so
 * that a last line without a trailing newline cannot make the loop run
 * off the end of a NUL-terminated buffer.
 */
static int
calc_line_len(char *s)
{
  int len = 0;

  while (s[len] != '\n' && s[len] != '\0') {
    len++;
  }
  return len;
}


/*
 * Tell whether the text starting at str looks like an okuri entry,
 * i.e. whether the byte right before the first space is alphabetic
 * according to isalpha().
 *
 * Returns 1 if so and 0 otherwise, including when there is no space at
 * all or when the space is the very first byte.
 */
static int
is_okuri(char *str)
{
  char *sp = strchr(str, ' ');

  /* With the space at str[0] there is no key byte to inspect; looking
     at sp[-1] would read in front of the buffer. */
  if (!sp || sp == str) {
    return 0;
  }
  /* isalpha() is only defined for unsigned char values (and EOF), and
     keys may contain bytes above 0x7f, so convert before the call. */
  return isalpha((unsigned char)sp[-1]) ? 1 : 0;
}


/*
 * Return the offset of the first line in the mapped file that does not
 * begin with ';', skipping the leading block of comment lines.
 */
static int
find_first_line(struct dic_info *di)
{
  char *text = di->addr;
  int pos = 0;

  for (;;) {
    if (pos >= di->size || text[pos] != ';') {
      return pos;
    }
    /* jump over this comment line and its newline */
    pos += calc_line_len(text + pos) + 1;
  }
}

/*
 * Return the offset of the first line that is neither a comment
 * (starting with ';') nor an okuri entry according to is_okuri().
 *
 * The walk is bounded by di->size.  When no such line exists, di->size
 * is returned, which makes the range [border, size - 1] empty.
 */
static int
find_border(struct dic_info *di)
{
  char *s = di->addr;
  int off = 0;

  while (off < di->size) {
    int next = off;

    /* Find the start of the following line without leaving the map. */
    while (next < di->size && s[next] != '\n') {
      next++;
    }
    next++;

    if (s[off] != ';' && !is_okuri(&s[off])) {
      return off;
    }
    off = next;
  }
  return di->size;
}

/*
 * Map the dictionary file fn read-only and build its descriptor.
 *
 * Returns a heap-allocated dic_info, or NULL when the file cannot be
 * opened, is empty, cannot be mapped, or memory runs out.
 */
static struct dic_info *
open_dic(const char *fn)
{
  struct dic_info *di;
  struct stat st;
  int fd;
  void *addr;

  fd = open(fn, O_RDONLY);
  if (fd == -1) {
    return NULL;
  }
  /* Ask the opened descriptor for the size: lstat() on the path would
     report the length of a symbolic link instead of the file behind it. */
  if (fstat(fd, &st) == -1 || st.st_size <= 0) {
    close(fd);
    return NULL;
  }
  addr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  /* the mapping stays valid after the descriptor is closed */
  close(fd);
  if (addr == MAP_FAILED) {
    return NULL;
  }
  di = malloc(sizeof(struct dic_info));
  if (!di) {
    munmap(addr, st.st_size);
    return NULL;
  }
  di->addr = addr;
  di->size = st.st_size;
  di->first = find_first_line(di);
  di->border = find_border(di);
  di->head.next = NULL;
  return di;
}

/*
 * Return a pointer to the start of a line at or before offset off.
 *
 * Walks backwards to the nearest '\n' whose following byte is not ';'
 * and returns the byte after it; returns the start of the file when
 * the walk reaches offset 0.
 */
static char *
find_line(struct dic_info *di, int off)
{
  char *base = di->addr;

  for (; off > 0; off--) {
    if (base[off] == '\n' && base[off + 1] != ';') {
      break;
    }
  }
  return (off != 0) ? base + off + 1 : base;
}

/*
 * Copy the key (the text up to the first space) of the line located by
 * find_line(di, off) into buf.
 *
 * buf has room for len bytes; at most len - 1 key bytes are copied so
 * the terminating NUL always fits.  Returns buf, or NULL when the line
 * is a comment or len leaves no room for a terminator.
 */
static char *
extract_entry(struct dic_info *di, int off, char *buf, int len)
{
  char *p = find_line(di, off);
  int i;

  if (len <= 0 || p[0] == ';') {
    return NULL;
  }
  for (i = 0; i < len - 1 && p[i] != ' '; i++) {
    buf[i] = p[i];
  }
  buf[i] = 0;
  return buf;
}

/*
 * Binary search for key s between byte offsets min and max.
 *
 * d gives the sort direction: the search continues in the upper half
 * when strcmp(s, key) * d is positive and in the lower half otherwise.
 * Returns the probed offset whose line has the key s, or -1 when the
 * range shrinks below 4 bytes or a probe lands on a comment line.
 */
static int
do_search(struct dic_info *di, char *s, int min,
	  int max, int d)
{
  char key[256];

  while (abs(max - min) >= 4) {
    int mid = (min + max) / 2;
    char *entry = extract_entry(di, mid, key, (int)sizeof(key));
    int cmp;

    if (!entry) {
      return -1;
    }
    cmp = strcmp(s, entry);
    if (cmp == 0) {
      return mid;
    }
    if (cmp * d > 0) {
      min = mid;
    } else {
      max = mid;
    }
  }
  return -1;
}

/*
 * Return a pointer to the first '/' in str that is not inside a
 * "[...]" group, or to the terminating NUL when there is none.
 */
static char *
next_slash(char *str)
{
  int in_bracket = 0;

  for (; *str != '\0'; str++) {
    if (*str == '/' && !in_bracket) {
      break;
    }
    if (*str == '[') {
      in_bracket = 1;
    } else if (in_bracket && *str == ']') {
      in_bracket = 0;
    }
  }
  return str;
}

/*
 * Return a heap copy of the nth '/'-separated field after the first
 * top-level slash of str (nth == 0 is the first candidate).  With
 * nth == -1 no slash is skipped and the text in front of the first
 * slash is returned.
 *
 * The result is an empty string when str has fewer fields.  Returns
 * NULL when the copy cannot be allocated.  The caller frees the result.
 */
static char *
nth_candidate(char *str, int nth)
{
  char *copy, *end;
  int i;

  for (i = 0; i <= nth; i++) {
    str = next_slash(str);
    if (*str == '/') {
      str++;
    }
  }
  if (*str == '/') {
    str++;
  }
  copy = strdup(str);
  if (!copy) {
    return NULL;
  }
  /* cut the copy at the end of this field */
  end = next_slash(copy);
  *end = 0;
  return copy;
}

/*
 * Lisp entry point: open the dictionary file named by fn_ unless a
 * dictionary is already open.  Always returns NIL.
 */
static LISP
skk_dic_open(LISP fn_)
{
  char *path = uim_get_c_string(fn_);

  if (skk_dic == NULL) {
    skk_dic = open_dic(path);
  }
  free(path);
  return NIL;
}

/*
 * Release a line created by compose_line(): every candidate string,
 * every candidate array, the head word and the line structure itself.
 * Passing NULL is allowed and does nothing.
 */
static void
free_skk_line(struct skk_line *sl)
{
  int i, j;

  if (!sl) {
    return ;
  }
  for (i = 0; i < sl->nr_cand_array; i++) {
    struct skk_cand_array *ca = &sl->cands[i];
    for (j = 0; j < ca->nr_cands; j++) {
      free(ca->cands[j]);
    }
    free(ca->okuri);
    free(ca->cands);
  }
  free(sl->head);
  free(sl->cands);
  /* the structure comes from malloc() in compose_line() */
  free(sl);
}

/*
 * Find the candidate array of line sl that belongs to okuri.
 *
 * A NULL or empty okuri selects the array at index 0.  When no array
 * matches, index 0 is returned unless create_if_notfound is set, in
 * which case a new empty array for okuri is appended.  If that
 * allocation fails the line is left unchanged and index 0 is returned.
 *
 * Appending may move sl->cands, so pointers into it obtained before the
 * call must not be used afterwards.
 */
static struct skk_cand_array *
find_candidate_array_from_line(struct skk_line *sl, char *okuri,
			       int create_if_notfound)
{
  int i;
  char *okuri_copy;
  struct skk_cand_array *ca, *grown;

  if (!okuri || !okuri[0]) {
    return &sl->cands[0];
  }
  for (i = 1; i < sl->nr_cand_array; i++) {
    if (!strcmp(okuri, sl->cands[i].okuri)) {
      return &sl->cands[i];
    }
  }
  if (!create_if_notfound) {
    return &sl->cands[0];
  }
  /* allocate now */
  okuri_copy = strdup(okuri);
  if (!okuri_copy) {
    return &sl->cands[0];
  }
  /* keep the old pointer until realloc() is known to have succeeded */
  grown = realloc(sl->cands,
		  sizeof(struct skk_cand_array) * (sl->nr_cand_array + 1));
  if (!grown) {
    free(okuri_copy);
    return &sl->cands[0];
  }
  sl->cands = grown;
  ca = &sl->cands[sl->nr_cand_array];
  sl->nr_cand_array ++;
  ca->is_used = 0;
  ca->cands = 0;
  ca->nr_cands = 0;
  ca->nr_real_cands = 0;
  ca->okuri = okuri_copy;
  ca->line = sl;
  return ca;
}

/*
 * Append a heap copy of cand to the end of ca->cands, growing the
 * array by one slot.  nr_real_cands is not touched.
 */
static void
push_back_candidate_to_array(struct skk_cand_array *ca, char *cand)
{
  int slot = ca->nr_cands;

  ca->nr_cands = slot + 1;
  ca->cands = realloc(ca->cands, sizeof(char *) * ca->nr_cands);
  ca->cands[slot] = strdup(cand);
}

/*
 * Append to dst_ca every candidate of sl's first candidate array that
 * dst_ca does not contain yet.  Does nothing when sl is NULL or when
 * dst_ca is that first array itself.
 */
static void
merge_candidate_array(struct skk_line *sl, struct skk_cand_array *dst_ca)
{
  struct skk_cand_array *src_ca;
  int s, d;

  if (sl == NULL) {
    return ;
  }
  src_ca = &sl->cands[0];
  if (src_ca == dst_ca) {
    return ;
  }

  for (s = 0; s < src_ca->nr_cands; s++) {
    char *cand = src_ca->cands[s];

    for (d = 0; d < dst_ca->nr_cands; d++) {
      if (strcmp(cand, dst_ca->cands[d]) == 0) {
	break;
      }
    }
    /* the inner loop ran to the end: cand is new for dst_ca */
    if (d == dst_ca->nr_cands) {
      push_back_candidate_to_array(dst_ca, cand);
    }
  }
}

/*
 * Parse the '/'-separated candidates of line into the candidate array
 * of sl selected by okuri (created when missing).
 *
 * A field starting with '[' is a nested group: its text up to the first
 * slash is taken as the okurigana and the group is parsed recursively
 * into the array for that okurigana.  A field starting with ']' is
 * skipped.  Parsing stops at the first empty field.  di is only passed
 * on to the nested calls.
 */
static void
compose_line_parts(struct dic_info *di, struct skk_line *sl,
		   char *okuri, char *line)
{
  int nth;
  char *tmp;
  struct skk_cand_array *ca = find_candidate_array_from_line(sl, okuri, 1);

  for (nth = 0; ; nth++) {
    tmp = nth_candidate(line, nth);
    if (!tmp || !strlen(tmp)) {
      break;
    }
    if (tmp[0] == '[') {
      char *sub_okuri = nth_candidate(&tmp[1], -1);

      compose_line_parts(di, sl, sub_okuri, &tmp[1]);
      /* the callee keeps its own copy of the okurigana */
      free(sub_okuri);
      /* The nested call may have grown sl->cands with realloc(), which
	 can move the array, so look our own entry up again. */
      ca = find_candidate_array_from_line(sl, okuri, 0);
    } else if (tmp[0] != ']') {
      push_back_candidate_to_array(ca, tmp);
      ca->nr_real_cands++;
    }
    free(tmp);
  }
  /* tmp is NULL or the empty string that ended the loop */
  free(tmp);
}

/*
 * Compose skk line
 *
 * Build a new line for head word `word` and okuri head letter
 * `okuri_head` (0 for none) and fill it with the candidates parsed from
 * the dictionary line text `entry`.  The line starts with one candidate
 * array without okurigana and is not linked into any list.
 *
 * Returns the heap-allocated line, or NULL when memory runs out.
 */
static struct skk_line *
compose_line(struct dic_info *di, char *word, char okuri_head, char *entry)
{
  struct skk_line *sl;

  sl = malloc(sizeof(struct skk_line));
  if (!sl) {
    return NULL;
  }
  sl->head = strdup(word);
  sl->cands = malloc(sizeof(struct skk_cand_array));
  if (!sl->head || !sl->cands) {
    free(sl->head);
    free(sl->cands);
    free(sl);
    return NULL;
  }
  sl->need_save = 0;
  sl->okuri_head = okuri_head;
  /* not every line is added to the cache, so give next a defined value */
  sl->next = NULL;
  sl->nr_cand_array = 1;
  sl->cands[0].okuri = NULL;
  sl->cands[0].cands = NULL;
  sl->cands[0].nr_cands = 0;
  sl->cands[0].nr_real_cands = 0;
  sl->cands[0].is_used = 0;
  sl->cands[0].line = sl;

  /* parse */
  compose_line_parts(di, sl, NULL, entry);

  return sl;
}

/*
 * Insert sl at the front of di's list of cached lines.
 */
static void
add_line_to_cache(struct dic_info *di, struct skk_line *sl)
{
  struct skk_line *anchor = &di->head;

  sl->next = anchor->next;
  anchor->next = sl;
}



/*
 * Look up head word s with okuri head letter okuri_head (0 for none) in
 * the mapped dictionary file and parse the matching line.
 *
 * Keys with an okuri head are searched between di->first and
 * di->border - 1, all others between di->border and di->size - 1.
 * Returns a new line that the caller owns, or NULL when di is NULL,
 * nothing matches, or memory runs out.
 */
static struct skk_line *
skk_search_line_from_file(struct dic_info *di, char *s, char okuri_head)
{
  int n;
  char *p;
  int len;
  char *line;
  char *idx;
  size_t slen;
  struct skk_line *sl;

  if (!di) {
    return NULL;
  }
  /* Search key: s followed by the okuri head letter.  Built on the heap
     because the length of s is decided by the caller. */
  slen = strlen(s);
  idx = malloc(slen + 2);
  if (!idx) {
    return NULL;
  }
  memcpy(idx, s, slen);
  idx[slen] = okuri_head;
  idx[slen + 1] = 0;
  if (okuri_head) {
    n = do_search(di, idx, di->first, di->border - 1, -1);
  } else {
    n = do_search(di, idx, di->border, di->size - 1, 1);
  }
  free(idx);
  if (n == -1) {
    return NULL;
  }

  /* copy the matched line out of the mapping as a NUL-terminated string */
  p = find_line(di, n);
  len = calc_line_len(p);
  line = malloc(len+1);
  if (!line) {
    return NULL;
  }
  memcpy(line, p, len);
  line[len] = 0;
  sl = compose_line(di, s, okuri_head, line);
  free(line);
  return sl;
}

/*
 * Return the cached line whose head word equals s and whose okuri head
 * letter equals okuri_head, or NULL when there is none or di is NULL.
 */
static struct skk_line *
skk_search_line_from_cache(struct dic_info *di, char *s, char okuri_head)
{
  struct skk_line *cur;

  if (di == NULL) {
    return NULL;
  }
  /* search from cache */
  cur = di->head.next;
  while (cur != NULL) {
    if (cur->okuri_head == okuri_head && strcmp(cur->head, s) == 0) {
      return cur;
    }
    cur = cur->next;
  }
  return NULL;
}


/*
 * Return the candidate array for head word s, okuri head letter
 * okuri_head and okurigana okuri, loading the line from the file into
 * the cache of di when it is not cached yet.
 *
 * On the first use of an array the candidates of the line's first
 * array are merged into it; for a line that was already cached the
 * candidates found in the file are merged as well.  Returns NULL when
 * the word is found neither in the cache nor in the file.
 */
static struct skk_cand_array *
find_candidate_array(struct dic_info *di, char *s,
		     char okuri_head, char *okuri)
{
  struct skk_line *sl, *sl_file;
  struct skk_cand_array *ca;
  int from_file = 0;

  /* Use the dictionary that was passed in for every step, so lookups
     and the cache insert always refer to the same dic_info. */
  sl = skk_search_line_from_cache(di, s, okuri_head);
  if (!sl) {
    sl = skk_search_line_from_file(di, s, okuri_head);
    if (!sl) {
      return NULL;
    }
    from_file = 1;
    add_line_to_cache(di, sl);
  }
  ca = find_candidate_array_from_line(sl, okuri, 0);

  if (!ca->is_used) {
    merge_candidate_array(sl, ca);
    ca->is_used = 1;
    if (!from_file) {
      /* temporary line, only needed as the source of the merge */
      sl_file = skk_search_line_from_file(di, s, okuri_head);
      merge_candidate_array(sl_file, ca);
      free_skk_line(sl_file);
    }
  }

  return ca;
}

/*
 * Convert the Lisp arguments to C values and look the candidate array
 * up in skk_dic.  A NIL okuri_head_ means no okuri head letter and a
 * NIL okuri_ means no okurigana.
 */
static struct skk_cand_array *
find_cand_array_lisp(LISP head_, LISP okuri_head_, LISP okuri_)
{
  char *head = get_c_string(head_);
  char *okuri = (okuri_ != NIL) ? uim_get_c_string(okuri_) : NULL;
  char okuri_head = 0;
  struct skk_cand_array *result;

  if (okuri_head_ != NIL) {
    /* only the first byte of the string is used */
    okuri_head = get_c_string(okuri_head_)[0];
  }

  result = find_candidate_array(skk_dic, head, okuri_head, okuri);
  free(okuri);
  return result;
}


/*
 * Lisp entry point: return sym_t when a candidate array exists for the
 * given head word, okuri head and okurigana, NIL otherwise.
 */
static LISP
skk_get_entry(LISP head_, LISP okuri_head_, LISP okuri_)
{
  return find_cand_array_lisp(head_, okuri_head_, okuri_) ? sym_t : NIL;
}

/*
 * Lisp entry point: return the candidate at index nth_ for the given
 * head word, okuri head and okurigana as a Lisp string.  Returns NIL
 * when there is no such entry or the index is out of range (negative
 * indexes included).
 */
static LISP
skk_get_nth_candidate(LISP nth_, LISP head_,
		      LISP okuri_head_, LISP okuri_)
{
  int n;
  struct skk_cand_array *ca;
  char *str;

  ca = find_cand_array_lisp(head_, okuri_head_, okuri_);
  n = get_c_long(nth_);
  /* check both ends: a negative n would index in front of cands[] */
  if (ca && n >= 0 && n < ca->nr_cands) {
    str = ca->cands[n];
    return strcons(strlen(str), str);
  }

  return NIL;
}

/*
 * Lisp entry point: return the number of candidates for the given head
 * word, okuri head and okurigana, or 0 when there is no entry.
 */
static LISP
skk_get_nr_candidates(LISP head_, LISP okuri_head_, LISP okuri_)
{
  struct skk_cand_array *ca = find_cand_array_lisp(head_, okuri_head_, okuri_);

  return flocons(ca ? ca->nr_cands : 0);
}

/*
 * Move the candidate equal to str to the front of ca->cands, shifting
 * the entries before it down by one.  When several entries match, the
 * one with the highest index is moved.  Nothing happens when the match
 * is already first or when nothing matches.
 *
 * A candidate taken from behind the first nr_real_cands entries extends
 * that range by one.
 */
static void
reorder_candidate(struct skk_cand_array *ca, char *str)
{
  int pos;
  char *picked;

  /* search from the back so the highest matching index is found first;
     index 0 needs no check because it would not be moved anyway */
  for (pos = ca->nr_cands - 1; pos > 0; pos--) {
    if (strcmp(str, ca->cands[pos]) == 0) {
      break;
    }
  }
  if (pos <= 0) {
    return ;
  }

  picked = ca->cands[pos];
  memmove(&ca->cands[1], &ca->cands[0], sizeof(char *) * pos);
  ca->cands[0] = picked;

  if (pos >= ca->nr_real_cands) {
    ca->nr_real_cands ++;
  }
}

/*
 * Lisp entry point: record that the candidate at index nth_ was chosen
 * by moving it to the front of its candidate array.  When an okurigana
 * is given, the same candidate is also moved to the front of the array
 * without okurigana.  The owning line is flagged with need_save.
 *
 * Index 0 needs no reordering, and negative or too large indexes are
 * ignored.  Always returns NIL.
 */
static LISP
skk_commit_candidate(LISP head_, LISP okuri_head_,
		     LISP okuri_, LISP nth_)
{
  int nth;
  struct skk_cand_array *ca;
  char *str;

  nth = get_c_long(nth_);
  /* a negative nth would index in front of cands[] below */
  if (nth <= 0) {
    return NIL;
  }

  ca = find_cand_array_lisp(head_, okuri_head_, okuri_);
  if (!ca || ca->nr_cands <= nth) {
    return NIL;
  }

  str = ca->cands[nth];
  reorder_candidate(ca, str);

  if (okuri_ != NIL) {
    ca = find_cand_array_lisp(head_, okuri_head_, NIL);
    if (!ca || ca->nr_cands <= nth) {
      return NIL;
    }
    reorder_candidate(ca, str);
  }

  ca->line->need_save = 1;

  return NIL;
}

/*
 * Lisp entry point for learning a new word.  Not implemented: all
 * arguments are ignored and NIL is returned.
 */
static LISP
skk_learn_word(LISP head_, LISP okuri_head_, LISP okuri_, LISP word_)
{
  (void)head_;
  (void)okuri_head_;
  (void)okuri_;
  (void)word_;
  return NIL;
}

/*
 * Parse one line of the personal dictionary (without its newline) and
 * add it to the cache of skk_dic with need_save set.
 *
 * The key is the text before the first space.  When the key ends in a
 * lower-case letter according to islower(), that letter is split off
 * as the okuri head.  Lines without a space or with an empty key are
 * ignored, as is every line while no dictionary is open.
 */
static void
parse_dic_line(char *line)
{
  char *buf, *sep;
  char okuri_head = 0;
  struct skk_line *sl;

  if (!skk_dic) {
    return ;
  }

  /* work on a copy: the key is cut in place while `line` itself is
     still needed unchanged for the candidate parser */
  buf = strdup(line);
  if (!buf) {
    return ;
  }
  sep = strchr(buf, ' ');
  if (!sep || sep == buf) {
    free(buf);
    return ;
  }
  *sep = 0;
  /* islower() is only defined for unsigned char values (and EOF) */
  if (islower((unsigned char)sep[-1])) {
    okuri_head = sep[-1];
    sep[-1] = 0;
  }
  /* compose_line() stores its own copy of the head word */
  sl = compose_line(skk_dic, buf, okuri_head, line);
  free(buf);
  if (!sl) {
    return ;
  }
  sl->need_save = 1;
  add_line_to_cache(skk_dic, sl);
}

/*
 * Write the first nr_real_cands candidates of ca to fp, each followed
 * by '/'.  An array with okurigana is wrapped as "[okuri/ ... ]/".
 */
static void
write_out_array(FILE *fp, struct skk_cand_array *ca)
{
  int idx;
  int bracketed = (ca->okuri != NULL);

  if (bracketed) {
    fprintf(fp, "[%s/", ca->okuri);
  }
  for (idx = 0; idx < ca->nr_real_cands; idx++) {
    fprintf(fp, "%s/", ca->cands[idx]);
  }
  if (bracketed) {
    fprintf(fp, "]/");
  }
}

/*
 * Lisp entry point: write every cached line to the file named by fn_,
 * one line per entry in the form "<head><okuri head> /<candidates>".
 *
 * Nothing is written, and the file is left untouched, when no
 * dictionary is open.  Always returns NIL.
 */
static LISP
skk_lib_save_personal_dictionary(LISP fn_)
{
  FILE *fp;
  char *fn;
  struct skk_line *sl;

  /* Check before fopen(): opening with "w" truncates the file, and
     without a dictionary there is no cache to write back. */
  if (!skk_dic) {
    return NIL;
  }

  fn = uim_get_c_string(fn_);
  if (!fn) {
    return NIL;
  }
  fp = fopen(fn, "w");
  free(fn);
  if (!fp) {
    return NIL;
  }

  for (sl = skk_dic->head.next; sl; sl = sl->next) {
    struct skk_cand_array *ca;
    int i;
    fprintf(fp, "%s", sl->head);
    if (sl->okuri_head) {
      fprintf(fp, "%c /", sl->okuri_head);
    } else {
      fprintf(fp, " /");
    }
    for (i = 0; i < sl->nr_cand_array; i++) {
      ca = &sl->cands[i];
      write_out_array(fp, ca);
    }
    fprintf(fp, "\n");
  }
  fclose(fp);
  return NIL;
}

/*
 * Lisp entry point: read the personal dictionary file named by fn_ and
 * feed every complete, non-comment line to parse_dic_line().
 *
 * A line that does not fit into the read buffer is dropped as a whole:
 * its pieces are skipped up to and including the piece that ends in a
 * newline.  Returns NIL when the file cannot be opened, sym_t otherwise.
 */
static LISP
skk_lib_read_personal_dictionary(LISP fn_)
{
  char *fn = get_c_string(fn_);
  FILE *fp = fopen(fn, "r");
  char buf[4096];
  int err_flag = 0;

  if (!fp) {
    return NIL;
  }
  while (fgets(buf, sizeof(buf), fp)) {
    size_t len = strlen(buf);
    /* len is 0 when the piece starts with a NUL byte; buf[len-1] must
       not be read then, and the piece is handled like an overlong one */
    if (len > 0 && buf[len-1] == '\n') {
      if (err_flag == 0) {
	if (buf[0] != ';') {
	  buf[len-1] = 0;
	  parse_dic_line(buf);
	}
      } else {
	/* erroneous line ends here */
	err_flag = 0;
      }
    } else {
      err_flag = 1;
    }
  }
  fclose(fp);
  return sym_t;
}

/*
 * Lisp entry point: split str_ into a list of one-character strings.
 *
 * A byte above 127 is taken together with the byte after it as one
 * two-byte character; every other byte is a character of its own.  A
 * byte above 127 at the very end of the string has no partner and is
 * returned as a one-byte string.
 *
 * Each character is consed onto the front of the result, so the list
 * holds the characters in reverse order.
 */
static LISP
skk_split_string(LISP str_)
{
  char *str = get_c_string(str_);
  unsigned char *cur = (unsigned char *)str;
  LISP res = NIL;

  while (*cur) {
    char buf[3];
    int len;

    /* only pair up when a second byte exists, so the cursor can never
       step over the terminating NUL */
    if (*cur > 127 && cur[1] != 0) {
      /* 2 bytes */
      buf[0] = cur[0];
      buf[1] = cur[1];
      len = 2;
    } else {
      buf[0] = cur[0];
      buf[1] = 0;
      len = 1;
    }
    buf[2] = 0;
    res = cons (strcons(len, buf), res);
    cur += len;
  }
  return res;
}

/*
 * Register the SKK dictionary functions of this file under their Lisp
 * names.  The number in init_subr_N matches the number of Lisp
 * arguments the C function takes.
 */
void uim_init_skk_dic()
{
  init_subr_1("skk-lib-dic-open", skk_dic_open);
  init_subr_1("skk-lib-read-personal-dictionary", skk_lib_read_personal_dictionary);
  init_subr_1("skk-lib-save-personal-dictionary", skk_lib_save_personal_dictionary);
  init_subr_3("skk-lib-get-entry", skk_get_entry);
  init_subr_4("skk-lib-get-nth-candidate", skk_get_nth_candidate);
  init_subr_3("skk-lib-get-nr-candidates", skk_get_nr_candidates);
  init_subr_4("skk-lib-commit-candidate", skk_commit_candidate);
  /* skk_learn_word is still a stub that returns NIL */
  init_subr_4("skk-lib-learn-word", skk_learn_word);
  init_subr_1("skk-lib-split-string", skk_split_string);
}
