/*
 * Builds the dictionary file from a cannadic-format source file.
 *
 * Funded by IPA (Exploratory Software Project), 2001 8/22
 */
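/*
 * The generated file is structured so that, starting from a reading
 * (the index), the converted strings with their part of speech
 * (= entries) can be looked up.
 */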

/* $Id: mkdic.c,v 1.1.1.1 2001/12/18 08:32:10 yusuke Exp $ */

#include <sys/types.h>
#include <netinet/in.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <xstr.h>
#include <wtype.h>

/* Size of the line buffer handed to fgets() when reading the input. */
#define MAX_LINE_LEN 1024
/* Number of distinct index words written to one page. */
#define WORDS_PER_PAGE 64
/* Number of int-sized slots in the header of the output file. */
#define NR_HEADER_SECTIONS 8
/* Every section of the output file starts at a multiple of this many bytes. */
#define SECTION_ALIGNMENT 8

/* Output file name used unless "-o <file>" is given on the command line. */
#define DEFAULT_FN "anthy.dic"
static char *output_fn = DEFAULT_FN;

/* Program name used as the prefix of error messages. */
static const char *progname;
/*
 * Temporary files collecting the four sections of the dictionary
 * (pages, page index, entry index, entries) until they are linked
 * into the final output file.
 */
static FILE *page_out, *page_index_out, *entry_index_out, *entry_out;

/*
 * Create the four anonymous temporary files that collect the sections
 * of the dictionary while it is being generated.
 * If any of them cannot be created, an error is reported on stderr and
 * the program exits with status 2.
 */
static void open_output_files()
{
  int ok;

  /* Creation stops at the first failure, so errno still describes it. */
  ok = (page_index_out = tmpfile ()) != NULL;
  ok = ok && (page_out = tmpfile ()) != NULL;
  ok = ok && (entry_index_out = tmpfile ()) != NULL;
  ok = ok && (entry_out = tmpfile ()) != NULL;
  if (ok) {
    return;
  }

  fprintf (stderr, "%s: cannot open temporary file: %s\n",
           progname, strerror (errno));
  exit (2);
}

/*
 * Return the length of the common prefix of two strings, counted in
 * xstr characters.  Returns 0 when either string is NULL.
 */
static int common_len(xstr *s1, xstr *s2)
{
  int limit, n;

  if (s1 == NULL || s2 == NULL) {
    return 0;
  }
  /* The prefix cannot be longer than the shorter string. */
  limit = (s2->len > s1->len) ? s1->len : s2->len;
  n = 0;
  while (n < limit && s1->str[n] == s2->str[n]) {
    n++;
  }
  return n;
}

/*
 * Write the integer I to FP in network byte order, as one int-sized
 * value (4 bytes where int is 32 bits wide).
 * The result of fwrite() is not checked.
 */
static void write_nl(FILE *fp, int i)
{
  const int net = htonl(i);

  fwrite(&net, sizeof net, 1, fp);
}

/*
 * Write to page_out the difference between the previous string P and
 * the current string C.
 *
 * The output is one byte followed by the tail of C:
 *  - the byte holds (number of trailing characters of P that are not
 *    part of the prefix shared with C) + 1, so it is never 0;
 *    it is 1 when P is NULL or nothing has to be dropped;
 *  - the tail is every character of C after the shared prefix, each
 *    encoded by sputxchar().
 * Example: for P = "AAA" and C = "ABBB" the shared prefix is "A", two
 * characters are dropped, so the byte 3 followed by "BBB" is written.
 *
 * Returns 1 plus the sum of the sputxchar() return values
 * (presumably the number of bytes written -- confirm against
 * sputxchar()).
 */
static int output_diff(xstr *p, xstr *c)
{
  int shared = common_len(p, c);
  int drop = (p && p->len > shared) ? p->len - shared : 0;
  int written = 1;
  int pos;

  fputc(drop + 1, page_out);
  for (pos = shared; pos < c->len; pos++) {
    char bytes[3];
    written += sputxchar(bytes, c->str[pos]);
    fputs(bytes, page_out);
  }
  return written;
}

/*
 * Print a one-line hint on stdout and terminate the program
 * with exit status 0.  Does not return.
 */
static void print_usage()
{
  fputs("please use mkanthydic command.\n", stdout);
  exit(0);
}

/*
 * Scan the command line.
 *   --help     print the usage hint and exit
 *   -o FILE    store FILE in output_fn
 * A trailing "-o" without a file name and any other argument are
 * silently ignored.
 */
static void parse_args(int argc, char **argv)
{
  int pos = 1;

  while (pos < argc) {
    const char *arg = argv[pos];

    if (strcmp(arg, "--help") == 0) {
      print_usage();
    }
    if (strcmp(arg, "-o") == 0 && pos + 1 < argc) {
      output_fn = argv[pos + 1];
      pos++;			/* skip the file name as well */
    }
    pos++;
  }
}

/*
 * Read the next line of stdin that does not start with '#' into BUF
 * and strip its trailing newline, if any.
 *
 * BUF must have room for MAX_LINE_LEN bytes.  A line that does not fit
 * is returned in several pieces by successive calls, as fgets()
 * delivers it.
 *
 * Returns BUF, or NULL at end of input (or on a read error).
 */
static char *read_line(char *buf)
{
  while (fgets(buf, MAX_LINE_LEN, stdin)) {
    size_t len;

    if (buf[0] == '#') {
      continue;
    }
    len = strlen(buf);
    /*
     * fgets() can succeed and still leave an empty string in BUF when
     * the line starts with a NUL byte, so len may be 0 here.
     */
    if (len > 0 && buf[len - 1] == '\n') {
      buf[len - 1] = 0;
    }
    return buf;
  }
  return NULL;
}

/*
 * Convert the part of BUF in front of the first space to an xstr with
 * cstr_to_xstr().  BUF is modified temporarily but restored before
 * returning.
 *
 * Returns the value from cstr_to_xstr(), or NULL when BUF contains no
 * space (a malformed line) instead of writing through a NULL pointer.
 */
static xstr *get_index(char *buf)
{
  char *sp;
  xstr *xs;

  sp = strchr(buf, ' ');
  if (!sp) {
    return NULL;
  }
  /* Terminate the string at the separator just for the conversion. */
  *sp = 0;
  xs = cstr_to_xstr(buf);
  *sp = ' ';
  return xs;
}

/*
 * Return a pointer to the part of BUF that follows the first run of
 * spaces, i.e. everything after the leading word and its separator.
 *
 * The result points into BUF.  When BUF contains no space at all, the
 * result points at its terminating NUL (an empty string) instead of
 * dereferencing a NULL pointer.
 */
static char *get_entry(char *buf)
{
  char *sp = strchr(buf, ' ');

  if (!sp) {
    return buf + strlen(buf);
  }
  while (*sp == ' ') {
    sp ++;
  }
  return sp;
}

/*
 * Start a new page: write a single 0 byte to page_out and append I
 * to the page index in network byte order.
 */
static void begin_new_page(int i)
{
  putc('\0', page_out);
  write_nl(page_index_out, i);
}

/*
 * Append I to the entry index file in network byte order.
 */
static void output_entry_index(int i)
{
  FILE *index_fp = entry_index_out;

  write_nl(index_fp, i);
}

/*
 * Append to the entry file either the string BUF or, when BUF is NULL,
 * a single 0 byte.
 *
 * Returns 1 for the 0 byte, otherwise the return value of fprintf()
 * (the number of bytes written, or a negative value on error).
 */
static int output_entry(char *buf)
{
  if (buf != NULL) {
    return fprintf(entry_out, "%s", buf);
  }
  fputc(0, entry_out);
  return 1;
}

/*
 * Read "index entry" lines from stdin and fill the four temporary
 * section files.
 *
 *  - Consecutive lines with the same index word are merged into one
 *    entry string, separated by a single space.
 *  - Each new index word is written to page_out as a difference
 *    against the previous word of the same page (see output_diff()),
 *    and the offset of its entry string is appended to the entry
 *    index.
 *  - After every WORDS_PER_PAGE distinct words a new page is started
 *    and its offset is appended to the page index.
 *  - Index words longer than 30 characters are skipped.
 *
 * Reading stops at end of input or at the first line for which
 * get_index() returns NULL.  Finally the terminators are written and
 * the number of words and pages is printed on stdout.
 */
static void do_output()
{
  xstr *prev = NULL, *cur;
  char buf[MAX_LINE_LEN];
  char *ent;
  int count = 0;		/* number of distinct index words */
  int nr_pages = 1;		/* the first page exists from the start */
  int entry_index = 0;		/* current offset in entry_out */
  int page_index = 0;		/* current offset in page_out */

  write_nl(page_index_out, page_index);

  while(read_line(buf) && (cur = get_index(buf))) {
    ent = get_entry(buf);
    if (cur->len > 30) {
      free_xstr(cur);
      continue;
    }
    if (prev && !xstrcmp(prev, cur)) {
      /* same word: append to the entry that is already open */
      entry_index += output_entry(" ");
      entry_index += output_entry(ent);
    } else {
      /* different word */
      if ((count % WORDS_PER_PAGE) == 0 && count) {
        /* switch to the next page */
        page_index ++;		/* accounts for the 0 byte of begin_new_page() */
        begin_new_page(page_index);
        nr_pages ++;
        /* the first word of a page is stored in full */
        free_xstr(prev);
        prev = NULL;
      }
      page_index += output_diff(prev, cur);
      if (count) {
        /* terminate the entry of the previous word */
        entry_index += output_entry(NULL);
      }
      output_entry_index(entry_index);
      entry_index += output_entry(ent);
      count ++;
    }
    if (prev) {
      free_xstr(prev);
    }
    prev = cur;
  }
  /* Release the last index word, which the loop leaves in prev. */
  if (prev) {
    free_xstr(prev);
  }
  fputc(0, entry_out);
  write_nl(page_index_out, 0);
  write_nl(entry_index_out, entry_index);
  /*
   * Report the pages actually started; count / WORDS_PER_PAGE + 1
   * is one too many when count is an exact multiple of the page size.
   */
  printf("Total %d words (%d pages).\n", count, nr_pages);
}

/*
 * Return the current position of FP rounded up to the next multiple
 * of SECTION_ALIGNMENT.
 */
static int get_size (FILE *fp)
{
  long pos = ftell (fp);
  long mask = SECTION_ALIGNMENT - 1;

  return (pos + mask) & ~mask;
}

/*
 * Append the whole contents of IN to OUT.
 * OUT is first padded with 0 bytes up to the next multiple of
 * SECTION_ALIGNMENT; IN is rewound before it is read.
 * The results of the write calls are not checked.
 */
static void copy_file (FILE *in, FILE *out)
{
  char chunk[BUFSIZ];
  size_t got;
  int pos = ftell (out);

  /* Pad OUT to the next aligned offset.  */
  while (pos & (SECTION_ALIGNMENT - 1))
    {
      fputc (0, out);
      pos++;
    }

  /* Copy the contents.  */
  rewind (in);
  for (;;)
    {
      got = fread (chunk, 1, sizeof chunk, in);
      if (got == 0)
        break;
      fwrite (chunk, 1, got, out);
    }
}

/*
 * Assemble the final dictionary file output_fn.
 *
 * The file starts with a header of NR_HEADER_SECTIONS integers in
 * network byte order, followed by the entry index, the entries, the
 * pages and the page index, each starting at an offset that is a
 * multiple of SECTION_ALIGNMENT.
 *
 * Exits with status 1 when the file cannot be opened, written or
 * closed.
 */
static void link_dics()
{
  FILE *fp;
  int buf[NR_HEADER_SECTIONS];
  int i;

  /* The file holds binary data, so open it in binary mode. */
  fp = fopen (output_fn, "wb");
  if (!fp)
    {
      fprintf (stderr, "%s: cannot open dictionary file for output: %s\n",
               progname, strerror (errno));
      exit (1);
    }

  buf[0] = NR_HEADER_SECTIONS * sizeof(int);	/* size of this header */
  buf[1] = 0;
  buf[2] = buf[0];		/* offset of the entry index */
  buf[3] = buf[2] + get_size (entry_index_out);	/* offset of the entries */
  buf[4] = buf[3] + get_size (entry_out);	/* offset of the pages */
  buf[5] = buf[4] + get_size (page_out);	/* offset of the page index */
  for (i = 6; i < NR_HEADER_SECTIONS; i++)
    buf[i] = 0;

  for (i = 0; i < NR_HEADER_SECTIONS; i++) {
    write_nl(fp, buf[i]);
  }
  /* The order below must match the offsets computed above. */
  copy_file (entry_index_out, fp);
  copy_file (entry_out, fp);
  copy_file (page_out, fp);
  copy_file (page_index_out, fp);

  /*
   * Close the file here so that write errors (for example a full
   * disk) are noticed instead of leaving a truncated dictionary.
   */
  if (ferror (fp))
    {
      fclose (fp);
      fprintf (stderr, "%s: error while writing dictionary file\n",
               progname);
      exit (1);
    }
  if (fclose (fp) != 0)
    {
      fprintf (stderr, "%s: cannot close dictionary file: %s\n",
               progname, strerror (errno));
      exit (1);
    }
}


int main(int argc, char **argv)
{
  parse_args(argc, argv);
  open_output_files();
  do_output();

  link_dics();
  return 0;
}
