/* Copyright(C) 2004 Brazil

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.
  
  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.
  
  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "lib/senna_in.h"
#include "inv.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* copied from inv.c */

/* Byte address immediately after the object that p points to. */
#define NEXT_ADDR(p) ((byte *)((p) + 1))

/* encode/decode */

/*
 * DECODE(v, p): read one variable-length unsigned integer starting at p,
 * store it in v and advance p past the bytes consumed.
 *
 * The first byte selects the layout:
 *   0x00-0x8e : the byte itself is the value (1 byte total)
 *   0x8f      : the next 4 bytes hold the value as a raw uint32_t in host
 *               byte order (5 bytes total)
 *   0x90-0x9f : 3 more bytes follow, result is biased by 0x20408f
 *   0xa0-0xbf : 2 more bytes follow, result is biased by 0x408f
 *   0xc0-0xff : 1 more byte follows, result is biased by 0x8f
 *
 * v and p are each assigned exactly once, at the end.  The body is wrapped
 * in do/while(0) so the macro behaves as a single statement (safe in an
 * unbraced if/else), and the raw 4-byte case is copied with memcpy because
 * the source address is not guaranteed to be 4-byte aligned.
 */
#define DECODE(v,p) \
do { \
  uint8_t *_p = (uint8_t *)(p); \
  uint32_t _v = *_p++; \
  switch (_v >> 4) { \
  case 0x08 : \
    if (_v == 0x8f) { \
      memcpy(&_v, _p, sizeof(uint32_t)); \
      _p += sizeof(uint32_t); \
    } \
    break; \
  case 0x09 : \
    _v = (_v - 0x90) * 0x100 + *_p++; \
    _v = _v * 0x100 + *_p++; \
    _v = _v * 0x100 + *_p++ + 0x20408f; \
    break; \
  case 0x0a : \
  case 0x0b : \
    _v = (_v - 0xa0) * 0x100 + *_p++; \
    _v = _v * 0x100 + *_p++ + 0x408f; \
    break; \
  case 0x0c : \
  case 0x0d : \
  case 0x0e : \
  case 0x0f : \
    _v = (_v - 0xc0) * 0x100 + *_p++ + 0x8f; \
    break; \
  default : \
    break; \
  } \
  (v) = _v; \
  (p) = _p; \
} while (0)

/*
 * SKIP(p): advance p past one variable-length integer without computing its
 * value.  The number of bytes skipped is chosen from the first byte exactly
 * as in DECODE: 1 byte for 0x00-0x8e, 5 for 0x8f, 4 for 0x90-0x9f,
 * 3 for 0xa0-0xbf and 2 for 0xc0-0xff.
 *
 * Wrapped in do/while(0) so that it expands to a single statement.
 */
#define SKIP(p) \
do { \
  uint8_t *_p = (uint8_t *)(p); \
  uint32_t _v = *_p++; \
  switch (_v >> 4) { \
  case 0x08 : \
    if (_v == 0x8f) { \
      _p += sizeof(uint32_t); \
    } \
    break; \
  case 0x09 : \
    _p += 3; \
    break; \
  case 0x0a : \
  case 0x0b : \
    _p += 2; \
    break; \
  case 0x0c : \
  case 0x0d : \
  case 0x0e : \
  case 0x0f : \
    _p += 1; \
    break; \
  default : \
    break; \
  } \
  (p) = _p; \
} while (0)

/*
 * Header that precedes each encoded record inside a cursor's buffer.
 * The encoded data starts right after it (see NEXT_ADDR usage below).
 */
typedef struct {
  uint16_t step; /* position of the next record; the scan loops copy it into c->nextb and stop when it is 0 */
  uint16_t jump; /* not read anywhere in this file -- presumably a skip link; confirm against inv.c */
} buffer_rec;

/* Pointer to the pos-th buffer_rec when b is viewed as an array of buffer_rec. */
#define BUFFER_REC_AT(b,pos) (&((buffer_rec *)(b))[(pos)])

/* copied from inv.c */

/* Index under test: opened once in main() and read by every sel_* function. */
sen_index *i;

/* Reference time recorded by t0(); ts() reports elapsed time relative to it. */
struct timeval tv0;

/* Record the current wall-clock time in tv0 as the origin for later ts() calls. */
static inline void
t0(void)
{
  (void)gettimeofday(&tv0, NULL);
}

/*
 * Print the time elapsed since the last t0() call, in microseconds,
 * zero-padded to at least 8 digits and followed by msg.
 *
 * tv_usec counts microseconds, so whole seconds are scaled by 1000000.
 * (CLOCKS_PER_SEC describes clock() ticks and is not the right constant
 * here.)  The arithmetic is widened to long long before multiplying so it
 * cannot overflow a narrower time type.
 */
inline static void
ts(const char *msg)
{
  struct timeval tv;
  long long int t1;
  gettimeofday(&tv, NULL);
  t1 = (long long int)(tv.tv_sec - tv0.tv_sec) * 1000000LL
     + (long long int)(tv.tv_usec - tv0.tv_usec);
  printf("%08lld %s\n", t1, msg);
}

/*
 * Look up key in the lexicon of the global index, open an inverted-index
 * cursor for the resulting term id and walk the cursor's two data areas by
 * hand, printing a timestamp (via ts) after each phase:
 *
 *   1. the linked buffer records reachable through c->nextb, and
 *   2. the byte range [c->cp, c->cpe) (labelled "chunk" in the output).
 *
 * Nothing is collected; only counters are printed.  Returns early, without
 * scanning, when the cursor cannot be opened.
 */
void
sel_direct(const char *key)
{
  sen_id tid;
  sen_inv_cursor *c;
  int bdf = 0, cdf = 0, size = 0, posts = 0;
  t0();
  tid = sen_sym_at(i->lexicon, (unsigned char *)key);
  ts("sym_at");
  printf("tid=%d\n", tid);
  c = sen_inv_cursor_open(i->inv, tid, 1);
  ts("cursor open");
  if (!c) { return; }
  /* Phase 1: follow the record chain until the next-position link is 0. */
  while (c->nextb) {
    uint32_t tf, lrid = c->pb.rid, lsid = c->pb.sid; /* for check */
    buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb);
    c->bp = NEXT_ADDR(br); /* encoded data starts right after the header */
    DECODE(c->pb.rid, c->bp);
    DECODE(c->pb.sid, c->bp);
    /* (rid, sid) must strictly increase along the chain; log when it does not */
    if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) {
      sen_log("brokend!! (%d:%d) -> (%d:%d)", lrid, lsid, c->pb.rid, c->pb.sid);
    }
    c->nextb = br->step;
    DECODE(tf, c->bp);
    /* low bit of tf: an explicit score follows; remaining bits: number of positions */
    if (tf & 1) { DECODE(c->pb.score, c->bp); } else { c->pb.score = 0; }
    c->pb.rest = c->pb.tf = tf >> 1;
    c->pb.pos = 0;
    {
      uint32_t gap;
      /* positions are stored as gaps; accumulate them into an absolute position */
      while (c->pb.rest) {
	c->pb.rest--;
	DECODE(gap, c->bp);
	c->pb.pos += gap;
      }
    }
    bdf++;
  }
  printf("buffer df=%d\n", bdf);
  ts("buffer scan");
  /* Phase 2: decode the contiguous byte range from c->cp up to c->cpe. */
  size = c->cpe - c->cp;
  while (c->cp < c->cpe) {
    uint32_t tf, gap;
    /* rid is delta-encoded; a non-zero rid gap restarts sid from 0 */
    DECODE(gap, c->cp);
    c->pc.rid += gap;
    if (gap) { c->pc.sid = 0; }
    DECODE(gap, c->cp);
    c->pc.sid += gap;
    DECODE(tf, c->cp);
    if (tf & 1) { DECODE(c->pc.score, c->cp); } else { c->pc.score = 0; }
    c->pc.rest = c->pc.tf = tf >> 1;
    c->pc.pos = 0;
    // while (c->cp < c->cpe && c->pc.rest--) { SKIP(c->cp); }
    posts += c->pc.tf;
    /* decode every position gap, stopping early if the range is exhausted */
    while (c->cp < c->cpe && c->pc.rest--) {
      DECODE(gap, c->cp);
      c->pc.pos += gap;
    }
    cdf++;
  }
  printf("chunk size=%d df=%d posts=%d\n", size, cdf, posts);
  ts("chunk scan");
  sen_inv_cursor_close(c);
  ts("cursor close");
}

/*
 * One merged hit produced by sel_direct_merge(): the (rid, sid) pair plus
 * a pointer into the shared int area holding its decoded posting data.
 */
typedef struct {
  uint32_t rid;  /* record id copied from the cursor */
  uint32_t sid;  /* section id copied from the cursor */
  int *posts;    /* write position in the int area at the moment the entry was created */
//uint8_t *rest;
} res_entry;

/*
 * Look up key, open an inverted-index cursor for it and merge the cursor's
 * chunk range [c->cp, c->cpe) with its chain of buffer records into one
 * heap-allocated work area, printing timestamps and counters on the way.
 *
 * Work area layout: res_entry slots are filled upward from the start of the
 * allocation while decoded ints (tf, score, absolute positions) are written
 * downward from its end through pp.
 *
 * For each buffer record, chunk postings that sort before or at it are
 * decoded first; a chunk posting gets its own res_entry unless its
 * (rid, sid) equals the buffer record's.  Chunk postings located after the
 * last buffer record are not visited by this function.
 *
 * NOTE(review): nothing checks that the upward-growing entries and the
 * downward-growing ints stay apart; the 0x40000 slack is what is relied on.
 *
 * Returns early when the cursor cannot be opened or the work area cannot be
 * allocated; the work area is released before returning.
 */
void
sel_direct_merge(const char *key)
{
  sen_id tid;
  sen_inv_cursor *c;
  res_entry *res, *rp;
  uint32_t pos;
  int df = 0, size = 0;
  int *posts, *pp;
  size_t res_bytes;
  t0();
  tid = sen_sym_at(i->lexicon, (unsigned char *)key);
  ts("sym_at");
  printf("tid=%d\n", tid);
  c = sen_inv_cursor_open(i->inv, tid, 1);
  size = sen_inv_estimate_size(i->inv, tid);
  ts("cursor open");
  if (!c) { return; }
  /* 0x40000 extra slots; the original note on this line read SEN_INV_SEGMENT_SIZE */
  res_bytes = (size + 0x40000) * sizeof(res_entry);
  printf("size=%lu\n", (unsigned long)res_bytes);
  res = malloc(res_bytes);
  if (!res) {
    fprintf(stderr, "malloc failed (%lu bytes)\n", (unsigned long)res_bytes);
    sen_inv_cursor_close(c);
    return;
  }

  /* last int-sized cell of the allocation; ints are written from here downward */
  posts = pp = (int *)((char *)res + res_bytes) - 1;
  while (c->nextb) {
    uint32_t tf, gap;
    buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb);
    c->bp = NEXT_ADDR(br);
    DECODE(c->pb.rid, c->bp);
    DECODE(c->pb.sid, c->bp);

    /* drain chunk postings while the current chunk position is <= the buffer record */
    while (c->cp < c->cpe && (c->pc.rid <  c->pb.rid ||
			     (c->pc.rid == c->pb.rid && c->pc.sid <= c->pb.sid))) {
      /* rid is delta-encoded; a non-zero rid gap restarts sid from 0 */
      DECODE(gap, c->cp);
      c->pc.rid += gap;
      if (gap) { c->pc.sid = 0; }
      DECODE(gap, c->cp);
      c->pc.sid += gap;

      /* a chunk posting with the same (rid, sid) as the buffer record gets no entry of its own */
      if (c->pc.rid != c->pb.rid || c->pc.sid != c->pb.sid) {
	rp = &res[df++];
	rp->rid = c->pc.rid;
	rp->sid = c->pc.sid;
	rp->posts = pp;
      }

      DECODE(tf, c->cp);
      /* low bit of tf: an explicit score follows; remaining bits: number of positions */
      if (tf & 1) { DECODE(c->pc.score, c->cp); } else { c->pc.score = 0; }
      *pp-- = c->pc.rest = c->pc.tf = tf >> 1;
      *pp-- = c->pc.score;
      pos = 0;
      // while (c->cp < c->cpe && c->pc.rest--) { SKIP(c->cp); }
      while (c->cp < c->cpe && c->pc.rest--) {
	DECODE(gap, c->cp);
	*pp-- = pos += gap;
      }
    }

    /* now emit the buffer record itself */
    rp = &res[df++];
    rp->rid = c->pb.rid;
    rp->sid = c->pb.sid;
    rp->posts = pp;

    c->nextb = br->step;

    DECODE(tf, c->bp);
    if (tf & 1) { DECODE(c->pb.score, c->bp); } else { c->pb.score = 0; }
    *pp-- = c->pb.rest = c->pb.tf = tf >> 1;
    *pp-- = c->pb.score;
    c->pb.pos = 0;
    {
      while (c->pb.rest) {
	c->pb.rest--;
	DECODE(gap, c->bp);
	*pp-- = c->pb.pos += gap;
      }
    }
  }
  /* posts - pp is the number of ints written into the downward-growing area */
  printf("df=%d posts=%ld\n", df, (long)(posts - pp));
  ts("buffer scan");
  sen_inv_cursor_close(c);
  ts("cursor close");
  free(res);
}

/*
 * Key type that res_add() receives.  res_add() reads rid directly and
 * hands the struct's address to the sen_set lookup functions as the key.
 */
typedef struct {
  sen_id rid;   /* record id; also used for the deleted-record check in res_add() */
  uint32_t sid; /* section id -- not read directly in this file */
  uint32_t pos; /* position -- not read directly in this file */
} posinfo;

/*
 * Value stored per hit in a result set, as manipulated by res_add().
 */
typedef struct {
  int score;       /* running total; res_add() adds each contribution to it */
  int n_subrecs;   /* low 31 bits: sub-record count; bit 31 is set by the sen_sel_and case */
  byte subrecs[1]; /* start of the sub-record heap; each slot is SCORE_SIZE + subrec_size bytes */
} recinfo;

/* Pocket value that makes res_add() skip a record when ignore_deleted_records is set. */
#define DELETE_FLAG 1

/* Size in bytes of the int score stored at the front of every sub-record slot. */
#define SCORE_SIZE (sizeof(int))

/* Bit 31 and the mask of bits 30..0 of a 32-bit word. */
#define B31    0x80000000
#define B30_00 0x7fffffff
/* Keep only bits 30..0 of x. */
#define BIT30_00(x) ((x) & B30_00)

/*
 * Sub-record heap helpers.  A slot is SCORE_SIZE bytes of score followed by
 * `size` bytes of body; slot n starts at subrecs + n * (size + SCORE_SIZE).
 * Every macro argument is parenthesized so compound expressions such as
 * `n + 1` expand correctly.
 */
/* True when a ranks strictly above b for direction dir (+1 or -1). */
#define SUBRECS_CMP(a,b,dir) (((a) - (b))*(dir) > 0)
/* Pointer to the score at the start of slot n. */
#define SUBRECS_NTH(subrecs,size,n) ((int *)((subrecs) + (n) * ((size) + SCORE_SIZE)))
/* Copy one whole slot (score + body) from src into slot n. */
#define SUBRECS_COPY(subrecs,size,n,src) \
  (memcpy((subrecs) + (n) * ((size) + SCORE_SIZE), (src), (size) + SCORE_SIZE))

/*
 * Insert (score, body) into the binary heap stored in subrecs.
 *
 * n_subrecs is the entry count including the one being added, so the new
 * entry starts in the last slot (n_subrecs - 1) and climbs toward the root:
 * each parent is moved down one level until a parent is found that the new
 * score ranks above (SUBRECS_CMP), or the root is reached.
 *
 * size is the body length in bytes; dir is forwarded to SUBRECS_CMP.
 */
inline static void
subrecs_push(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir)
{
  int slot = n_subrecs - 1;
  byte *dest;
  while (slot) {
    int parent = (slot - 1) >> 1;
    int *parent_rec = SUBRECS_NTH(subrecs, size, parent);
    if (SUBRECS_CMP(score, *parent_rec, dir)) { break; }
    /* parent does not rank below the new score: pull it down into the hole */
    SUBRECS_COPY(subrecs, size, slot, parent_rec);
    slot = parent;
  }
  dest = subrecs + slot * (size + SCORE_SIZE);
  *((int *)dest) = score;
  memcpy(dest + SCORE_SIZE, body, size);
}

/*
 * Overwrite the root of the heap in subrecs with (score, body) and restore
 * heap order by moving the hole downward.
 *
 * At each level the children of the hole are examined (a child index at or
 * beyond n_subrecs does not exist).  A child is moved up into the hole when
 * the new score ranks above it; when that holds for both children, the right
 * one is taken only if the left child also ranks above the right one.  The
 * descent stops when neither child qualifies, and the new entry is written
 * into the hole.
 *
 * size is the body length in bytes; dir is forwarded to SUBRECS_CMP.
 */
inline static void
subrecs_replace_min(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir)
{
  int hole = 0;
  byte *dest;
  for (;;) {
    int left = hole * 2 + 1;
    int right = left + 1;
    int *left_rec = left < n_subrecs ? SUBRECS_NTH(subrecs, size, left) : NULL;
    int *right_rec = right < n_subrecs ? SUBRECS_NTH(subrecs, size, right) : NULL;
    int *chosen = NULL;
    int chosen_at = hole;
    if (left_rec && SUBRECS_CMP(score, *left_rec, dir)) {
      chosen = left_rec;
      chosen_at = left;
      if (right_rec && SUBRECS_CMP(score, *right_rec, dir) &&
          SUBRECS_CMP(*left_rec, *right_rec, dir)) {
        chosen = right_rec;
        chosen_at = right;
      }
    } else if (right_rec && SUBRECS_CMP(score, *right_rec, dir)) {
      chosen = right_rec;
      chosen_at = right;
    }
    if (!chosen) { break; }
    SUBRECS_COPY(subrecs, size, hole, chosen);
    hole = chosen_at;
  }
  dest = subrecs + hole * (size + SCORE_SIZE);
  memcpy(dest, &score, SCORE_SIZE);
  memcpy(dest + SCORE_SIZE, body, size);
}

/*
 * Fold one hit into the result set r according to op.
 *
 *   r     - result set; its records set, keys, ignore_deleted_records,
 *           max_n_subrecs, subrec_size and record_size members are read
 *   pi    - key of the hit; pi->rid is also used for the deleted-record check
 *   score - contribution added to the record's score
 *   op    - sen_sel_or:     fetch the record via sen_set_get and accumulate
 *           sen_sel_and:    if already present, set bit 31 of n_subrecs and
 *                           accumulate
 *           sen_sel_but:    if present, delete it
 *           sen_sel_adjust: if present, add score only (no sub-record update)
 *
 * When r->max_n_subrecs is non-zero the hit is also recorded in the
 * record's sub-record heap: pushed while the count (after this hit) does
 * not exceed the limit, otherwise it replaces the heap root if it ranks
 * above it.
 *
 * The root comparison reads the full int score stored at the start of the
 * first slot (not just its first byte) and is done in signed arithmetic;
 * comparing the uint32_t score directly would make SUBRECS_CMP an unsigned
 * "not equal" test.
 */
inline static void
res_add(sen_records *r, posinfo *pi, uint32_t score, sen_sel_operator op)
{
  recinfo *ri;
  sen_set_eh *eh = NULL;
  if (r->ignore_deleted_records &&
      sen_sym_pocket_get(r->keys, pi->rid) == DELETE_FLAG) { return; }
  switch (op) {
  case sen_sel_or :
    eh = sen_set_get(r->records, pi, (void *)&ri);
    break;
  case sen_sel_and :
    if ((eh = sen_set_at(r->records, pi, (void *)&ri))) {
      ri->n_subrecs |= B31;
    }
    break;
  case sen_sel_but :
    if ((eh = sen_set_at(r->records, pi, (void *)&ri))) {
      sen_set_del(r->records, eh);
      eh = NULL; /* entry is gone: skip the accumulation below */
    }
    break;
  case sen_sel_adjust :
    if ((eh = sen_set_at(r->records, pi, (void *)&ri))) {
      ri->score += score;
      eh = NULL; /* score already applied: skip the accumulation below */
    }
    break;
  default :
    break;
  }

  if (eh) {
    int limit = r->max_n_subrecs;
    ri->score += score;
    ri->n_subrecs += 1;
    if (limit) {
      int dir = 1;
      int ssize = r->subrec_size;
      int n_subrecs = BIT30_00(ri->n_subrecs); /* count without the bit-31 flag */
      int sscore = (int)score;                 /* same conversion the heap helpers apply */
      byte *ekey = ((byte *)pi) + r->record_size; /* sub-record body follows the record key */
      if (limit < n_subrecs) {
        /* heap is full: replace the root only if the new score ranks above it */
        if (SUBRECS_CMP(sscore, *SUBRECS_NTH(ri->subrecs, ssize, 0), dir)) {
          subrecs_replace_min(ri->subrecs, ssize, limit, sscore, ekey, dir);
        }
      } else {
        subrecs_push(ri->subrecs, ssize, n_subrecs, sscore, ekey, dir);
      }
    }
  }
}

void
sel_direct_w_result(const char *key)
{
  sen_id tid;
  sen_records *r;
  sen_inv_cursor *c;
  int bdf = 0, cdf = 0, size = 0, posts = 0;
  t0();
  tid = sen_sym_at(i->lexicon, (unsigned char *)key);
  r = sen_records_open(sen_rec_document, sen_rec_none, 0);
  ts("sym_at");
  printf("tid=%d\n", tid);
  c = sen_inv_cursor_open(i->inv, tid, 1);
  ts("cursor open");
  if (!c) { return; }
  while (c->nextb) {
    uint32_t tf, lrid = c->pb.rid, lsid = c->pb.sid; /* for check */
    buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb);
    c->bp = NEXT_ADDR(br);
    DECODE(c->pb.rid, c->bp);
    DECODE(c->pb.sid, c->bp);
    if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) {
      sen_log("brokend!! (%d:%d) -> (%d:%d)", lrid, lsid, c->pb.rid, c->pb.sid);
    }
    c->nextb = br->step;
    DECODE(tf, c->bp);
    if (tf & 1) { DECODE(c->pb.score, c->bp); } else { c->pb.score = 0; }
    c->pb.rest = c->pb.tf = tf >> 1;
    c->pb.pos = 0;
    bdf++;
    res_add(r, (void *) &c->pb, c->pb.score + c->pb.tf, sen_sel_or);
  }
  printf("buffer df=%d\n", bdf);
  ts("buffer scan");
  size = c->cpe - c->cp;
  while (c->cp < c->cpe) {
    uint32_t tf, gap;
    DECODE(gap, c->cp);
    c->pc.rid += gap;
    if (gap) { c->pc.sid = 0; }
    DECODE(gap, c->cp);
    c->pc.sid += gap;
    DECODE(tf, c->cp);
    if (tf & 1) { DECODE(c->pc.score, c->cp); } else { c->pc.score = 0; }
    c->pc.rest = c->pc.tf = tf >> 1;
    c->pc.pos = 0;
    // while (c->cp < c->cpe && c->pc.rest--) { SKIP(c->cp); }
    posts += c->pc.tf;
    while (c->cp < c->cpe && c->pc.rest--) { DECODE(gap, c->cp); }
    cdf++;
    res_add(r, (void *) &c->pc, c->pc.score + c->pc.tf, sen_sel_or);
  }
  printf("chunk size=%d df=%d posts=%d\n", size, cdf, posts);
  ts("chunk scan");
  sen_inv_cursor_close(c);
  ts("cursor close");
  printf("nhits = %d\n", sen_records_nhits(r));
  sen_records_close(r);
}

void
sel_direct_w_hash(const char *key)
{
  sen_id tid;
  sen_set *r;
  sen_set_eh *eh;
  sen_inv_cursor *c;
  int *score;
  int bdf = 0, cdf = 0, size = 0, posts = 0;
  t0();
  tid = sen_sym_at(i->lexicon, (unsigned char *)key);
  r = sen_set_open(sizeof(sen_id), sizeof(int), 150000);
  ts("sym_at");
  printf("tid=%d\n", tid);
  c = sen_inv_cursor_open(i->inv, tid, 1);
  ts("cursor open");
  if (!c) { return; }
  while (c->nextb) {
    uint32_t tf, lrid = c->pb.rid, lsid = c->pb.sid; /* for check */
    buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb);
    c->bp = NEXT_ADDR(br);
    DECODE(c->pb.rid, c->bp);
    DECODE(c->pb.sid, c->bp);
    if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) {
      sen_log("brokend!! (%d:%d) -> (%d:%d)", lrid, lsid, c->pb.rid, c->pb.sid);
    }
    c->nextb = br->step;
    DECODE(tf, c->bp);
    if (tf & 1) { DECODE(c->pb.score, c->bp); } else { c->pb.score = 0; }
    c->pb.rest = c->pb.tf = tf >> 1;
    c->pb.pos = 0;
    bdf++;
    eh = sen_set_get(r, &c->pb, (void *)&score);
    *score = c->pb.score + c->pb.tf;
  }
  printf("buffer df=%d\n", bdf);
  ts("buffer scan");
  size = c->cpe - c->cp;
  while (c->cp < c->cpe) {
    uint32_t tf, gap;
    DECODE(gap, c->cp);
    c->pc.rid += gap;
    if (gap) { c->pc.sid = 0; }
    DECODE(gap, c->cp);
    c->pc.sid += gap;
    DECODE(tf, c->cp);
    if (tf & 1) { DECODE(c->pc.score, c->cp); } else { c->pc.score = 0; }
    c->pc.rest = c->pc.tf = tf >> 1;
    c->pc.pos = 0;
    // while (c->cp < c->cpe && c->pc.rest--) { SKIP(c->cp); }
    posts += c->pc.tf;
    while (c->cp < c->cpe && c->pc.rest--) { DECODE(gap, c->cp); }
    cdf++;
    eh = sen_set_get(r, &c->pc, (void *)&score);
    *score = c->pc.score + c->pc.tf;
  }
  printf("chunk size=%d df=%d posts=%d\n", size, cdf, posts);
  ts("chunk scan");
  sen_inv_cursor_close(c);
  ts("cursor close");
  printf("nhits = %d\n", r->n_entries);
  sen_set_close(r);
}

/*
 * Time a search for key through sen_index_sel() on the global index.
 * When a result is returned, prints a "sel" timestamp and the hit count and
 * closes the result; the final "cursor close" timestamp is printed either
 * way.
 */
void
sel_sel(const char *key)
{
  sen_records *hits;
  t0();
  hits = sen_index_sel(i, key, strlen(key));
  if (hits != NULL) {
    ts("sel");
    printf("nhits = %d\n", sen_records_nhits(hits));
    sen_records_close(hits);
  }
  ts("cursor close");
}

/*
 * Time a search for key through the query API: open an empty result set,
 * build a query from key (default operator sen_sel_and, 10 as the fourth
 * argument, EUC-JP encoding), execute it into the result set with
 * sen_sel_or, then print the hit count.  A timestamp is printed after the
 * query is opened, executed and closed.
 */
void
sel_query(const char *key)
{
  sen_records *hits;
  sen_query *query;
  size_t key_len;
  t0();
  hits = sen_records_open(sen_rec_document, sen_rec_none, 0);
  key_len = strlen(key);
  query = sen_query_open(key, key_len, sen_sel_and, 10, sen_enc_euc_jp);
  ts("query open");
  sen_query_exec(i, query, hits, sen_sel_or);
  ts("query exec");
  sen_query_close(query);
  ts("cursor close");
  printf("%d hits\n", sen_records_nhits(hits));
  sen_records_close(hits);
}

/*
 * Entry point: mtest <mode> <indexname>
 *
 * Only the first character of <mode> is examined:
 *   s -> sel_sel              d -> sel_direct
 *   m -> sel_direct_merge     r -> sel_direct_w_result
 *   h -> sel_direct_w_hash    q -> sel_query
 *
 * After initializing the library and opening the index, each line read from
 * stdin is stripped of its trailing newline (if any) and passed to the
 * selected function as the search key.
 *
 * Returns 0 on normal completion, -1 on a usage error or when the index
 * cannot be opened.
 */
int
main(int argc, char **argv)
{
  static const char usage[] =
    "usage: mtest <sel|direct|merge|result|hash|query> indexname\n";
  void (*func)(const char *);
  char key[0x1000];
  if (argc < 3) {
    fputs(usage, stderr);
    return -1;
  }
  switch (argv[1][0]) {
  case 's' :
    func = sel_sel;
    break;
  case 'd' :
    func = sel_direct;
    break;
  case 'm' :
    func = sel_direct_merge;
    break;
  case 'r' :
    func = sel_direct_w_result;
    break;
  case 'h' :
    func = sel_direct_w_hash;
    break;
  case 'q' :
    func = sel_query;
    break;
  default :
    fputs(usage, stderr);
    return -1;
  }
  t0();
  sen_init();
  ts("init");
  if (!(i = sen_index_open(argv[2]))) {
    /* report the index name that failed, not the mode argument */
    fprintf(stderr, "index open failed (%s)\n", argv[2]);
    return -1;
  }
  ts("open");
  /* fgets returns NULL at end of input, so no separate feof test is needed */
  while (fgets(key, sizeof key, stdin)) {
    /* cut at the newline if present; safe for empty or unterminated lines */
    key[strcspn(key, "\n")] = '\0';
    func(key);
  }
  return 0;
}
