/*
 * Copyright (c) 2007, to-do. All rights reserved.
 */
#include "util.h"
#include "var.h"
#include "_sym.h"
#include "_ext.h"

/************************************************************
 *
 ************************************************************/
/* Node kinds stored in reg_var_t.t: */
/* a single character (reg_var_t.u.c) */
#define REG_CHR   0
/* a 256-entry membership table (reg_var_t.u.s) */
#define REG_SET   1
/* a nested array / group (reg_var_t.u.a) */
#define REG_ARR   2

/* Structural flags stored in reg_arr_t.q: */
/* `^` : the array is anchored at the start */
#define REG_SO    1
/* `$` : the array is anchored at the end */
#define REG_EO    2
/* `|` : the array holds alternatives (u.a) instead of a sequence (u.v) */
#define REG_OR    4
/* `()` : the array is a capturing group */
#define REG_SUB   8

/*
 * Option flags kept in reg_t.f / reg_comp_t.f / reg_exec_t.f.
 * Each option is the bit whose index is the distance of its letter
 * from 'a'; reg_trace() decodes the word back into letters.
 */
/* `i` : ignore case (characters are lower-cased before comparing) */
#define REG_IC     (1 << ('i' - 'a'))
/* `e` : line mode - `.` excludes 0x0a/0x0d and `^`/`$` also match next
   to a line break (as reported by iscrlf()) */
#define REG_EL     (1 << ('e' - 'a'))
/* `o` : match once - reg_exec() refuses any start offset other than 0 */
#define REG_OM     (1 << ('o' - 'a'))

/* `m` : minimal match for unbounded quantifiers (see reg_exec_vars()).
   Because this macro is defined here, every `#ifdef REG_MM` section
   of this file is compiled in. */
#define REG_MM     (1 << ('m' - 'a'))

/* Short names for the structures defined below. */
typedef struct reg_exec_t reg_exec_t;
typedef struct reg_comp_t reg_comp_t;
typedef struct reg_var_t  reg_var_t;
typedef struct reg_arr_t  reg_arr_t;
typedef struct reg_t      reg_t;

/* Compiler state shared by reg_comp() and reg_comp_arr(). */
struct reg_comp_t {
	/* current position in the pattern text (advanced while parsing) */
	char *s;
	/* one past the last pattern character */
	char *e;
	/* 256-byte scratch table in which a character set is assembled */
	char *set;
	/* option flags (REG_IC, REG_EL, ...) */
	int f;
	/* diagnostic stream handed to reg_comp(); may be NULL */
	FILE* err;
};

/* Matcher state for one reg_exec() call. */
struct reg_exec_t {
	/* subject text */
	char *s;
	/* subject length */
	int n;
	/* current offset into the subject */
	int i;
	/* number of capture slots handed out so far (see reg_exec_arr()) */
	int m;
	/* start offsets of the captures */
	int so[10];
	/* end offsets of the captures */
	int eo[10];
	/* option flags copied from reg_t.f */
	int f;
	/* diagnostic stream; may be NULL */
	FILE* err;
#ifdef REG_MM
	/* elements that follow the group currently being matched
	   (installed by reg_exec_child(), read by reg_exec_next()) */
	reg_var_t* vp;
	/* number of elements at vp */
	int vn;
	/* group nesting depth maintained by reg_exec_child() */
	int vd;
#endif
};

/* One quantified element of a sequence. */
struct reg_var_t {
#ifdef REG_MM
	/* stop position cached by reg_exec_next(); 0 means "not computed" */
	int e;
#endif
	/* node kind: REG_CHR, REG_SET or REG_ARR */
	int t;
	/* repeat count packed as (min << 16) | max; reg_exec_vars() treats
	   max == 0 as "no upper bound" */
	int q;
	union {
		/* REG_CHR: the character */
		int c;
		/* REG_SET: heap-allocated 256-byte membership table */
		char *s;
		/* REG_ARR: heap-allocated nested array */
		reg_arr_t *a;
	} u;
};

/* A sequence of elements, or a list of alternatives when REG_OR is set. */
struct reg_arr_t {
	union {
		/* sequence elements (REG_OR clear) */
		reg_var_t* v;
		/* alternatives (REG_OR set) */
		reg_arr_t *a;
	} u;
	/* number of entries in u.v / u.a */
	int n;
	/* REG_SO | REG_EO | REG_OR | REG_SUB */
	int q;
};

/* A compiled pattern. */
struct reg_t {
	/* root reg_arr_t, or NULL when nothing is compiled */
	void* a;
	/* option flags the pattern was compiled with */
	int f;
};

/************************************************************
 *
 ************************************************************/
/* Declared early: reg_arr_trace() and reg_var_trace() call each other. */
static void reg_var_trace(reg_var_t* v, FILE* fp);

/*
 * Prints up to 16 characters of the pattern text that remains at the
 * compiler's current position; prints nothing when no text remains.
 */
static void reg_comp_trace(reg_comp_t* cp, FILE* fp)
{
	int left = cp->e - cp->s;
	if (left <= 0) {
		return;
	}
	fprintf(fp, "comp: %.*s\n", (left > 16) ? 16 : left, cp->s);
}
/*
 * Prints the matcher's current offset followed by up to 16 characters
 * of the subject from that offset; prints nothing at or past the end.
 */
static void reg_exec_trace(reg_exec_t* ex, FILE* fp)
{
	int left = ex->n - ex->i;
	if (left <= 0) {
		return;
	}
	fprintf(fp, "match: [%d] %.*s\n", ex->i,
		(left > 16) ? 16 : left, ex->s + ex->i);
}
/*
 * Dumps an array node: its entry count and flags, then every entry
 * (alternatives when REG_OR is set, sequence elements otherwise).
 */
static void reg_arr_trace(reg_arr_t* a, FILE* fp)
{
	const char *so  = (a->q & REG_SO)  ? "^ " : "";
	const char *eo  = (a->q & REG_EO)  ? "$ " : "";
	const char *alt = (a->q & REG_OR)  ? "| " : "";
	const char *sub = (a->q & REG_SUB) ? "()" : "";
	int k;

	fprintf(fp, "arr: [%d] %s%s%s%s\n", a->n, so, eo, alt, sub);
	for (k = 0; k < a->n; k++) {
		if (a->q & REG_OR) {
			reg_arr_trace(&a->u.a[k], fp);
		} else {
			reg_var_trace(&a->u.v[k], fp);
		}
	}
}
/*
 * Dumps one sequence element: its packed repeat range (min, max) and
 * then its payload according to the node kind.
 *
 * A character node may hold a negative value (a plain `char` with the
 * high bit set), and isprint() is only defined for unsigned-char values
 * and EOF, so the printable test is restricted to 0..255. Anything else
 * is shown in hex; a negative value is shown as its byte value.
 */
static void reg_var_trace(reg_var_t* v, FILE* fp)
{
	int i, c;
	fprintf(fp, "var: 0x%04x - 0x%04x\n", v->q >> 16, v->q & 0xffff);
	switch (v->t) {
	case REG_CHR:
		c = v->u.c;
		if ((c >= 0) && (c <= 0xff) && isprint(c)) {
			fprintf(fp, "chr: '%c'\n", c);
		} else {
			fprintf(fp, "chr: 0x%02x\n",
				(c < 0) ? (unsigned)(unsigned char)c : (unsigned)c);
		}
		break;
	case REG_SET:
		/* report how many table entries are switched on */
		for (c = i = 0; i < 256; i++) {
			c += v->u.s[i];
		}
		fprintf(fp, "set: %d\n", c);
		break;
	case REG_ARR:
		reg_arr_trace(v->u.a, fp);
		break;
	default:
		fprintf(fp, "? (%d)\n", v->t);
		break;
	}
}
/*
 * Dumps a compiled pattern: the option letters whose bits are set in
 * reg->f, followed by the node tree when one exists.
 */
static void reg_trace(reg_t* reg, FILE* fp)
{
	char letters[32];
	int bit;
	int len = 0;

	for (bit = 0; bit <= 'z' - 'a'; bit++) {
		if (reg->f & (1 << bit)) {
			letters[len++] = 'a' + bit;
		}
	}
	letters[len] = 0;
	fprintf(fp, "reg: %s\n", letters);
	if (!reg->a) {
		return;
	}
	reg_arr_trace(reg->a, fp);
}

/************************************************************
 *
 ************************************************************/
/* Returns a zero-filled array node from the heap (NULL if calloc fails). */
static reg_arr_t* reg_arr_alloc()
{
	reg_arr_t *node = calloc(1, sizeof(*node));
	return node;
}
/*
 * Appends a character element to the sequence `a`; the new element
 * repeats exactly once (min 1, max 1).
 */
static void reg_arr_addChr(reg_arr_t* a, int c)
{
	reg_var_t* slot;
	int at = a->n;

	a->u.v = realloc(a->u.v, (at + 1) * sizeof(reg_var_t));
	a->n = at + 1;
	slot = &a->u.v[at];
	memset(slot, 0, sizeof(*slot));
	slot->t = REG_CHR;
	slot->q = 0x10001;
	slot->u.c = c;
}
/*
 * Appends a set element to the sequence `a`. The 256-byte table `s` is
 * copied to the heap, so the caller keeps ownership of `s`. The new
 * element repeats exactly once (min 1, max 1).
 */
static void reg_arr_addSet(reg_arr_t* a, char *s)
{
	reg_var_t* slot;
	int at = a->n;

	a->u.v = realloc(a->u.v, (at + 1) * sizeof(reg_var_t));
	a->n = at + 1;
	slot = &a->u.v[at];
	memset(slot, 0, sizeof(*slot));
	slot->t = REG_SET;
	slot->q = 0x10001;
	slot->u.s = malloc(256);
	memcpy(slot->u.s, s, 256);
}
/*
 * Appends a nested-array element to the sequence `a`. The structure
 * `*a2` is copied shallowly to the heap, so whatever `a2` pointed at is
 * owned by the new element afterwards. The new element repeats exactly
 * once (min 1, max 1).
 */
static void reg_arr_addArr(reg_arr_t* a, reg_arr_t* a2)
{
	reg_var_t* slot;
	int at = a->n;

	a->u.v = realloc(a->u.v, (at + 1) * sizeof(reg_var_t));
	a->n = at + 1;
	slot = &a->u.v[at];
	memset(slot, 0, sizeof(*slot));
	slot->t = REG_ARR;
	slot->q = 0x10001;
	slot->u.a = malloc(sizeof(reg_arr_t));
	memcpy(slot->u.a, a2, sizeof(reg_arr_t));
}
/*
 * Appends a shallow copy of `*a2` to the alternative list of `a` and
 * returns the address of the stored copy. The address is only valid
 * until the list is grown again, because the list is realloc'ed.
 */
static reg_arr_t* reg_arr_addOr(reg_arr_t* a, reg_arr_t* a2)
{
	reg_arr_t* stored;
	int at = a->n;

	a->u.a = realloc(a->u.a, (at + 1) * sizeof(reg_arr_t));
	a->n = at + 1;
	stored = &a->u.a[at];
	memcpy(stored, a2, sizeof(*stored));
	return stored;
}

/************************************************************
 *
 ************************************************************/
/* Declared early: reg_var_clear() and reg_arr_clear() call each other. */
static void reg_arr_clear(reg_arr_t* a);

/*
 * Releases the heap payload of one element: the table of a set, or the
 * nested array (contents first, then the node) of a group. Character
 * elements own nothing.
 */
static void reg_var_clear(reg_var_t* v)
{
	switch (v->t) {
	case REG_SET:
		free(v->u.s);
		break;
	case REG_ARR:
		reg_arr_clear(v->u.a);
		free(v->u.a);
		break;
	default:
		break;
	}
}
/*
 * Releases everything an array node owns and resets it to the empty
 * state (no entries, no flags). The node itself is not freed.
 */
static void reg_arr_clear(reg_arr_t* a)
{
	int k;

	if (a->q & REG_OR) {
		/* list of alternatives */
		reg_arr_t *alts = a->u.a;
		if (alts) {
			for (k = 0; k < a->n; k++) {
				reg_arr_clear(&alts[k]);
			}
			free(alts);
			a->u.a = NULL;
		}
	} else {
		/* sequence of elements */
		reg_var_t *vars = a->u.v;
		if (vars) {
			for (k = 0; k < a->n; k++) {
				reg_var_clear(&vars[k]);
			}
			free(vars);
			a->u.v = NULL;
		}
	}
	a->q = 0;
	a->n = 0;
}
/* Releases the contents of a heap-allocated array node, then the node. */
static void reg_arr_free(reg_arr_t* a)
{
	reg_arr_t *node = a;

	reg_arr_clear(node);
	free(node);
}
/*
 * Releases the node tree of a compiled pattern (when there is one) and
 * zeroes the whole reg_t so it reads as "nothing compiled".
 */
static void reg_free(reg_t* reg)
{
	reg_arr_t *root = reg->a;

	if (root) {
		reg_arr_free(root);
	}
	memset(reg, 0, sizeof(*reg));
}

/************************************************************
 *
 ************************************************************/
/*
 * Parses pattern text from cp->s into `top` until the text ends or, when
 * `grp` is non-zero, until the `)` that closes the group being parsed.
 *
 *   top  array to fill; turned into an alternation node at the first `|`
 *   cp   compiler state; cp->s is left just past the consumed text
 *   grp  non-zero while parsing the inside of "(...)" or "(?...)"
 *
 * Returns 1 on success and 0 on failure. exerrif() is defined outside
 * this file; the code relies on it leaving the function with 0 when its
 * condition holds (presumably after reporting the message - TODO confirm).
 *
 * Local state: `k` is 0 outside a bracket set, -1 inside one, and holds
 * the first character of a pending "a-b" range otherwise; `v` is the
 * value written into the set table (1 for "[...]", 0 for "[^...]").
 *
 * Changes against the previous implementation:
 *  - "{n,m}" skips the comma before reading the maximum (it used to fail
 *    with "bad qtz end" every time) and never reads at or past cp->e;
 *  - whether `)` is legal is decided by `grp` instead of top->q, so
 *    "(?...)" groups and groups containing `|` can be closed;
 *  - when a capturing group becomes an alternation, REG_SUB stays on the
 *    alternation node so the capture covers whichever branch matched;
 *  - set table entries are addressed by byte value, so a negative plain
 *    `char` no longer writes in front of the table;
 *  - <ctype.h> functions are only called with values they are defined for.
 */
static int reg_comp_seq(reg_arr_t* top, reg_comp_t* cp, int grp)
{
	reg_arr_t *arr, ar2;
	char *s, *e;
	int c, k, v;
	if (!cp->s || !(cp->s < cp->e)) {
		return 0;
	}
	arr = top;
	k = v = 0;
	s = cp->s;
	e = cp->e;
	while ((s < e) && ((c = *s) != 0)) {
		cp->s = s++;
		if (k) {
			/* inside [...]: only `]` and `\` are special */
			if (c == ']') {
				exerrif(cp, k >= 0, "bad set end");
				reg_arr_addSet(arr, cp->set);
				k = 0;
				continue;
			}
		}
		else if (c == ')') {
			exerrif(cp, !grp, "bad sub end");
			cp->s = s;
			return 1;
		}
		else if (c == '|') {
			if (!(top->q & REG_OR)) {
				/* first `|`: what was parsed so far becomes
				   alternative 0; the capture flag stays on
				   the alternation node itself */
				int sub = top->q & REG_SUB;
				memcpy(&ar2, top, sizeof(reg_arr_t));
				ar2.q &= ~REG_SUB;
				memset(top, 0, sizeof(reg_arr_t));
				reg_arr_addOr(top, &ar2);
				top->q |= REG_OR | sub;
			}
			memset(&ar2, 0, sizeof(reg_arr_t));
			arr = reg_arr_addOr(top, &ar2);
			continue;
		}
		else if (c == '$') {
			exerrif(cp, arr->q & REG_EO, "bad bot");
			arr->q |= REG_EO;
			continue;
		}
		else if (arr->q & REG_EO) {
			/* nothing may follow `$` within one alternative */
			exerrif(cp, arr->q & REG_EO, "bad char after bot");
		}
		else if (c == '^') {
			exerrif(cp, arr->n || (arr->q & REG_SO), "bad sot");
			arr->q |= REG_SO;
			continue;
		}
		else if (c == '(') {
			exerrif(cp, s >= e, "bad sub");
			memset(&ar2, 0, sizeof(reg_arr_t));
			if (*s == '?') {
				s++; /* escape sub */
			} else {
				ar2.q |= REG_SUB;
			}
			cp->s = s;
			if (!reg_comp_seq(&ar2, cp, 1)) {
				reg_arr_clear(&ar2);
				return 0;
			}
			reg_arr_addArr(arr, &ar2);
			s = cp->s;
			continue;
		}
		else if (c == '{') {
			exerrif(cp, !arr->n || (s >= e), "bad qtz");
			k = v = 0;
			while ((s < e) && isdigit((unsigned char)*s)) {
				/* stop before the accumulator can overflow */
				exerrif(cp, k > 0x7fff, "bad qtz range");
				k = (k * 10) + (*s++ - '0');
			}
			v = k;
			if ((s < e) && (*s == ',')) {
				s++;
				/* "{n,}" leaves the maximum at 0 */
				v = 0;
				while ((s < e) && isdigit((unsigned char)*s)) {
					exerrif(cp, v > 0xffff, "bad qtz range");
					v = (v * 10) + (*s++ - '0');
				}
			}
			exerrif(cp, (s >= e) || (*s++ != '}'), "bad qtz end");
			exerrif(cp, (k > 0x7fff) || (v > 0xffff), "bad qtz range");
			(arr->u.v + (arr->n - 1))->q = (k << 16) | v;
			k = v = 0;
			continue;
		}
		else if (strchr("*?+", c)) {
			exerrif(cp, !arr->n, "bad qtz char");
			/* `+` => min 1, `?` => max 1, otherwise min 0 / no max */
			(arr->u.v + (arr->n - 1))->q
				= ((c == '+') << 16) | (c == '?');
			k = v = 0;
			continue;
		}
		else if (c == '.') {
			memset(cp->set, 1, 256);
			if (cp->f & REG_EL) { /* each line */
				cp->set[0xa] = 0;
				cp->set[0xd] = 0;
			}
			reg_arr_addSet(arr, cp->set);
			continue;
		}
		else if (c == '[') {
			k = -1;
			v = 1;
			if ((s < e) && (*s == '^')) {
				s++;
				v = 0;
			}
			memset(cp->set, !v, 256);
			continue;
		}
		if ((c == '\\') && (s < e) && *s) {
			if (strchr("swd", *s)) {
				c = *s++;
				if (!k) {
					/* class used on its own: build a fresh set */
					memset(cp->set, 0, 256);
					v = 1;
				} else {
					exerrif(cp, k > 0, "bad set range");
				}
				if (c == 's') {
					for (c = 0x7; c<=0xd; c++) cp->set[c]=v;
					cp->set[' ']=v;
				} else if (c == 'w') {
					for (c = '0'; c<='9'; c++) cp->set[c]=v;
					for (c = 'a'; c<='z'; c++) cp->set[c]=v;
					for (c = 'A'; c<='Z'; c++) cp->set[c]=v;
					cp->set['_']=v;
				} else if (c == 'd') {
					for (c = '0'; c<='9'; c++) cp->set[c]=v;
				}
				if (!k) {
					reg_arr_addSet(arr, cp->set);
				}
				continue;
			} else {
				c = meta_to_c(&s, 0);
			}
		}
		if ((cp->f & REG_IC) && (c >= 0) && (c <= 0xff)) {
			/* tolower() is only defined for unsigned-char values */
			c = tolower(c);
		}
		if (!k) {
			reg_arr_addChr(arr, c);
		} else {
			/* the table is addressed by byte value */
			c = (unsigned char)c;
			if (k < 0) {
				if ((s < e) && (*s == '-')) {
					/* start of a range; keep k non-zero */
					s++;
					k = c ? c : 1;
				} else {
					cp->set[c]=v;
				}
			} else {
				exerrif(cp, c < k, "bad set range");
				while (k <= c) {
					cp->set[k++]=v;
				}
				k = -1;
			}
		}
	}
	exerrif(cp, k != 0, "bad set end");
	exerrif(cp, grp, "bad sub end");
	cp->s = s;
	return 1;
}

/*
 * Compiles the pattern text described by `cp` into `top`. Entry point
 * for a whole pattern: a `)` without a matching `(` is an error.
 * Returns 1 on success, 0 on failure (including an empty pattern).
 */
static int reg_comp_arr(reg_arr_t* top, reg_comp_t* cp)
{
	return reg_comp_seq(top, cp, 0);
}
/*
 * Compiles the `n` bytes at `s` with option flags `f` into `reg`.
 *
 * `reg` is zeroed first. A NULL or empty pattern leaves it that way and
 * returns 0. Otherwise returns 1 on success; on failure the remaining
 * pattern text is traced to `fp` (when non-NULL), the partial tree is
 * released, reg->a is reset to NULL and 0 is returned. reg->f keeps `f`
 * in both cases.
 */
static int reg_comp(reg_t* reg, char *s, int n, int f, FILE* fp)
{
	char table[256];
	reg_comp_t state;
	reg_arr_t *root;

	memset(reg, 0, sizeof(*reg));

	/* null reg match with null string */
	if ((n <= 0) || !s) {
		return 0;
	}

	memset(&state, 0, sizeof(state));
	state.s = s;
	state.e = s + n;
	state.set = table;
	state.f = f;
	state.err = fp;

	root = reg_arr_alloc();
	reg->f = f;
	reg->a = root;

	if (reg_comp_arr(root, &state)) {
		return 1;
	}

	if (fp) {
		reg_comp_trace(&state, fp);
	}
	reg_arr_free(root);
	reg->a = NULL;
	return 0;
}

/************************************************************
 *
 ************************************************************/
/* The matcher functions are mutually recursive, so all are declared first.
   Each returns 1 on a match and 0 otherwise. */
static int reg_exec_c(int c, reg_exec_t* ex);
static int reg_exec_set(char *set, reg_exec_t* ex);
static int reg_exec_or(reg_arr_t* arr, reg_exec_t* ex);
static int reg_exec_and(reg_arr_t* arr, reg_exec_t* ex);
static int reg_exec_arr(reg_arr_t* a, reg_exec_t* ex);
static int reg_exec_vars(reg_var_t* v, int n, reg_exec_t* ex);

/*
 * Matches the single character `c` at the current subject offset.
 * On success the offset advances by one and 1 is returned; otherwise
 * the offset is left alone and 0 is returned.
 *
 * With REG_IC the subject character is lower-cased first. tolower() is
 * only defined for unsigned-char values and EOF, so a negative plain
 * `char` (a byte with the high bit set) is compared as it is.
 */
static int reg_exec_c(int c, reg_exec_t* ex)
{
	int ch;
	if ((ex->i < 0) || (ex->i >= ex->n)) {
		return 0;
	}
	ch = ex->s[ex->i];
	if ((ex->f & REG_IC) && (ch >= 0)) {
		ch = tolower(ch);
	}
	if (ch != c) {
		return 0;
	}
	ex->i++;
	return 1;
}
/*
 * Matches the current subject character against a 256-entry membership
 * table. On success the offset advances by one and 1 is returned;
 * otherwise the offset is left alone and 0 is returned.
 *
 * The table is addressed by byte value: a negative plain `char` used to
 * index in front of the table. With REG_IC the character is lower-cased
 * first, but only when tolower() is defined for it (non-negative).
 */
static int reg_exec_set(char *set, reg_exec_t* ex)
{
	int ch;
	if ((ex->i < 0) || (ex->i >= ex->n)) {
		return 0;
	}
	ch = ex->s[ex->i];
	if ((ex->f & REG_IC) && (ch >= 0)) {
		ch = tolower(ch);
	}
	if (!set[(unsigned char)ch]) {
		return 0;
	}
	ex->i++;
	return 1;
}

#ifdef REG_MM
/*
 * Minimal-match helper: works out where the element `xp` has to stop so
 * that whatever follows it can still match, and caches that in xp->e.
 *
 *   xp  the element being repeated
 *   vn  number of elements from `xp` to the end of its sequence
 *   ex  matcher state; i, m, vp and vn are restored before returning
 *
 * The "following" elements are the rest of xp's own sequence when there
 * is one, otherwise the continuation recorded in ex->vp / ex->vn.
 * xp->e becomes (first offset at which the following elements match) + 1,
 * ex->n when nothing follows at the top level, and 0 when no stop
 * position exists. Returns non-zero when xp->e is greater than 0.
 */
static int reg_exec_next(reg_var_t* xp, int vn, reg_exec_t* ex)
{
	reg_var_t *up, *vp;
	int un, i, m, e;
	/* remember everything the trial matches below may disturb */
	i = ex->i;
	m = ex->m;
	up = ex->vp;
	un = ex->vn;
	if (--vn > 0) {
		/* more elements follow in the same sequence */
		vp = xp + 1;
	} else {
		/* last element: continue with what follows the enclosing
		   group, and hide that continuation during the trial */
		vp = up;
		vn = un;
		ex->vp = NULL;
		ex->vn = 0;
	}
	if (!vp || !vn) {
		if (ex->vd <= 0) { /* top vars */
			e = ex->n;
		} else {
			e = 0;
		}
	} else {
		/* scan forward for the first offset where the rest matches */
		e = i;
		while (e < ex->n) {
			ex->i = e;
			if (reg_exec_vars(vp, vn, ex)) {
				break;
			}
			e++;
		}
		if (e >= ex->n) {
			e = 0;
		} else {
			/* stored one higher so that 0 can mean "none" */
			e++;
		}
	}
	ex->i = i;
	ex->m = m;
	ex->vp = up;
	ex->vn = un;
	xp->e = e;
	return (e > 0);
}
#endif

/*
 * Matches one occurrence of the element `vp` at the current offset.
 *
 *   vp  element to match
 *   vn  number of elements from `vp` to the end of its sequence
 *
 * Characters and sets are matched directly. A nested array is handed to
 * reg_exec_arr(); in minimal-match mode the elements that follow `vp`
 * are first published in ex->vp / ex->vn (and ex->vd is raised) for the
 * duration of that call, then the previous values are put back.
 * Returns 1 on a match, 0 otherwise.
 */
static int reg_exec_child(reg_var_t* vp, int vn, reg_exec_t* ex)
{
	if (vp->t == REG_CHR) {
		return reg_exec_c(vp->u.c, ex);
	}
	if (vp->t == REG_SET) {
		return reg_exec_set(vp->u.s, ex);
	}
#ifdef REG_MM
	if (ex->f & REG_MM) {
		reg_var_t *outer_vp = NULL;
		int outer_vn = 0;
		int rest = vn - 1;
		int res;

		if (rest > 0) {
			outer_vp = ex->vp;
			outer_vn = ex->vn;
			ex->vp = vp + 1;
			ex->vn = rest;
			ex->vd++;
		}
		res = reg_exec_arr(vp->u.a, ex);
		if (rest > 0) {
			ex->vp = outer_vp;
			ex->vn = outer_vn;
			ex->vd--;
		}
		return res;
	}
#endif
	return reg_exec_arr(vp->u.a, ex);
}

/*
 * Matches the `vn` elements starting at `vp` one after another from the
 * current offset. Each element is repeated as often as it matches,
 * subject to its packed range (min in the high 16 bits of q, max in the
 * low 16 bits, max 0 meaning "no upper bound"). There is no
 * backtracking into an earlier element.
 *
 * Returns 1 when every element reached its minimum, 0 otherwise.
 */
static int reg_exec_vars(reg_var_t* vp, int vn, reg_exec_t* ex)
{
	int len, min, max;
	for (; vn > 0; vn--, vp++) {
		assert((vp->t >= REG_CHR) && (vp->t <= REG_ARR));
		min = vp->q >> 16;
		max = vp->q & 0xffff;
		/* len counts the repetitions matched so far */
		len = 0;
		while ((ex->i >= 0) && (ex->i < ex->n)) {
			if (max > 0) {
				if (len >= max) {
					break;
				}
			}
#ifdef REG_MM
			/* this `else if` belongs to `if (max > 0)`: minimal
			   matching only applies to unbounded elements, once
			   the minimum has been reached */
			else if ((ex->f & REG_MM) && (len >= min)) {
				if (vp->e <= 0) {
					/* stop position not cached yet */
					if (!reg_exec_next(vp, vn, ex)) {
						break;
					}
				}
				/* vp->e is stored one higher than the offset */
				if ((ex->i + 1) >= vp->e) {
					break;
				}
			}
#endif
			if (!reg_exec_child(vp, vn, ex)) {
				break;
			}
			len++;
		}
		if (len < min) {
			/* leaves vn > 0, which reports the failure below */
			break;
		}
	}
	return (vn <= 0);
}

/*
 * Matches a sequence array at the current offset, honouring its `^` and
 * `$` anchors. Outside line mode `^` needs offset 0 and `$` needs the
 * end of the subject; in line mode (REG_EL) a neighbouring character
 * accepted by iscrlf() is enough. Returns 1 on a match, 0 otherwise.
 */
static int reg_exec_and(reg_arr_t* arr, reg_exec_t* ex)
{
	int by_line = ex->f & REG_EL;

	/* `^`: only at offset 0, or right after a line break in line mode */
	if ((arr->q & REG_SO) && (ex->i > 0)
	 && (!by_line || !iscrlf(ex->s[ex->i - 1]))) {
		return 0;
	}
	if (!reg_exec_vars(arr->u.v, arr->n, ex)) {
		return 0;
	}
	/* `$`: only at the end, or right before a line break in line mode */
	if ((arr->q & REG_EO) && (ex->i < ex->n)
	 && (!by_line || !iscrlf(ex->s[ex->i]))) {
		return 0;
	}
	return 1;
}
/*
 * Tries the alternatives of an alternation array in order and returns 1
 * as soon as one of them matches, 0 when none does.
 */
static int reg_exec_or(reg_arr_t* arr, reg_exec_t* ex)
{
	int k;

	for (k = 0; k < arr->n; k++) {
		if (reg_exec_arr(&arr->u.a[k], ex)) {
			return 1;
		}
	}
	return 0;
}
/*
 * Matches one array node (sequence or alternation) at the current
 * offset. A capturing node (REG_SUB) takes the next capture slot and,
 * on success, records its start and end offsets there; only the first
 * 10 slots are stored.
 *
 * On failure the subject offset is put back to where this call started
 * and a capture slot taken by this node is given back. The offset used
 * to be left at start + 1, which made the next alternative, or the next
 * element after an optional group, start one character too far.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int reg_exec_arr(reg_arr_t* arr, reg_exec_t* ex)
{
	int start, slot, ok;

	start = ex->i;
	slot = (arr->q & REG_SUB) ? ex->m++ : -1;

	if (arr->q & REG_OR) {
		ok = reg_exec_or(arr, ex);
	} else {
		ok = reg_exec_and(arr, ex);
	}
	if (!ok) {
		ex->i = start;
		if (slot >= 0) {
			ex->m = slot;
		}
		return 0;
	}
	if ((slot >= 0) && (slot < 10)) {
		ex->so[slot] = start;
		ex->eo[slot] = ex->i;
	}
	return 1;
}
/*
 * Searches the subject for the root array, starting at ex->i.
 *
 * The root is always marked REG_SUB here, so the whole match takes a
 * capture slot. A pattern with `$` but no `^` is searched backwards
 * from the end (of the line in line mode, of the subject otherwise);
 * every other pattern is searched forwards, and a `^` pattern is only
 * tried at a single position. Returns 1 on a match, 0 otherwise.
 */
static int reg_exec_top(reg_arr_t* arr, reg_exec_t* ex)
{
	int i = ex->i;
	arr->q |= REG_SUB;
	if (!(arr->q & REG_SO) && (arr->q & REG_EO)) {
		/* if req `$` and not `^` */
		/* reverse search */
		if (ex->f & REG_EL) {
			/* search lines */
			if (i > 0) {
				/* if not first, skip crlf */
				while ((i < ex->n) && iscrlf(ex->s[i])) i++;
			}
			/* seek crlf (this line's end) */
			while ((i < ex->n) && !iscrlf(ex->s[i])) i++;
			/* set position to the one before the crlf */
			if (i > 0) i--;
		} else {
			/* whole-buffer search: only possible from offset 0 */
			if (i > 0) return 0;
			i = ex->n - 1;
		}
		/* try every start position, moving towards the front */
		while (i >= 0) {
			ex->i = i;
			if (reg_exec_arr(arr, ex)) {
				return 1;
			}
			i--;
		}
		return 0;
	}
	if ((arr->q & REG_SO) && (i > 0)) {
		/* if req `^` and not at the first position */
		if (!(ex->f & REG_EL)) return 0;
		/* if searching lines, seek next line */
		if (!iscrlf(ex->s[i - 1])) {
			/* the previous search may not have reached a crlf,
			   so seek the next crlf */
			while ((i < ex->n) && !iscrlf(ex->s[i])) i++;
		}
		/* skip the (previous line's) crlf */
		while ((i < ex->n) && iscrlf(ex->s[i])) i++;
	}
	while (i < ex->n) {
		ex->i = i;
		if (reg_exec_arr(arr, ex)) {
			return 1;
		}
		if (arr->q & REG_SO) {
			/* an anchored pattern gets exactly one attempt */
			return 0;
		}
		i++;
	}
	return 0;
}
/*
 * Runs the compiled pattern `reg` over the `n` bytes at `s`, starting
 * the search at offset `i`.
 *
 *   so, eo  receive the start and end offsets of the captures
 *   fp      when non-NULL, a failed search is traced to it
 *
 * A NULL subject is treated as the empty string. A pattern without a
 * tree matches only the empty subject at offset 0 (returns 1 with
 * so[0] = eo[0] = 0). With REG_OM any non-zero start offset fails.
 * Returns 0 when nothing matches, otherwise the number of capture slots
 * used (the whole match counts as one).
 */
static int reg_exec(reg_t* reg, char *s, int n, int i, int so[10], int eo[10], FILE* fp)
{
	reg_exec_t state;

	if (!s) {
		s = "";
		n = 0;
	}
	if (!reg->a) {
		/* pattern is null: match with null once */
		if (n || i) {
			return 0;
		}
		so[0] = eo[0] = 0;
		return 1;
	}
	if ((reg->f & REG_OM) && (i != 0)) {
		return 0;
	}

	memset(&state, 0, sizeof(state));
	state.s = s;
	state.n = n;
	state.i = i;
	state.m = 0;
	state.f = reg->f;
	state.err = fp;

	if (reg_exec_top(reg->a, &state)) {
		memcpy(so, state.so, sizeof(state.so));
		memcpy(eo, state.eo, sizeof(state.eo));
		assert(state.m > 0);
		return state.m;
	}

	if (fp) {
		reg_exec_trace(&state, fp);
	}
	return 0;
}

/************************************************************
 *
 ************************************************************/
/*
 * Public entry: compiles the `n` bytes at `s` with flags `f` into the
 * reg_t at `p`, passing ex->err as the diagnostic stream.
 * Returns what reg_comp() returns.
 */
int regex_comp(exec_t *ex, void *p, char *s, int n, int f)
{
	reg_t *reg = p;

	return reg_comp(reg, s, n, f, ex->err);
}
/*
 * Public entry: runs the reg_t at `p` over the `n` bytes at `s` from
 * offset `i`, with tracing switched off. `ex` is not used.
 * Returns what reg_exec() returns.
 */
int regex_exec(exec_t *ex, void *p, char *s, int n, int i, int so[10], int eo[10])
{
	reg_t *reg = p;

	return reg_exec(reg, s, n, i, so, eo, NULL);
}
/* Public entry: dumps the reg_t at `p` to ex->out. */
void regex_trace(exec_t* ex, void *p)
{
	reg_t *reg = p;

	reg_trace(reg, ex->out);
}
/*
 * Public entry: hands `p` to free_link(), which is defined outside this
 * file. NOTE(review): presumably this releases an object obtained from
 * regex_alloc() - confirm against free_link().
 */
void regex_free(exec_t* ex, void *p)
{
	free_link(ex, p);
}
/*
 * Public entry: asks alloc_link() (defined outside this file) for an
 * object of sizeof(reg_t) bytes tagged with regex_proto and returns it.
 * NOTE(review): whether the memory comes back zeroed is up to
 * alloc_link() - confirm before using it without regex_comp().
 */
void* regex_alloc(exec_t* ex)
{
	return alloc_link(ex, sizeof(reg_t), regex_proto);
}
/*
 * Public entry: releases the compiled tree held by the reg_t at `p` and
 * zeroes it; the reg_t itself is not freed. `ex` is not used.
 */
void regex_clear(exec_t* ex, void* p)
{
	reg_t *reg = p;

	reg_free(reg);
}
