/*
 * Copyright (c) 2007, to-do. All rights reserved.
 */
#include "util.h"
#include "var.h"
#include "_sym.h"
#include "_ext.h"

/************************************************************
 *
 ************************************************************/
#define REG_CHR   0
#define REG_SET   1
#define REG_ARR   2

#define REG_SO    1
#define REG_EO    2
#define REG_OR    4
#define REG_SUB   8

#define REG_IC     (1 << ('i' - 'a'))
#define REG_EL     (1 << ('e' - 'a'))
#define REG_OM     (1 << ('o' - 'a'))

/* Short names for the structures defined below. */
typedef struct reg_exec_t reg_exec_t;
typedef struct reg_comp_t reg_comp_t;
typedef struct reg_var_t  reg_var_t;
typedef struct reg_arr_t  reg_arr_t;
typedef struct reg_t      reg_t;

/* State of one match attempt (filled in by reg_exec). */
struct reg_exec_t {
	char *s;    /* subject text */
	int n;      /* length of the subject */
	int i;      /* current position inside the subject */
	int m;      /* number of group slots handed out so far */
	int so[10]; /* start offset of each recorded group */
	int eo[10]; /* end offset of each recorded group */
};

/* State of one pattern compilation (filled in by reg_comp). */
struct reg_comp_t {
	char *s;   /* current position inside the pattern text */
	char *e;   /* one past the last pattern byte */
	char *set; /* 256-byte scratch table used while building a set */
};

/* One pattern element together with its repeat range. */
struct reg_var_t {
	int t; /* element kind: REG_CHR, REG_SET or REG_ARR */
	int q; /* repeat range: minimum in the high 16 bits, maximum in the low 16 bits */
	union {
		int c;        /* REG_CHR: the character */
		char *s;      /* REG_SET: 256-entry table, non-zero = member */
		reg_arr_t *a; /* REG_ARR: the nested array */
	} u;
};

/* A sequence of elements, or a list of alternatives when REG_OR is set in q. */
struct reg_arr_t {
	union {
		reg_var_t* v; /* elements, used when REG_OR is clear */
		reg_arr_t *a; /* alternatives, used when REG_OR is set */
	} u;
	int n; /* number of entries in u.v / u.a */
	int q; /* REG_SO | REG_EO | REG_OR | REG_SUB flags */
};

/* A compiled regular expression. */
struct reg_t {
	void* a; /* root reg_arr_t, or NULL when nothing is compiled */
	int f;   /* option bits (REG_IC, REG_EL, REG_OM, ...) */
};

/************************************************************
 *
 ************************************************************/
/* Declared early because reg_arr_trace and reg_var_trace call each other. */
static void reg_var_trace(reg_var_t* v);

/*
 * Trace the current match position followed by at most 16 bytes of the
 * subject that remain after it.  Nothing is traced when no bytes remain.
 */
static void reg_exec_trace(reg_exec_t* rm)
{
	int left = rm->n - rm->i;

	if (left <= 0) {
		return;
	}
	if (left > 16) {
		left = 16;
	}
	trace("match: [%d] %.*s\n", rm->i, left, rm->s + rm->i);
}
/*
 * Trace at most 16 bytes of the pattern starting at the compiler's current
 * position.  Nothing is traced when the position is at or past the end.
 */
static void reg_comp_trace(reg_comp_t* cp)
{
	int left = cp->e - cp->s;

	if (left <= 0) {
		return;
	}
	if (left > 16) {
		left = 16;
	}
	trace("comp: %.*s\n", left, cp->s);
}
/*
 * Trace an array: its entry count and flags on one line, then every entry.
 * Entries are alternatives (arrays) when REG_OR is set, elements otherwise.
 */
static void reg_arr_trace(reg_arr_t* a)
{
	const char *sot = (a->q & REG_SO)  ? "^ " : "";
	const char *eot = (a->q & REG_EO)  ? "$ " : "";
	const char *alt = (a->q & REG_OR)  ? "| " : "";
	const char *sub = (a->q & REG_SUB) ? "()" : "";
	int k;

	trace("arr: [%d] %s%s%s%s\n", a->n, sot, eot, alt, sub);

	for (k = 0; k < a->n; k++) {
		if (a->q & REG_OR) {
			reg_arr_trace(&a->u.a[k]);
		} else {
			reg_var_trace(&a->u.v[k]);
		}
	}
}
/*
 * Trace one element: its repeat range (minimum, then maximum) followed by a
 * line describing its payload.
 *
 *   REG_CHR - the character, printed literally when printable, in hex otherwise
 *   REG_SET - the sum of the 256 table entries (the member count when every
 *             entry is 0 or 1)
 *   REG_ARR - the nested array, via reg_arr_trace
 */
static void reg_var_trace(reg_var_t* v)
{
	int i, c;
	trace("var: 0x%04x - 0x%04x\n", v->q >> 16, v->q & 0xFFFF);
	if (v->t == REG_CHR) {
		c = v->u.c;
		/*
		 * isprint() is only defined for EOF and values representable as
		 * unsigned char; the stored character is an int that may be
		 * negative, so range-check it before classifying.
		 */
		if ((c >= 0) && (c <= 0xFF) && isprint(c)) {
			trace("chr: '%c'\n", c);
		} else {
			/* %x takes an unsigned int */
			trace("chr: 0x%02x\n", (unsigned)c);
		}
	} else if (v->t == REG_SET) {
		for (c = i = 0; i < 256; i++) {
			c += v->u.s[i];
		}
		trace("set: %d\n", c);
	} else if (v->t == REG_ARR) {
		reg_arr_trace(v->u.a);
	} else {
		trace("? (%d)\n", v->t);
	}
}
/*
 * Trace a compiled expression: the option letters whose bits are set in
 * reg->f, then the root array when there is one.
 */
static void reg_trace(reg_t* reg)
{
	char letters[32];
	int len = 0;
	int bit;

	/* one option letter per bit, 'a' being bit 0 */
	for (bit = 0; bit <= 'z' - 'a'; bit++) {
		if (reg->f & (1 << bit)) {
			letters[len++] = 'a' + bit;
		}
	}
	letters[len] = 0;

	trace("reg: %s\n", letters);

	if (reg->a) {
		reg_arr_trace(reg->a);
	}
}

/************************************************************
 *
 ************************************************************/
/*
 * Allocate one zero-filled array (no entries, no flags).
 * Returns NULL when calloc fails.  Release with reg_arr_free.
 *
 * Declared with (void) so that the declaration is a real prototype and calls
 * with arguments are rejected by the compiler.
 */
static reg_arr_t* reg_arr_alloc(void)
{
	return calloc(1, sizeof(reg_arr_t));
}
/*
 * Append a literal-character element to a, repeated exactly once
 * (range 0x10001 = minimum 1, maximum 1).
 *
 * NOTE(review): the realloc result is used unchecked.
 */
static void reg_arr_addChr(reg_arr_t* a, int c)
{
	reg_var_t* slot;

	a->u.v = realloc(a->u.v, sizeof(reg_var_t) * (a->n + 1));
	slot = &a->u.v[a->n];
	a->n++;

	slot->t = REG_CHR;
	slot->q = 0x10001;
	slot->u.c = c;
}
/*
 * Append a set element to a, repeated exactly once.  The 256-byte table s is
 * copied into freshly allocated memory, so the caller keeps ownership of s.
 *
 * NOTE(review): the realloc and malloc results are used unchecked.
 */
static void reg_arr_addSet(reg_arr_t* a, char *s)
{
	reg_var_t* slot;

	a->u.v = realloc(a->u.v, sizeof(reg_var_t) * (a->n + 1));
	slot = &a->u.v[a->n];
	a->n++;

	slot->t = REG_SET;
	slot->q = 0x10001;
	slot->u.s = malloc(256);
	memcpy(slot->u.s, s, 256);
}
/*
 * Append a nested-array element to a, repeated exactly once.  The structure
 * *a2 is copied shallowly into a new heap block: the entries a2 points to are
 * shared with, and from now on released through, the copy.
 *
 * NOTE(review): the realloc and malloc results are used unchecked.
 */
static void reg_arr_addArr(reg_arr_t* a, reg_arr_t* a2)
{
	reg_var_t* slot;
	reg_arr_t* copy;

	a->u.v = realloc(a->u.v, sizeof(reg_var_t) * (a->n + 1));
	slot = &a->u.v[a->n];
	a->n++;

	copy = malloc(sizeof(reg_arr_t));
	memcpy(copy, a2, sizeof(reg_arr_t));

	slot->t = REG_ARR;
	slot->q = 0x10001;
	slot->u.a = copy;
}
/*
 * Append *a2 (shallow copy) as a new alternative of a and return a pointer to
 * the stored copy.  The returned pointer lives inside a->u.a and is therefore
 * only valid until the next call that grows the same array.
 *
 * NOTE(review): the realloc result is used unchecked.
 */
static reg_arr_t* reg_arr_addOr(reg_arr_t* a, reg_arr_t* a2)
{
	reg_arr_t* slot;

	a->u.a = realloc(a->u.a, sizeof(reg_arr_t) * (a->n + 1));
	slot = &a->u.a[a->n];
	a->n++;

	memcpy(slot, a2, sizeof(reg_arr_t));
	return slot;
}

/************************************************************
 *
 ************************************************************/
/* Declared early because reg_var_clear and reg_arr_clear call each other. */
static void reg_arr_clear(reg_arr_t* a);

/*
 * Release whatever one element owns: the table of a set, or the nested array
 * (its contents and the array structure itself).  A character element owns
 * nothing.  The element itself is not freed.
 */
static void reg_var_clear(reg_var_t* v)
{
	switch (v->t) {
	case REG_SET:
		free(v->u.s);
		break;
	case REG_ARR:
		reg_arr_clear(v->u.a);
		free(v->u.a);
		break;
	default:
		break;
	}
}
/*
 * Release everything an array owns and reset it to the empty state
 * (no entries, no flags).  The array structure itself is not freed.
 */
static void reg_arr_clear(reg_arr_t* a)
{
	int has_alts = (a->q & REG_OR) != 0;
	int k;

	if (has_alts && a->u.a) {
		/* entries are alternatives */
		for (k = 0; k < a->n; k++) {
			reg_arr_clear(&a->u.a[k]);
		}
		free(a->u.a);
		a->u.a = NULL;
	} else if (!has_alts && a->u.v) {
		/* entries are elements */
		for (k = 0; k < a->n; k++) {
			reg_var_clear(&a->u.v[k]);
		}
		free(a->u.v);
		a->u.v = NULL;
	}

	a->q = 0;
	a->n = 0;
}
/* Release an array's contents and then the array structure itself. */
static void reg_arr_free(reg_arr_t* a)
{
	reg_arr_clear(a);
	free(a);
}
/*
 * Release the compiled array of reg, if any, and zero the whole structure.
 * The reg_t itself is not freed.
 */
static void reg_free(reg_t* reg)
{
	reg_arr_t* root = reg->a;

	if (root != NULL) {
		reg_arr_free(root);
	}
	memset(reg, 0, sizeof(*reg));
}

/************************************************************
 *
 ************************************************************/
/*
 * Parse a run of decimal digits starting at *ps and stopping before e.
 * *ps is advanced past the digits.  The value stops growing once it exceeds
 * 0xffff, so a long digit string cannot overflow the int; any such value is
 * rejected by the caller's range check.
 */
static int reg_comp_num(char **ps, char *e)
{
	char *s = *ps;
	int n = 0;
	while ((s < e) && isdigit((unsigned char)*s)) {
		if (n <= 0xffff) {
			n = (n * 10) + (*s - '0');
		}
		s++;
	}
	*ps = s;
	return n;
}

/*
 * Compile pattern text into the array top.
 *
 * top - array to fill; REG_SUB is already set by the caller when top is a
 *       capturing "(...)" group
 * cp  - compile state: cp->s is the position to start at, cp->e the end of
 *       the pattern, cp->set a 256-byte scratch table.  On return cp->s is
 *       the position after the consumed text (after ')' for a group).
 * ref - option bits; REG_IC and REG_EL are used here
 *
 * Returns 1 on success and 0 when there is nothing to compile or a nested
 * group failed.  warnif() is defined outside this file; the code is written
 * as if a true condition reports the message and aborts the compile
 * (TODO confirm).
 *
 * Local state:
 *   arr - array currently receiving elements: top, or the latest alternative
 *   k   - 0 outside "[...]"; -1 inside a set; > 0 inside a set after "lo-",
 *         holding lo.  Also reused as the minimum while parsing "{m,n}".
 *   v   - value written into the set table (1, or 0 for a negated set).
 *         Also reused as the maximum while parsing a quantifier.
 *
 * Changes against the previous version:
 *   - the first '|' of a group keeps REG_SUB on the group instead of moving
 *     it to the first alternative
 *   - "{m,n}" parsing never reads at or past cp->e and cannot overflow
 *   - set tables are indexed with byte values 0..255 only
 */
static int reg_comp_arr(reg_arr_t* top, reg_comp_t* cp, int ref)
{
	reg_arr_t *arr, ar2;
	char *s, *e;
	int c, k, v;
	if (!cp->s || !(cp->s < cp->e)) {
		return 0;
	}
	arr = top;
	k = v = 0;
	s = cp->s;
	e = cp->e;
	while ((s < e) && ((c = *s) != 0)) {
		/* remember where the current character starts */
		cp->s = s++;
		if (k) {
			/* inside "[...]" only ']' is special; the rest is handled below */
			if (c == ']') {
				warnif(k >= 0, "bad set end");
				reg_arr_addSet(arr, cp->set);
				k = 0;
				continue;
			}
		}
		else if (c == ')') {
			/*
			 * NOTE(review): a group opened with "(?" does not carry
			 * REG_SUB, so this check also fires for its closing ')'.
			 */
			warnif(!(top->q & REG_SUB), "bad sub end");
			cp->s = s;
			return 1;
		}
		else if (c == '|') {
			if (!(top->q & REG_OR)) {
				/*
				 * First '|': what was compiled so far becomes
				 * alternative 0 and top turns into a list of
				 * alternatives.  '^'/'$' stay with alternative 0,
				 * but the capture flag describes the whole group
				 * and must stay on top.
				 */
				memcpy(&ar2, top, sizeof(reg_arr_t));
				memset(top, 0, sizeof(reg_arr_t));
				top->q = (ar2.q & REG_SUB) | REG_OR;
				ar2.q &= ~REG_SUB;
				reg_arr_addOr(top, &ar2);
			}
			memset(&ar2, 0, sizeof(reg_arr_t));
			arr = reg_arr_addOr(top, &ar2);
			continue;
		}
		else if (c == '$') {
			warnif(arr->q & REG_EO, "bad bot");
			arr->q |= REG_EO;
			continue;
		}
		else if (arr->q & REG_EO) {
			/* anything after '$' (other than ')' or '|') is rejected */
			warnif(arr->q & REG_EO, "bad char after bot");
		}
		else if (c == '^') {
			warnif(arr->n || (arr->q & REG_SO), "bad sot");
			arr->q |= REG_SO;
			continue;
		}
		else if (c == '(') {
			warnif(s >= e, "bad sub");
			memset(&ar2, 0, sizeof(reg_arr_t));
			if (*s == '?') {
				s++; /* escape sub */
			} else {
				ar2.q |= REG_SUB;
			}
			cp->s = s;
			if (!reg_comp_arr(&ar2, cp, ref)) {
				reg_arr_clear(&ar2);
				return 0;
			}
			/* ownership of ar2's entries moves into the new element */
			reg_arr_addArr(arr, &ar2);
			s = cp->s;
			continue;
		}
		else if (c == '{') {
			/* "{m}", "{m,}" or "{m,n}" applied to the previous element */
			warnif(!arr->n || (s >= e), "bad qtz");
			k = reg_comp_num(&s, e);
			if ((s >= e) || (*s != ',')) {
				v = k;
			} else {
				s++;
				if ((s < e) && (*s == '}')) {
					v = 0xffff;
				} else {
					v = reg_comp_num(&s, e);
				}
			}
			warnif((s >= e) || (*s++ != '}'), "bad qtz end");
			warnif((k > 0x7fff) || (v > 0xffff) || (k > v), "bad qtz range");
			(arr->u.v + (arr->n - 1))->q = (k << 16) | v;
			k = 0;
			continue;
		}
		else if (strchr("*?+", c)) {
			warnif(!arr->n, "bad qtz char");
			k = (c == '+') ? 1 : 0;
			v = (c == '?') ? 1 : 0xffff;
			(arr->u.v + (arr->n - 1))->q = (k << 16) | v;
			k = 0;
			continue;
		}
		else if (c == '.') {
			memset(cp->set, 1, 256);
			if (ref & REG_EL) { /* each line */
				cp->set[0xa] = 0;
				cp->set[0xd] = 0;
			}
			reg_arr_addSet(arr, cp->set);
			continue;
		}
		else if (c == '[') {
			k = -1;
			v = 1;
			if ((s < e) && (*s == '^')) {
				/* negated set: start full, members are cleared */
				s++;
				v = 0;
			}
			memset(cp->set, !v, 256);
			continue;
		}
		if ((c == '\\') && (s < e) && *s) {
			if (strchr("swd", *s)) {
				c = *s++;
				if (!k) {
					/* class outside a set: build a set of its own */
					memset(cp->set, 0, 256);
					v = 1;
				} else {
					warnif(k > 0, "bad set range");
				}
				if (c == 's') {
					/* NOTE(review): starts at 0x7, so BEL and BS count as \s */
					for (c = 0x7; c<=0xd; c++) cp->set[c]=v;
					cp->set[' ']=v;
				} else if (c == 'w') {
					for (c = '0'; c<='9'; c++) cp->set[c]=v;
					for (c = 'a'; c<='z'; c++) cp->set[c]=v;
					for (c = 'A'; c<='Z'; c++) cp->set[c]=v;
					cp->set['_']=v;
				} else if (c == 'd') {
					for (c = '0'; c<='9'; c++) cp->set[c]=v;
				}
				if (!k) {
					reg_arr_addSet(arr, cp->set);
				}
				continue;
			} else {
				c = unmetac(&s, 0);
			}
		}
		if (ref & REG_IC) {
			/*
			 * NOTE(review): c may be negative where plain char is
			 * signed; left untouched so that the folded value stays
			 * consistent with what the matcher computes.
			 */
			c = tolower(c);
		}
		if (!k) {
			reg_arr_addChr(arr, c);
		} else {
			/*
			 * Set members are byte values: reduce c to 0..255 so that
			 * a negative char can never index outside the table.
			 */
			c = (unsigned char)c;
			if (k < 0) {
				if ((s < e) && (*s == '-')) {
					/* "lo-": remember lo (never 0, which means "no set") */
					s++;
					k = c ? c : 1;
				} else {
					cp->set[c]=v;
				}
			} else {
				/* "lo-hi": mark the whole range */
				warnif(c < k, "bad set range");
				while (k <= c) {
					cp->set[k++]=v;
				}
				k = -1;
			}
		}
	}
	warnif(k != 0, "bad set end");
	warnif(top->q & REG_SUB, "bad sub end");
	cp->s = s;
	return 1;
}
/*
 * Compile the n-byte pattern s with option bits f into reg.
 *
 * reg is zeroed first, so a previously compiled expression must already have
 * been released.  Returns 1 on success.  Returns 0 when the pattern is NULL
 * or empty (reg stays zeroed), when memory for the root array cannot be
 * allocated, or when the pattern cannot be compiled; in the last two cases
 * reg->a is NULL and reg->f holds f.
 */
static int reg_comp(reg_t* reg, char *s, int n, int f)
{
	reg_comp_t cp;
	char set[256];
	
	memset(reg, 0, sizeof(reg_t));
	
	if (!s || (n <= 0)) {
		/* null reg match with null string */
		return 0;
	}
	
	memset(&cp, 0, sizeof(reg_comp_t));
	
	cp.set = set;
	cp.s = s;
	cp.e = s + n;
	
	reg->f = f;
	reg->a = reg_arr_alloc();
	
	if (!reg->a) {
		/* out of memory: reg_comp_arr would dereference the NULL root */
		return 0;
	}
	
	if (!reg_comp_arr(reg->a, &cp, f)) {
		if (0) {
			reg_comp_trace(&cp);
		}
		reg_arr_free(reg->a);
		reg->a = NULL;
		return 0;
	}
	
	return 1;
}

/************************************************************
 *
 ************************************************************/
/* Declared early because the matcher functions below call each other recursively. */
static int reg_exec_var(reg_var_t* v, reg_exec_t* rm, int ref);
static int reg_exec_arr(reg_arr_t* a, reg_exec_t* rm, int ref);

/*
 * Match the single character c at the current position.
 * With REG_IC the subject character is folded with tolower() first.
 * Returns 1 and advances the position on a match; returns 0 and leaves the
 * position unchanged otherwise (including when the position is out of range).
 */
static int reg_exec_c(int c, reg_exec_t* rm, int ref)
{
	int ch;

	if ((rm->i < 0) || (rm->i >= rm->n)) {
		return 0;
	}
	ch = rm->s[rm->i];
	if (ref & REG_IC) {
		ch = tolower(ch);
	}
	if (ch != c) {
		return 0;
	}
	rm->i++;
	return 1;
}
/*
 * Match the character at the current position against a 256-entry set table
 * (non-zero entry = member).  With REG_IC the subject character is folded
 * with tolower() first.
 * Returns 1 and advances the position on a match; returns 0 and leaves the
 * position unchanged otherwise (including when the position is out of range).
 */
static int reg_exec_set(char *set, reg_exec_t* rm, int ref)
{
	unsigned char ch;
	if (!((rm->i >= 0) && (rm->i < rm->n))) {
		return 0;
	}
	/*
	 * Read the subject byte as unsigned: where plain char is signed, bytes
	 * >= 0x80 would otherwise give a negative table index (and a negative
	 * argument to tolower()).
	 */
	ch = (unsigned char)rm->s[rm->i];
	if (ref & REG_IC) {
		ch = (unsigned char)tolower(ch);
	}
	if (!set[ch]) {
		return 0;
	}
	rm->i++;
	return 1;
}
/*
 * Match every element of a sequence, in order, from the current position.
 *
 * REG_SO requires the match to start at offset 0 or, with REG_EL, right after
 * a CR/LF character.  REG_EO requires it to end at the end of the subject or,
 * with REG_EL, right before a CR/LF character.
 * Returns 1 on success, 0 otherwise; the position is not restored on failure.
 */
static int reg_exec_and(reg_arr_t* arr, reg_exec_t* rm, int ref)
{
	int k;

	if ((arr->q & REG_SO) && (rm->i > 0)) {
		if (!(ref & REG_EL) || !iscrlf(rm->s[rm->i - 1])) {
			return 0;
		}
	}

	for (k = 0; k < arr->n; k++) {
		if (!reg_exec_var(&arr->u.v[k], rm, ref)) {
			return 0;
		}
	}

	if ((arr->q & REG_EO) && (rm->i < rm->n)) {
		if (!(ref & REG_EL) || !iscrlf(rm->s[rm->i])) {
			return 0;
		}
	}
	return 1;
}
/*
 * Try the alternatives of arr in order.
 * Returns 1 as soon as one of them matches, 0 when none does.
 */
static int reg_exec_or(reg_arr_t* arr, reg_exec_t* rm, int ref)
{
	int k;

	for (k = 0; k < arr->n; k++) {
		if (reg_exec_arr(&arr->u.a[k], rm, ref)) {
			return 1;
		}
	}
	return 0;
}
/*
 * Match one element as many times as possible, up to the maximum in the low
 * 16 bits of v->q, stopping early at the end of the subject or at the first
 * repetition that fails.
 * Returns 1 when the number of repetitions reaches the minimum in the high
 * 16 bits of v->q, 0 otherwise.  There is no retry with fewer repetitions.
 */
static int reg_exec_var(reg_var_t* v, reg_exec_t* rm, int ref)
{
	int most = v->q & 0xFFFF;
	int least = v->q >> 16;
	int done, ok;

	for (done = 0; (rm->i >= 0) && (rm->i < rm->n) && (done < most); done++) {

		assert((v->t >= REG_CHR) && (v->t <= REG_ARR));

		ok = 1;
		switch (v->t) {
		case REG_CHR:
			ok = reg_exec_c(v->u.c, rm, ref);
			break;
		case REG_SET:
			ok = reg_exec_set(v->u.s, rm, ref);
			break;
		case REG_ARR:
			ok = reg_exec_arr(v->u.a, rm, ref);
			break;
		default:
			break;
		}
		if (!ok) {
			/* the failed repetition is not counted */
			break;
		}
	}
	return (done >= least);
}
/*
 * Match an array (a sequence, or a list of alternatives when REG_OR is set)
 * at the current position.
 *
 * An array flagged REG_SUB takes the next group slot; on success the span it
 * matched is stored in rm->so[] / rm->eo[] when the slot index is below 10.
 *
 * Returns 1 on success with rm->i after the matched text.  On failure returns
 * 0 with rm->i and the group counter restored to their values on entry, so
 * the caller can try the next alternative, or carry on after an optional
 * group, from the same position.  (This used to leave rm->i one byte past
 * the entry position, which made "a|b" miss "b" and "(ab)?c" miss "c".)
 */
static int reg_exec_arr(reg_arr_t* arr, reg_exec_t* rm, int ref)
{
	int i, m, ok;
	i = rm->i;
	if (arr->q & REG_SUB) {
		m = rm->m++;
	} else {
		m = -1;
	}
	if (arr->q & REG_OR) {
		ok = reg_exec_or(arr, rm, ref);
	} else {
		ok = reg_exec_and(arr, rm, ref);
	}
	if (!ok) {
		/* undo any partial progress of the failed attempt */
		rm->i = i;
		if (m >= 0) {
			rm->m = m;
		}
		return 0;
	}
	if ((m >= 0) && (m < 10)) {
		rm->so[m] = i;
		rm->eo[m] = rm->i;
	}
	return 1;
}
/*
 * Search for the root array in the subject, starting at rm->i.
 *
 * The root array is flagged REG_SUB here, so the whole match takes group
 * slot 0.  Three search strategies are used:
 *
 *   - ends with '$' but does not start with '^': candidate start positions
 *     are tried backwards.  Without REG_EL this is only done for a start
 *     offset of 0 and begins at the last byte; with REG_EL it begins at the
 *     last byte of the current line.
 *   - starts with '^': one attempt only.  Without REG_EL the start offset
 *     must be 0; with REG_EL a non-zero offset is first moved to the
 *     beginning of the next line unless it is already right after a CR/LF.
 *   - otherwise: every position from the start offset onwards is tried.
 *
 * Returns 1 on a match (rm holds the spans), 0 otherwise.
 */
static int reg_exec_top(reg_arr_t* arr, reg_exec_t* rm, int ref)
{
	int pos = rm->i;
	int at_start;

	arr->q |= REG_SUB;
	at_start = (arr->q & REG_SO) != 0;

	if (!at_start && (arr->q & REG_EO)) {
		/* reverse search */
		if (!(ref & REG_EL)) {
			if (pos > 0) {
				return 0;
			}
			pos = rm->n - 1;
		} else {
			if (pos > 0) {
				while ((pos < rm->n) && iscrlf(rm->s[pos])) pos++;
			}
			while ((pos < rm->n) && !iscrlf(rm->s[pos])) pos++;
			if (pos > 0) pos--;
		}
		for (; pos >= 0; pos--) {
			rm->i = pos;
			if (reg_exec_arr(arr, rm, ref)) {
				return 1;
			}
		}
		return 0;
	}

	if (at_start && (pos > 0)) {
		if (!(ref & REG_EL)) {
			return 0;
		}
		if (!iscrlf(rm->s[pos - 1])) {
			/* in the middle of a line: skip the rest of it */
			while ((pos < rm->n) && !iscrlf(rm->s[pos])) pos++;
		}
		/* skip the line terminator(s) */
		while ((pos < rm->n) && iscrlf(rm->s[pos])) pos++;
	}

	for (; pos < rm->n; pos++) {
		rm->i = pos;
		if (reg_exec_arr(arr, rm, ref)) {
			return 1;
		}
		if (at_start) {
			return 0;
		}
	}
	return 0;
}
/*
 * Run a compiled expression against the n-byte subject s, starting at
 * offset i.
 *
 * A NULL subject is treated as the empty string.  An expression without a
 * compiled array matches only the empty subject at offset 0 (so[0] and eo[0]
 * are set to 0, result 1).  With REG_OM set, any non-zero start offset is
 * rejected.
 *
 * Returns 0 when there is no match; otherwise the number of group slots used
 * (at least 1), with the recorded spans copied into so[] and eo[].
 */
static int reg_exec(reg_t* reg, char *s, int n, int i, int so[10], int eo[10])
{
	reg_exec_t rm;

	if (s == NULL) {
		s = "";
		n = 0;
	}

	if (reg->a == NULL) {
		/* pattern is null */
		if ((n != 0) || (i != 0)) {
			return 0;
		}
		/* match with null once */
		eo[0] = 0;
		so[0] = 0;
		return 1;
	}

	if ((i != 0) && (reg->f & REG_OM)) {
		return 0;
	}

	memset(&rm, 0, sizeof(rm));
	rm.s = s;
	rm.n = n;
	rm.i = i;
	rm.m = 0;

	if (!reg_exec_top(reg->a, &rm, reg->f)) {
		if (0) {
			reg_exec_trace(&rm);
		}
		return 0;
	}

	memcpy(so, rm.so, sizeof(rm.so));
	memcpy(eo, rm.eo, sizeof(rm.eo));

	assert(rm.m > 0);

	return rm.m;
}

/************************************************************
 *
 ************************************************************/
/*
 * Public entry point for matching: forwards every argument to reg_exec,
 * with p used as the reg_t, and returns its result.
 */
int regex_exec(void *p, char *s, int n, int i, int so[10], int eo[10])
{
	return reg_exec(p, s, n, i, so, eo);
}
/*
 * Public entry point for compiling: compiles the n-byte pattern s with option
 * bits f into the reg_t at p.
 * Returns 1 on success.  When reg_comp fails, a warning is emitted and 0 is
 * returned.
 *
 * reg_comp also fails for a NULL pattern, so s is replaced by "" before it is
 * handed to the "%s" conversion.
 * NOTE(review): "%s" still assumes that s is NUL-terminated, although the
 * pattern length is passed separately in n - confirm against callers.
 */
int regex_comp(void *p, char *s, int n, int f)
{
	if (!reg_comp(p, s, n, f)) {
		/* free_link(p) ? */
		warns("regex compile error: %s\n", s ? s : "");
		return 0;
	}
	return 1;
}

/************************************************************
 *
 ************************************************************/
/* Hand the object p back through free_link(ex, p). */
void regex_free(exec_t* ex, void *p)
{
	free_link(ex, p);
}
/*
 * Obtain storage for one reg_t through alloc_link, registered with
 * regex_proto.  Returns whatever alloc_link returns.
 */
void* regex_alloc(exec_t* ex)
{
	return alloc_link(ex, sizeof(reg_t), &regex_proto);
}
/*
 * Release the compiled expression held in the reg_t at p and zero it
 * (see reg_free).  ex is not used.
 */
void regex_clear(exec_t* ex, void* p)
{
	reg_free(p);
}
/* Trace the compiled expression held in the reg_t at p (see reg_trace). */
void regex_trace(void *p)
{
	reg_trace(p);
}
