#include <cstdlib>
#include "parse.h"

// Uncomment the next line to enable state tracing on std::cerr.
//#define TESTRUN

#ifdef TESTRUN

#include <iostream>

// Tracing build: write the message to std::cerr followed by a newline/flush.
#define TRACE(s) (std::cerr << s << std::endl)

#else

// Normal build: TRACE expands to nothing, so its argument is not evaluated.
#define TRACE(s)

#endif

// True when c is a space, tab, carriage return or line feed.
// Note: c is evaluated up to four times, so pass an expression without
// side effects.
#define ISSPACE(c)  \
 (((c) == ' ') ||  \
  ((c) == '\t') || \
  ((c) == '\r') || \
  ((c) == '\n'))

// When __GNUC__ is defined, route stricmp (case-insensitive string compare)
// to strcasecmp; other compilers are expected to provide stricmp themselves.
#ifdef __GNUC__
#define stricmp(s1, s2) strcasecmp(s1, s2)
#endif

// File-local helper that splits the raw text of a tag into tokens; defined
// further down in this file.
static std::list<std::string> sepexp(const std::string &s);

void parser::enter(enum states next) {
  quote = 0;
  buff = "";
  state = next;
  tagrec *child = new tagrec;
  child->offset = bp->value.size();
  child->owner = bp;
  bp->child.push_back(child);
  bp = child;
}

void parser::leave(enum states next) {
  state = next;
  bp = bp->owner;
}

// Advances to `next` when the expectation `b` holds, otherwise puts the
// machine into the stop (error) state.
// Returns 0 on success and -1 on failure.
int parser::checknext(bool b, enum states next) {
  if (!b) {
    state = stop;
    return -1;
  }
  state = next;
  return 0;
}

bool parser::acceptable() {
  return (root == bp) && (state == text);
}

// True once the machine has entered the stop (error) state.
bool parser::checkerror() {
  const bool failed = (state == stop);
  return failed;
}

// One dispatch entry of step(): trace the state's name, then hand the
// character to that state's handler. Uses the local `c` of step().
#define PARSER_STEP_CASE(st, handler) \
  case st:                            \
    TRACE(#st);                       \
    return handler(c)

// Feeds one character to the handler that belongs to the current state and
// returns that handler's result. Any state without a handler (in particular
// the stop state) yields -1.
int parser::step(int c) {
  switch (state) {
    PARSER_STEP_CASE(none, pnone);
    PARSER_STEP_CASE(text, ptext);
    PARSER_STEP_CASE(eps1, peps1);
    PARSER_STEP_CASE(eps2, peps2);
    PARSER_STEP_CASE(eps3, peps3);
    PARSER_STEP_CASE(stag, pstag);
    PARSER_STEP_CASE(etag, petag);
    PARSER_STEP_CASE(decl, pdecl);
    PARSER_STEP_CASE(com1, pcom1);
    PARSER_STEP_CASE(com2, pcom2);
    PARSER_STEP_CASE(com3, pcom3);
    PARSER_STEP_CASE(pis1, ppis1);
    PARSER_STEP_CASE(pis2, ppis2);
    PARSER_STEP_CASE(cdt1, pcdt1);
    PARSER_STEP_CASE(cdt2, pcdt2);
    PARSER_STEP_CASE(cdt3, pcdt3);
    PARSER_STEP_CASE(cdt4, pcdt4);
    PARSER_STEP_CASE(cdt5, pcdt5);
    PARSER_STEP_CASE(cdt6, pcdt6);
    PARSER_STEP_CASE(cdt7, pcdt7);
    PARSER_STEP_CASE(cdt8, pcdt8);
    PARSER_STEP_CASE(cdt9, pcdt9);
  default:
    TRACE("stop");
    return -1;
  }
}

#undef PARSER_STEP_CASE

int parser::pnone(int c) {
  state = text;
  bp = root = new tagrec;
  return step(c);
}

// Text state: '<' starts markup; every other character is appended to the
// current record's text. Always returns 0.
int parser::ptext(int c) {
  if (c != '<') {
    bp->value += char(c);
    return 0;
  }
  state = eps1;
  return 0;
}

// Handles the character that follows '<' and decides what kind of markup
// starts here.
//
//   "</"  end tag; its name is collected in buff
//   "<>"  error: the machine stops and -1 is returned right away, like the
//         other error paths in this parser
//   "<?"  processing instruction; a new record is opened, label starts "?"
//   "<!"  comment, CDATA section or declaration (decided by peps2)
//   else  start tag; a new record is opened and `c` is its first character
//
// Returns 0 on success and -1 on error.
int parser::peps1(int c) {
  switch (c) {
  case '/':
    buff = "";
    state = etag;
    return 0;
  case '>':
    state = stop;
    return -1;
  case '?':
    enter(pis1);
    bp->label += '?';
    return 0;
  case '!':
    state = eps2;
    return 0;
  default:
    enter(stag);
    return step(c);
  }
}

// Handles the character that follows "<!".
//
//   '-'   possible comment start ("<!-"), confirmed by peps3
//   '['   possible CDATA section; a new record is opened
//   else  declaration; a new record is opened, its label starts with '!'
//         and `c` is processed as the first character of the declaration
int parser::peps2(int c) {
  switch (c) {
  case '-':
    state = eps3;
    return 0;
  case '[':
    enter(cdt1);
    return 0;
  default:
    enter(decl);
    bp->label += '!';
    return step(c);
  }
}

int parser::peps3(int c) {
  if (c == '-') {
    state = com1;
  } else {
    state = stop;
    return -1;
  }
  return 0;
}

// Comment body: the text of a comment is discarded. Only a '-' is of
// interest because it may begin the closing "-->".
int parser::pcom1(int c) {
  if (c == '-') {
    state = com2;
  }
  return 0;
}

// Comment body after a single '-': a second '-' may be the start of the
// terminator; anything else returns to the plain comment body.
int parser::pcom2(int c) {
  state = (c == '-') ? com3 : com1;
  return 0;
}

// Comment body after "--".
//
//   '>'   the comment ends and the machine returns to the text state
//   '-'   the last two characters are still "--", so stay in this state;
//         this lets "--->" close the comment
//   else  back to the plain comment body
//
// Strictly speaking "--" must not appear inside a comment and both of the
// latter cases could be treated as errors; they are tolerated on purpose.
int parser::pcom3(int c) {
  if (c == '>') {
    state = text;
  } else if (c != '-') {
    state = com1;
  }
  return 0;
}

// Declaration body ("<!...>"): every character up to the closing '>' is
// appended to the record's label. A '>' inside a single- or double-quoted
// string does not end the declaration; `quote` holds the open quote
// character, or 0 when no quote is open.
int parser::pdecl(int c) {
  if (!quote && (c == '>')) {
    leave(text);
    return 0;
  }

  if (!quote) {
    if ((c == '"') || (c == '\'')) {
      quote = c;  // opening quote
    }
  } else if (c == quote) {
    quote = 0;  // matching closing quote
  }
  bp->label += char(c);
  return 0;
}

// Start-tag body: characters are collected in the record's label until a '>'
// is seen outside quotes. At that point the label is split into tag name and
// attributes; a name ending in '/' closes the record immediately, any other
// tag is marked as not single and stays open while its content is parsed.
int parser::pstag(int c) {
  if (!quote && (c == '>')) {
    septag();
    TRACE(bp->label);
    const std::string &name = bp->label;
    const bool selfclosing =
      !name.empty() && (name[name.size() - 1] == '/');
    if (selfclosing) {
      leave(text);
    } else {
      bp->issingle = false;
      state = text;
    }
    return 0;
  }

  if (!quote) {
    if ((c == '"') || (c == '\'')) {
      quote = c;  // opening quote
    }
  } else if (c == quote) {
    quote = 0;  // matching closing quote
  }
  bp->label += char(c);
  return 0;
}

int parser::petag(int c) {
  if (c == '>') {
    if (stricmp(buff.c_str(), bp->label.c_str()) == 0) {
      TRACE("ETAG");
      leave(text);
    } else {
      state = stop; // cross tag
      return -1;
    }
  } else {
    buff += char(c);
  }
  return 0;
}

// pcdt1..pcdt5 match the letters "CDATA" that must follow "<![", one letter
// per state. A wrong letter stops the machine (see checknext).

int parser::pcdt1(int c) {
  const bool matched = (c == 'C');
  return checknext(matched, cdt2);
}

int parser::pcdt2(int c) {
  const bool matched = (c == 'D');
  return checknext(matched, cdt3);
}

int parser::pcdt3(int c) {
  const bool matched = (c == 'A');
  return checknext(matched, cdt4);
}

int parser::pcdt4(int c) {
  const bool matched = (c == 'T');
  return checknext(matched, cdt5);
}

int parser::pcdt5(int c) {
  const bool matched = (c == 'A');
  return checknext(matched, cdt6);
}

int parser::pcdt6(int c) { 
  if (c == '[') {
    bp->label = "!CDATA ";
  } else {
    state = stop;
    return -1;
  }
  return 0;
}

// CDATA body: characters are appended to the label; a ']' is held back
// because it may begin the closing "]]>".
int parser::pcdt7(int c) {
  if (c != ']') {
    bp->label += char(c);
    return 0;
  }
  state = cdt8;
  return 0;
}

// CDATA body after one ']': a second ']' may be the terminator; otherwise
// the held-back ']' and the current character are content after all.
int parser::pcdt8(int c) {
  if (c == ']') {
    state = cdt9;
    return 0;
  }
  state = cdt7;
  bp->label += ']';
  bp->label += char(c);
  return 0;
}

// CDATA body after "]]".
//
//   '>'   the section ends; the record is closed
//   ']'   one of the held-back brackets becomes content, two stay pending
//   else  both held-back brackets and the character become content
int parser::pcdt9(int c) {
  if (c == '>') {
    leave(text);
    return 0;
  }

  bp->label += ']';  // the oldest pending ']' is content in both cases
  if (c != ']') {
    state = cdt7;
    bp->label += ']';
    bp->label += char(c);
  }
  return 0;
}

// Processing-instruction body ("<?...?>"): characters are appended to the
// label. A '?' outside quotes may begin the closing "?>" and is held back;
// `quote` holds the open quote character, or 0 when no quote is open.
int parser::ppis1(int c) {
  if (!quote && (c == '?')) {
    state = pis2;
    return 0;
  }

  if (!quote) {
    if ((c == '"') || (c == '\'')) {
      quote = c;  // opening quote
    }
  } else if (c == quote) {
    quote = 0;  // matching closing quote
  }
  bp->label += char(c);
  return 0;
}

// Processing-instruction body after a '?'.
//
// '>' ends the instruction: the label is split into name and attributes and
// the record is closed. Any other character is tolerated: the held-back '?'
// becomes content and the character is processed by the normal body state.
// (The stricter alternative would be to stop with an error here.)
int parser::ppis2(int c) {
  if (c != '>') {
    bp->label += '?';
    state = pis1;
    return step(c);
  }
  septag();
  leave(text);
  return 0;
}

// Splits the current record's raw label into a tag name and attributes.
//
// The first token becomes the new label. Every further token is stored as an
// attribute: text before the first '=' is the name, text after it the value;
// a token without '=' is stored with an empty value.
// Returns the token list produced by sepexp(); when that list is empty the
// record is left untouched.
std::list<std::string> parser::septag() {
  std::list<std::string> ls = sepexp(bp->label);
  if (ls.empty()) {
    return ls;
  }

  std::list<std::string>::iterator it = ls.begin();
  bp->label = *it;
  for (++it; it != ls.end(); ++it) {
    // size_type keeps the comparison with npos exact on every platform.
    const std::string::size_type eq = it->find('=');
    std::string l, r;
    if (eq != std::string::npos) {
      l = it->substr(0, eq);
      r = it->substr(eq + 1);
    } else {
      l = *it;
    }
    bp->attrs.push_back(std::pair<std::string, std::string>(l, r));
  }
  return ls;
}

// True for the characters that separate tokens inside a tag: space, tab,
// carriage return and line feed.
static bool sepexp_isspace(char c) {
  return (c == ' ') || (c == '\t') || (c == '\r') || (c == '\n');
}

// Splits the raw text of a tag into tokens.
//
// Tokens are separated by runs of whitespace. Single or double quotes group
// text (whitespace included) into the current token; the quote characters
// themselves are dropped. A trailing '/' on the input is not tokenized but
// re-attached to the first token, so the caller can recognise a self-closing
// tag from the tag name; when nothing else is left, the result is the single
// token "/".
//
// An empty input yields an empty list. An unterminated quote is not reported
// here: the text up to the end of the input simply belongs to the last token
// (the callers in this file only hand over text whose quotes are balanced).
static std::list<std::string> sepexp(const std::string &e) {
  std::list<std::string> ls;
  if (e.empty()) {
    return ls;
  }

  std::string::size_type len = e.size();
  const bool slash = (e[len - 1] == '/');
  if (slash) {
    --len;  // tokenize without the trailing '/'
  }

  std::string token;
  char quote = 0;
  for (std::string::size_type i = 0; i < len; ++i) {
    const char ch = e[i];
    if (quote) {
      if (ch == quote) {
        quote = 0;
      } else {
        token += ch;
      }
    } else if ((ch == '"') || (ch == '\'')) {
      quote = ch;
    } else if (sepexp_isspace(ch)) {
      // Separators end a token and are otherwise skipped, so leading or
      // repeated whitespace never becomes part of a token.
      if (!token.empty()) {
        ls.push_back(token);
        token = "";
      }
    } else {
      token += ch;
    }
  }
  if (!token.empty()) {
    ls.push_back(token);
  }

  if (slash) {
    if (ls.empty()) {
      ls.push_back("/");
    } else {
      ls.front() += '/';
    }
  }
  return ls;
}

// Deletes every child record; each child in turn deletes its own children.
tagrec::~tagrec() {
  std::list<tagrec *>::iterator kid = child.begin();
  while (kid != child.end()) {
    delete *kid;
    ++kid;
  }
}

// True when `s` equals this record's label, ignoring case.
bool tagrec::tagcmp(const std::string &s) {
  const int diff = stricmp(s.c_str(), label.c_str());
  return diff == 0;
}

// Appends to `ls` every direct child whose label matches `s` (ignoring
// case) and returns how many were appended. Grandchildren are not searched.
int tagrec::scan(std::vector<tagrec *> &ls, const std::string &s) {
  int matches = 0;
  std::list<tagrec *>::iterator kid = child.begin();
  while (kid != child.end()) {
    tagrec *const rec = *kid;
    if (rec->tagcmp(s)) {
      ls.push_back(rec);
      ++matches;
    }
    ++kid;
  }
  return matches;
}

// Looks up the first attribute whose name equals `s`, ignoring case.
// On success the (name, value) pair is copied to `v` and true is returned;
// otherwise `v` is left unchanged and false is returned.
bool tagrec::scanattr(std::pair<std::string, std::string> &v,
 const std::string &s) {
  typedef std::list<std::pair<std::string, std::string> >::iterator attr_iter;
  attr_iter a = attrs.begin();
  while (a != attrs.end()) {
    if (stricmp(a->first.c_str(), s.c_str()) == 0) {
      v = *a;
      return true;
    }
    ++a;
  }
  return false;
}

// Appends to `ls` every descendant (children, grandchildren, ...) whose
// label matches `s`, ignoring case, and returns how many were appended.
// A matching record is listed before the matches found below it.
int tagrec::find(std::vector<tagrec *> &ls, const std::string &s) {
  int matches = 0;
  std::list<tagrec *>::iterator kid = child.begin();
  while (kid != child.end()) {
    tagrec *const rec = *kid;
    if (rec->tagcmp(s)) {
      ls.push_back(rec);
      ++matches;
    }
    matches += rec->find(ls, s);
    ++kid;
  }
  return matches;
}

// Returns the topmost record of the tree this record belongs to, i.e. the
// record reached by following owner links until a record has no owner.
// A record without an owner is its own root, so the result is never null.
// Relies on the topmost record's owner being null, as the owner walk in the
// original code did.
tagrec *tagrec::findroot() {
  tagrec *p = this;
  while (p->owner) {
    p = p->owner;
  }
  return p;
}

// Resolves a '/'-separated path of tag names relative to this record and
// appends the matching records to `ls`.
//
//   "name"       direct children labelled `name` (see scan)
//   "/rest"      leading slash: `rest` is resolved from the root record
//   "rest/"      a trailing slash is ignored
//   "name/rest"  `rest` is resolved below every direct child labelled `name`
//
// Names are compared without regard to case.
// Returns the number of records appended to `ls` by this call.
int tagrec::path(std::vector<tagrec *> &ls, const std::string &s) {
  const std::string::size_type pos = s.find('/');
  if (pos == std::string::npos) {
    return scan(ls, s);
  }

  if (pos == 0) {  // leading slash: absolute path
    tagrec *const top = findroot();
    if (!top) {
      return 0;  // no root available: nothing can match
    }
    return top->path(ls, s.substr(1));
  }

  if (pos == s.size() - 1) {  // trailing slash: ignore it
    return path(ls, s.substr(0, pos));
  }

  std::vector<tagrec *> heads;
  int n = 0;
  if (scan(heads, s.substr(0, pos))) {
    const std::string rest = s.substr(pos + 1);
    for (std::vector<tagrec *>::iterator pi = heads.begin();
     pi != heads.end(); ++pi) {
      n += (*pi)->path(ls, rest);
    }
  }
  return n;
}

std::string tagrec::dump() {
  std::string s;
  if (label.size()) {
    s += "<";
    s += label;
    for (std::list<std::pair<std::string, std::string> >::iterator
     it = attrs.begin(); it != attrs.end(); it++) {
      s += " ";
      s += it->first;
      if (it->second.size()) {
        s += "=\"";
        s += it->second;
        s += "\"";
      }
    }
    if (label[0] == '?') s += '?';
    s += ">";
  }
  int cnt = 0;
  for (std::list<tagrec *>::iterator it = child.begin(); 
   it != child.end(); it++) {
    if (cnt < (*it)->offset) {
      s += value.substr(cnt, (*it)->offset - cnt);
      cnt = (*it)->offset;
    }
    s += (*it)->dump();
  }
  if (cnt < value.size()) {
    s += value.substr(cnt, value.size() - cnt);
  }
  if (!issingle) {
    s += "</";
    s += label;
    s += ">";
  }
  return s;
}
