%{
    /* C-HTML scanner */

    /* assume no ISO2022 string */
    /* reference: [1] RFC1866, [2] http://www.nttdocomo.co.jp/i/tag/ */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "html2hdml.h"
#include "html2hdml-parse.c"

/* Scanner state shared by the helper macros below and the rule actions. */

/* Token of the element whose tag is being scanned; written by ELEM_RET and
 * compared by the ATTR_RET_n macros. */
int gl_elem;
/* Start condition saved by PUSHSTATE and restored by POPSTATE. */
int gl_prev_c;
/* Number of bytes of pending text kept in yytext through yymore();
 * written by APPEND_TEXT, cleared by FLUSH_TEXT.  0 means nothing pending. */
int gl_text_len = 0;
/* Input line counter, incremented by the line-break rules. */
int gl_lineno = 1;
/* Last token handed back through RETURN(); -1 means no token yet. */
int gl_last_ret = -1; /* no such token*/
/* Not referenced anywhere in this file.
 * NOTE(review): confirm whether another translation unit uses it. */
int gl_dummyspc_printed = 0;

/* String member of the semantic value passed to the parser. */
#define YYLVAL yylval.str

/* Store the result of my_strdup() on the current match in YYLVAL. */
#define SETLVAL() (YYLVAL = my_strdup(yytext))

/*
 * Hand pending text to the parser before the current match is processed.
 *
 * Pending text is what APPEND_TEXT accumulated with yymore(); it sits at the
 * front of yytext and is gl_text_len bytes long.
 *
 *   - Text pending: yyless() pushes the current match back into the input
 *     (it will be scanned again on the next call), yytext is cut down to
 *     the pending text and a TEXT token is returned from the action.
 *   - Nothing pending: the current match is stored in YYLVAL and control
 *     continues with the rest of the action.
 *
 * Wrapped in do-while(0) so the macro is a single statement and its
 * if/else cannot pair up with an else at the call site.
 */
#define FLUSH_TEXT() \
    do { \
        if (gl_text_len) { \
            yyless(gl_text_len); \
            yytext[gl_text_len] = '\0'; \
            gl_text_len = 0; \
            SETLVAL(); \
            RETURN(TEXT); \
        } else { \
            SETLVAL(); \
        } \
    } while (0)

/*
 * Keep the current match as pending text instead of returning a token:
 * yymore() makes flex prepend it to the next match, and gl_text_len records
 * how many bytes of yytext are pending so that FLUSH_TEXT can cut them out.
 *
 * Wrapped in do-while(0) so that "if (x) APPEND_TEXT(); else ..." compiles.
 */
#define APPEND_TEXT() \
    do { \
        yymore(); \
        gl_text_len = yyleng; \
    } while (0)

/*
 * Return token `ret` with an empty string (from my_strdup("")) as its
 * semantic value.  Used for tokens that do not correspond to input text.
 *
 * Wrapped in do-while(0) so the macro behaves as one statement.
 */
#define RETURN_DUMMYSPC(ret) \
    do { \
        YYLVAL = my_strdup(""); \
        RETURN(ret); \
    } while (0)

/*
 * Synthesize a ' ' token ahead of the current match unless the previous
 * token (gl_last_ret) was already ' ' or '\n'.
 *
 * yyless(0) pushes the whole match back, so the same rule fires again on
 * the next call; by then gl_last_ret is ' ' and the macro does nothing.
 *
 * Wrapped in do-while(0): the original bare "if" could silently capture an
 * "else" written after the macro call.
 */
#define INSERT_DUMMYSPC() \
    do { \
        if (gl_last_ret != ' ' && gl_last_ret != '\n') { \
            yyless(0); \
            RETURN_DUMMYSPC(' '); \
        } \
    } while (0)

/*
 * Return token `ret` from the enclosing function (yylex() when used in a
 * rule action) and remember it in gl_last_ret, which INSERT_DUMMYSPC
 * consults to decide whether a space token is needed.
 *
 * `ret` is evaluated exactly once, and the do-while(0) wrapper makes the
 * macro a single statement.
 */
#define RETURN(ret) \
    do { \
        gl_last_ret = (ret); \
        return gl_last_ret; \
    } while (0)

/*
 * One-level save/restore of the scanner's start condition.
 *
 * PUSHSTATE(state) remembers the current start condition in gl_prev_c and
 * switches to `state`; POPSTATE() switches back to the remembered one.
 * Only one level is kept: a second PUSHSTATE overwrites the first.
 *
 * `state` is parenthesized because BEGIN multiplies its operand, and both
 * macros are wrapped in do-while(0) so they behave as single statements.
 */
#define POPSTATE() \
    do { \
        BEGIN gl_prev_c; \
    } while (0)

#define PUSHSTATE(state) \
    do { \
        gl_prev_c = YYSTATE; \
        BEGIN (state); \
    } while (0)

/*
 * Action for a recognised element name: store the matched text in YYLVAL,
 * switch to C_ATTRNAME so attribute names are scanned next, record the
 * element token in gl_elem (the ATTR_RET_n macros compare against it) and
 * return that token.
 *
 * `i` is evaluated once; do-while(0) makes the macro a single statement.
 */
#define ELEM_RET(i) \
    do { \
        SETLVAL(); \
        BEGIN C_ATTRNAME; \
        gl_elem = (i); \
        RETURN(gl_elem); \
    } while (0)

/*
 * Action for an attribute name: store the matched text in YYLVAL, switch to
 * C_EQ, then return token `j` when condition `i` holds and UNKNOWNATTR
 * otherwise.
 *
 * Wrapped in do-while(0) so the macro behaves as one statement.
 */
#define ATTR_RET(i, j) \
    do { \
        YYLVAL = my_strdup(yytext); \
        BEGIN C_EQ; \
        if (i) { \
            RETURN(j); \
        } else { \
            RETURN(UNKNOWNATTR); \
        } \
    } while (0)

/*
 * ATTR_RET_n(ret, e1, ..., en): action for an attribute name that is valid
 * on n different elements.
 *
 * Returns token `ret` when the element currently being scanned (gl_elem, set
 * by ELEM_RET) is one of e1..en, and UNKNOWNATTR otherwise.  Storing the
 * matched text and switching to C_EQ is done by ATTR_RET, which all of these
 * expand to; only the element test differs between the variants.
 */
#define ATTR_RET_1(ret, e1) \
    ATTR_RET(gl_elem == (e1), ret)

#define ATTR_RET_2(ret, e1, e2) \
    ATTR_RET(gl_elem == (e1) || gl_elem == (e2), ret)

#define ATTR_RET_3(ret, e1, e2, e3) \
    ATTR_RET(gl_elem == (e1) || gl_elem == (e2) || gl_elem == (e3), ret)

#define ATTR_RET_4(ret, e1, e2, e3, e4) \
    ATTR_RET(gl_elem == (e1) || gl_elem == (e2) || gl_elem == (e3) || \
             gl_elem == (e4), ret)

#define ATTR_RET_5(ret, e1, e2, e3, e4, e5) \
    ATTR_RET(gl_elem == (e1) || gl_elem == (e2) || gl_elem == (e3) || \
             gl_elem == (e4) || gl_elem == (e5), ret)

#define ATTR_RET_6(ret, e1, e2, e3, e4, e5, e6) \
    ATTR_RET(gl_elem == (e1) || gl_elem == (e2) || gl_elem == (e3) || \
             gl_elem == (e4) || gl_elem == (e5) || gl_elem == (e6), ret)

#define ATTR_RET_7(ret, e1, e2, e3, e4, e5, e6, e7) \
    ATTR_RET(gl_elem == (e1) || gl_elem == (e2) || gl_elem == (e3) || \
             gl_elem == (e4) || gl_elem == (e5) || gl_elem == (e6) || \
             gl_elem == (e7), ret)

#define ATTR_RET_8(ret, e1, e2, e3, e4, e5, e6, e7, e8) \
    ATTR_RET(gl_elem == (e1) || gl_elem == (e2) || gl_elem == (e3) || \
             gl_elem == (e4) || gl_elem == (e5) || gl_elem == (e6) || \
             gl_elem == (e7) || gl_elem == (e8), ret)

#define ATTR_RET_9(ret, e1, e2, e3, e4, e5, e6, e7, e8, e9) \
    ATTR_RET(gl_elem == (e1) || gl_elem == (e2) || gl_elem == (e3) || \
             gl_elem == (e4) || gl_elem == (e5) || gl_elem == (e6) || \
             gl_elem == (e7) || gl_elem == (e8) || gl_elem == (e9), ret)

#define ATTR_RET_10(ret, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10) \
    ATTR_RET(gl_elem == (e1) || gl_elem == (e2) || gl_elem == (e3) || \
             gl_elem == (e4) || gl_elem == (e5) || gl_elem == (e6) || \
             gl_elem == (e7) || gl_elem == (e8) || gl_elem == (e9) || \
             gl_elem == (e10), ret)

/* Reader that supplies the scanner with input; defined after the rules. */
int my_yyinput(char *buf, int max_size);

/*
 * Route the scanner's input requests to my_yyinput() instead of flex's
 * default reader.  `b` is the destination buffer, `ms` its capacity and `r`
 * the lvalue that receives the number of bytes stored.
 */
#undef YY_INPUT
#define YY_INPUT(b, r, ms) ((r) = my_yyinput((b), (ms)))

%}

/*
 * Scanner options: patterns match without regard to letter case, and no
 * yywrap() is required (the scanner stops at the first end of input).
 */
%option case-insensitive
%option noyywrap 

/*
 * Exclusive start conditions:
 *   C_ELEM       after a tag opener; an element name is expected
 *   C_ATTRNAME   inside a tag; attribute names are expected
 *   C_EQ         after an attribute name; an optional "=" is expected
 *   C_ATTRVALUE  after "="; an attribute value is expected
 */
%x C_ELEM C_ATTRNAME C_EQ C_ATTRVALUE
/* inside a single-quoted / double-quoted attribute value */
%x C_AVAL_Q C_AVAL_DQ
/* after "<!", "<?" or a comment close; waiting for the closing ">" */
%x C_TAGC
/* inside a "<!--" comment */
%x C_COM
/* declared, but no rule in this file enters or uses it */
%x C_NONASCII

/* NAME: one name character (letters match in either case, see the option above) */
NAME  [A-Z0-9\.\-]
/* SP: one white-space character */
SP    [ \t\r\n]

/* SPGT: white space or ">"; NSPGT: any other character */
SPGT  [> \t\r\n]
NSPGT [^> \t\r\n]
/* SPEQ: white space or "="; SPGE: white space, "=" or ">"; NSPGE: any other character */
SPEQ  [= \t\r\n]
SPGE  [=> \t\r\n]
NSPGE [^=> \t\r\n]

%%

%{
%}

<*>{
    /*
     * Line break that is directly followed by another line break.  Pending
     * text is flushed first; then the line is counted and yymore() keeps the
     * break as a prefix of the next match, so no token is returned for it.
     * The trailing context leaves the following break in the input.
     */
    \r\n/[\r\n] { FLUSH_TEXT(); gl_lineno++; yymore(); }
    \r/[\r]     { FLUSH_TEXT(); gl_lineno++; yymore(); }
    \n/[\r\n]   { FLUSH_TEXT(); gl_lineno++; yymore(); }
}

    /*
     * Markup openers, active in INITIAL only.  Each one flushes pending text
     * before returning its own token; flex picks the longest opener that
     * matches.
     */

    /* empty declaration: a single COMMENT token */
"<!>"  { FLUSH_TEXT(); RETURN(COMMENT); }
    /* comment open: COMO, then the comment body is scanned in C_COM */
"<!--" { FLUSH_TEXT(); BEGIN C_COM; RETURN(COMO); }

<C_COM>{
    /*
     * Comment close: two dashes, optional white space and a closing angle
     * bracket.  Only the dashes are consumed; COMC is returned and the
     * bracket is scanned in C_TAGC.
     */
    "--"/{SP}*">" { FLUSH_TEXT(); BEGIN C_TAGC; RETURN(COMC); } 
}

    /* markup declaration open: MARKO, remainder scanned in C_TAGC */
"<!" { FLUSH_TEXT(); BEGIN C_TAGC; RETURN(MARKO); }

    /* processing instruction open: PROCO, remainder scanned in C_TAGC */
"<?" { FLUSH_TEXT(); BEGIN C_TAGC; RETURN(PROCO); }

    /* tag open, end-tag form and start-tag form: the element name follows in C_ELEM */
"</" { FLUSH_TEXT(); BEGIN C_ELEM; RETURN(ETAGO); }
"<"  { FLUSH_TEXT(); BEGIN C_ELEM; RETURN(STAGO); }

<C_ELEM>{
    /*
     * Element names.  Each name must be followed by white space or a closing
     * angle bracket (trailing context, not consumed).  ELEM_RET stores the
     * matched text, records the token in gl_elem, switches to C_ATTRNAME and
     * returns the token.
     */
    A/{SPGT}          { ELEM_RET(A); }
    BASE/{SPGT}       { ELEM_RET(BASE); }
    BLOCKQUOTE/{SPGT} { ELEM_RET(BLOCKQUOTE); }
    BODY/{SPGT}       { ELEM_RET(BODY); }
    BR/{SPGT}         { ELEM_RET(BR); }
    CENTER/{SPGT}     { ELEM_RET(CENTER); }
    DIR/{SPGT}        { ELEM_RET(DIR); }
    DL/{SPGT}         { ELEM_RET(DL); }
    DT/{SPGT}         { ELEM_RET(DT); }
    DD/{SPGT}         { ELEM_RET(DD); }
    DIV/{SPGT}        { ELEM_RET(DIV); }
    FORM/{SPGT}       { ELEM_RET(FORM); }
    HEAD/{SPGT}       { ELEM_RET(HEAD); }
    H1/{SPGT}         { ELEM_RET(H1); }
    H2/{SPGT}         { ELEM_RET(H2); }
    H3/{SPGT}         { ELEM_RET(H3); }
    H4/{SPGT}         { ELEM_RET(H4); }
    H5/{SPGT}         { ELEM_RET(H5); }
    H6/{SPGT}         { ELEM_RET(H6); }
    HR/{SPGT}         { ELEM_RET(HR); }
    HTML/{SPGT}       { ELEM_RET(HTML); }
    IMG/{SPGT}        { ELEM_RET(IMG); }
    INPUT/{SPGT}      { ELEM_RET(INPUT); }
    LI/{SPGT}         { ELEM_RET(LI); }
    MENU/{SPGT}       { ELEM_RET(MENU); }
    OL/{SPGT}         { ELEM_RET(OL); }
    OPTION/{SPGT}     { ELEM_RET(OPTION); }
    P/{SPGT}          { ELEM_RET(P); }
    PLAINTEXT/{SPGT}  { ELEM_RET(PLAINTEXT); }
    PRE/{SPGT}        { ELEM_RET(PRE); }
    SELECT/{SPGT}     { ELEM_RET(SELECT); }
    TEXTAREA/{SPGT}   { ELEM_RET(TEXTAREA); }
    TITLE/{SPGT}      { ELEM_RET(TITLE); }
    UL/{SPGT}         { ELEM_RET(UL); }

    /*
     * Any other name is reported as UNKNOWNELEM.  For matches of equal
     * length flex prefers the earliest rule, so these two fallbacks must
     * stay after the known names.
     */
    {NAME}+/{SPGT}    { ELEM_RET(UNKNOWNELEM); }
    {NSPGT}+/{SPGT}   { ELEM_RET(UNKNOWNELEM); }
}

<C_ATTRNAME>{
    /*
     * An equals sign with no attribute name in front of it: push it back and
     * let C_EQ deal with it.
     */
    "="    { yyless(0); BEGIN C_EQ; }

    /*
     * Known attribute names, each followed (trailing context) by white
     * space, an equals sign or a closing angle bracket.  ATTR_RET_n stores
     * the matched text, switches to C_EQ and returns the attribute token
     * only when gl_elem is one of the listed elements; on any other element
     * the name is reported as UNKNOWNATTR.
     *
     * NOTE(review): the VALUE rule lists INPUT twice.  The duplicate is
     * harmless, but confirm that no other element was intended.
     */
    NAME/{SPGE}      { ATTR_RET_4(NAME,      A, INPUT, SELECT, TEXTAREA); }
    HREF/{SPGE}      { ATTR_RET_2(HREF,      A, BASE); }
    ACCESSKEY/{SPGE} { ATTR_RET_2(ACCESSKEY, A, INPUT); }
    CLEAR/{SPGE}     { ATTR_RET_1(CLEAR,     BR); }
    ALIGN/{SPGE}     { ATTR_RET_10(ALIGN,    DIV, H1, H2, H3, H4, H5, H6,
					     HR, IMG, P); }
				   
    ACTION/{SPGE}    { ATTR_RET_1(ACTION,    FORM); }
    METHOD/{SPGE}    { ATTR_RET_1(METHOD,    FORM); }

    SIZE/{SPGE}      { ATTR_RET_3(SIZE,      HR, INPUT, SELECT); }
    WIDTH/{SPGE}     { ATTR_RET_2(WIDTH,     HR, IMG); }
    NOSHADE/{SPGE}   { ATTR_RET_1(NOSHADE,   HR); }

    SRC/{SPGE}       { ATTR_RET_1(SRC,       IMG); }
    HEIGHT/{SPGE}    { ATTR_RET_1(HEIGHT,    IMG); }
    HSPACE/{SPGE}    { ATTR_RET_1(HSPACE,    IMG); }
    VSPACE/{SPGE}    { ATTR_RET_1(VSPACE,    IMG); }
    ALT/{SPGE}       { ATTR_RET_1(ALT,       IMG); }
    BORDER/{SPGE}    { ATTR_RET_1(BORDER,    IMG); }

    TYPE/{SPGE}      { ATTR_RET_3(TYPE,      INPUT,
					     LI, OL); } /* 2.0 */
	
    MAXLENGTH/{SPGE} { ATTR_RET_1(MAXLENGTH, INPUT); }
    VALUE/{SPGE}     { ATTR_RET_4(VALUE,     INPUT, OPTION, INPUT,
					     LI); } /* 2.0 */  
				
    CHECKED/{SPGE}   { ATTR_RET_1(CHECKED,   INPUT); }

    START/{SPGE}     { ATTR_RET_1(START,     OL); } /* 2.0 */ 

    SELECTED/{SPGE}  { ATTR_RET_1(SELECTED,  OPTION); }
    MULTIPLE/{SPGE}  { ATTR_RET_1(MULTIPLE,  SELECT); } /* 2.0 */

    ROWS/{SPGE}      { ATTR_RET_1(ROWS,      TEXTAREA); }
    COLS/{SPGE}      { ATTR_RET_1(COLS,      TEXTAREA); }
    ISTYLE/{SPGE}    { ATTR_RET_1(ISTYLE,    TEXTAREA); } /* 2.0 */

    /*
     * Any other attribute name: always UNKNOWNATTR.  These fallbacks must
     * stay after the known names, because flex prefers the earliest rule
     * among matches of equal length.
     */
    {NAME}+/{SPGE}   { ATTR_RET(1, UNKNOWNATTR); }
    {NSPGE}+/{SPGE}  { ATTR_RET(1, UNKNOWNATTR); }

}

<C_EQ>{
    /*
     * State after an attribute name.  An equals sign is returned as a token
     * and the value is scanned next in C_ATTRVALUE.  Any other character
     * that is not white space, an equals sign or a closing angle bracket
     * means the previous attribute had no value: it is pushed back and
     * scanned as the start of the next attribute name.
     */
    "="     { SETLVAL(); BEGIN C_ATTRVALUE; RETURN('=');}
    {NAME}  { yyless(0); BEGIN C_ATTRNAME; }
    {NSPGE} { yyless(0); BEGIN C_ATTRNAME; }
}

<C_ATTRVALUE>{
    /* opening quote: return it and scan the value in the matching quoted state */
    "'"    { BEGIN C_AVAL_Q; RETURN('\''); }
    \"     { BEGIN C_AVAL_DQ; RETURN('"'); }
    /*
     * White space or a closing angle bracket right after the equals sign:
     * the value is missing.  The character is pushed back and an
     * ATTRVALUESTR token with an empty string is returned.
     *
     * NOTE(review): this pattern matches one character only.  A run of two
     * or more blanks is a longer match for the blank rule in the catch-all
     * block, so in that case a space token is returned and the scanner
     * stays in C_ATTRVALUE.  Confirm that this difference is intended.
     */
    {SPGT} { yyless(0); BEGIN C_ATTRNAME; RETURN_DUMMYSPC(ATTRVALUESTR); }
    /* unquoted value: runs up to the next white space or closing angle bracket */
    [^\'\"\r\n \t]{NSPGT}* {
	SETLVAL(); BEGIN C_ATTRNAME; RETURN(ATTRVALUESTR);
    }
}

<C_ELEM,C_ATTRNAME,C_EQ,C_ATTRVALUE,C_TAGC>{
    /*
     * Tag close.  When the previous token was neither a space nor a newline
     * token, INSERT_DUMMYSPC first pushes the bracket back and returns a
     * synthesized space token; the bracket is then matched again, the
     * scanner goes back to INITIAL and TAGC is returned.
     *
     * In C_ATTRVALUE the one-character SPGT rule is listed earlier and wins
     * for the bracket, so this rule is reached from C_ATTRNAME instead.
     */
    ">" { FLUSH_TEXT(); INSERT_DUMMYSPC(); BEGIN INITIAL; RETURN(TAGC); }
}

<C_AVAL_Q>[^\'\r\n]+  |
<C_AVAL_DQ>[^\"\r\n]+ { SETLVAL(); RETURN(ATTRVALUESTR); } /* text inside the quotes, up to the closing quote or a line break */

    /*
     * Closing quote followed by white space (trailing context, not
     * consumed): return the quote and go back to scanning attribute names.
     */
<C_AVAL_Q>"'"/{SP} { BEGIN C_ATTRNAME; RETURN('\''); }
<C_AVAL_DQ>\"/{SP} { BEGIN C_ATTRNAME; RETURN('"'); }

    /*
     * Closing quote followed by anything else.  The rule fires twice for the
     * same quote character: the first pass returns the quote token but
     * pushes the character back; the second pass consumes it, switches to
     * C_ATTRNAME and returns a synthesized space token with an empty string,
     * so the parser sees the same token sequence as in the rules above.
     */
<C_AVAL_Q>"'"      |
<C_AVAL_DQ>\"      {
    /* static: the flag has to survive between the two yylex() calls */
    static int is_firsttime = 1;
    int ret;
    if (is_firsttime) {
	is_firsttime = 0;
	SETLVAL();
	/* save the quote now: yyless(0) below shrinks yytext to nothing */
	ret = yytext[0];
	yyless(0);
	RETURN(ret);
    } else {
	is_firsttime = 1;
	BEGIN C_ATTRNAME;
	RETURN_DUMMYSPC(' ');
    }
}

<*>{
    /*
     * Numeric character reference in the range 63600 to 63999.  After
     * pending text has been flushed, the code is returned as a TEXT token
     * whose string holds the two bytes of the code, high byte first.
     *
     * NOTE(review): when the low byte is zero (code 63744) the string ends
     * after the first byte.
     */
    &#63[6-9][0-9][0-9]; {
	int i;
	FLUSH_TEXT();

	/* the three variable digits sit just before the final semicolon */
	i = 63000+
	    (yytext[yyleng-4] - '0')*100 +
		(yytext[yyleng-3] - '0')*10 +
		    (yytext[yyleng-2] - '0');
	/* two bytes plus the terminating NUL */
	YYLVAL = my_malloc(sizeof(char)*3);
	sprintf(YYLVAL, "%c%c", i/256, i%256);
	RETURN(TEXT);
    }
    /* single line break: count the line and return a newline token */
    (\r\n?|\n) { FLUSH_TEXT(); gl_lineno++; RETURN('\n'); }
    /* run of blanks: one space token */
    [ \t]+     { FLUSH_TEXT(); RETURN(' '); }
    /*
     * NUL byte in the input: flush pending text, then drop the byte.
     * The pattern is the flex escape for NUL.  It used to be written like a
     * C character constant, but flex treats apostrophes as ordinary
     * characters, so that form only matched a NUL between two apostrophes
     * and a real NUL byte ended up inside the pending text.
     */
    \0         { FLUSH_TEXT(); }
    /* anything else is ordinary text: keep accumulating it */
    .          { APPEND_TEXT(); }
}

%%

#if 0
/*
 * Disabled end-of-input hook.  The scanner is generated with
 * "%option noyywrap", so no yywrap() definition is needed; this version is
 * kept for reference only.  It would report that there is no further input
 * by returning 1.
 */
int yywrap()
{
    /* earlier experiment, left disabled: write the closing tags to yyout */
//    fputs("</DISPLAY>\n", yyout);
//    fputs("</HDML>\n", yyout);
    return 1;
}
#endif

/*
 * In-memory input, defined in another translation unit.
 * NOTE(review): myinput itself is not referenced in this file; presumably it
 * is the storage that the two pointers below point into -- confirm.
 */
extern char myinput[];
/* Next unread byte of the input; advanced by my_yyinput(). */
extern char *myinputptr;
/* End of the input: my_yyinput() never copies bytes at or beyond it. */
extern char *myinputlim;

/* Smaller of two values.  Each argument may be evaluated twice. */
#ifndef min
#define min(a, b) ((a) > (b) ? (b) : (a))
#endif /* min */

/*
 * Input callback behind YY_INPUT: copy the next chunk of the in-memory
 * input (myinputptr up to myinputlim) into the scanner's buffer.
 *
 *   buf       destination buffer supplied by the scanner
 *   max_size  capacity of buf in bytes
 *
 * Returns the number of bytes copied and advances myinputptr by that
 * amount.  Returns 0, which the scanner treats as end of input, when
 * nothing is left.  A non-positive max_size or a read pointer that has run
 * past the limit also yields 0, so the scanner never sees a negative count.
 */
int my_yyinput(char *buf, int max_size)
{
    int n;

    if (max_size <= 0 || myinputptr >= myinputlim) {
	return 0;
    }

    /* copy whichever is smaller: what is left, or what fits into buf */
    if (myinputlim - myinputptr < max_size) {
	n = (int)(myinputlim - myinputptr);
    } else {
	n = max_size;
    }

    memcpy(buf, myinputptr, (size_t)n);
    myinputptr += n;
    return n;
}

/*
YYSTYPE my_strdup(char *str, int print_this)
{
    struct my_yystype *p;
    p = (struct my_yystype *)my_malloc(sizeof(struct my_yystype));
    p->print_this = print_this;
    p->noneed_tag = 0;
    p->top = (struct strlist *)my_malloc(sizeof(struct strlist));
    p->last = p->top;
    p->top->str = my_strdup(str);
    p->top->next = NULL;

    return p;
}
*/
