/* Copyright (C) 2022 Momi-g

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*SH_doc
title=*SH_bn* section=3 repnl=\040

@name *SH_bn*
@_brief grep comments from c-lang srccode
@_syno
#include "ccmt.h"

char* ccmt(void* src [, const char* mode] );
char* ccmt_v(void* src [, const char* mode] );
//needs free rtnptr

@tl_dr
		@(code)@
	#include "ccmt.h"
	
	int main(int argc, char** argv) {
	  const char* src = "//1.cmt\n 2."//nocmt"\n /*3.cmt\n *" "/\n";
	  char* p = ccmt(src);
	  puts(p); free(p);		//: 1. (2.removed) 3.
	
	  p = ccmt_v(src);	//: grep -v
	  puts(p); free(p);		//: (1.) 2. (3.)
  
	  p = ccmt(stdin, "fp");	//: ag1 == filepointer
	  free(p);
	  p = ccmt("mysrc.c", "file");	//: ag1 == filename
	  free(p);
	  return 0;
	}
	//~$ gcc src.c
	//~$ printf '//1\n 2\n' | ./a.out
		@()
@_desc
	ccmt greps c-lang cmt(// or /*..+/) using posix-ERE/BRE regex from src 
	and rtn malloc()ed ptr.
	param is:
	--
	@(list)
	`_src`: src str/fp/filename pointer. treat as strptr if ag2 isnt.
	`_mode`: optional. set ag1 pointer type with string. --
		"p"/"str"/noset >> src is literal string, const char* --
		"fp" >> src is opened file pointer, FILE* --
		"file" >> src is filename. do open-read-close sequence. --
	@()
	--
	ccmt_v() gets cmt removed string as 'grep -v XXX'.--
	ccmt assumes the src doesnt hold trigraphs ??X and long-line syntax \(\n).
	
@return_value
	malloc()ed pointer. rtn NULL + set errno if error.
@conforming_to posix-2001+
@copyright Copyright 2022 momi-g, GPLv3+
@_ver 2022-03-20 v1.0.1 (2022-03-16 v1.0.0)
@_see `regex(3)` --
	https://gcc.gnu.org/onlinedocs/gcc-3.2.3/cpp/Initial-processing.html --
	https://math.stackexchange.com/questions/2482191/how-do-you-draw-a-dfa-from-a-regular-expression --
	https://cs.stackexchange.com/questions/311/deriving-the-regular-expression-for-c-style-comments --
	https://cyberzhg.github.io/toolbox/nfa2dfa?regex=KChiKikoYWIpKGIqKSkq --
//SH_docE*/
/* tool macros */
/*
 * Diagnostic helper macros. The ERRact/STOP group is skipped entirely when
 * ERRact is already defined before this point.
 */
#ifndef ERRact
#include <stdio.h>
 #if (199901L <= __STDC_VERSION__ +0)	/* nearly 200112L, _POSIX_C_SOURCE	c99*/
	#include <sys/types.h>
	#include <unistd.h>
	/* C99 or later: report the enclosing function name and the process id */
	#define ERRactag	__func__, getpid()
 #else
	/* older compilers: no __func__, so print fixed placeholders instead */
	#define ERRactag	"func:c99+", 0
 #endif
 #include <string.h>
 #include <errno.h>
 /*
  * ERRact(xpr, msg, act): if xpr is true, print one line to stderr holding
  * __FILE__, __LINE__, the ERRactag pair, the stringified xpr, msg and
  * strerror() of the errno value sampled before fprintf(), then execute act.
  * The expansion is a bare if-block (not do/while(0)), so do not place it
  * directly in front of an `else`.
  */
 #define ERRact(xpr, msg, act)	if(xpr){ int en_=errno; fprintf(stderr, \
	"ERR: %s %d %s() pid:%d %s msg:%s sys:%s\n",__FILE__,__LINE__, ERRactag \
	, "hit(" #xpr ")", msg, strerror(en_) ); act; }
 /* STOP(xpr, msg): ERRact whose action prints "STOP" and calls exit(1).
  * exit() needs <stdlib.h>, which is not included in this group. */
 #define STOP(xpr, msg)	ERRact(xpr, msg, fputs("STOP\n",stderr);exit(1) )
#endif
/* loop(a): run the following statement with lpcnt = 1..a. `a` is pasted
 * unparenthesized and re-evaluated on every iteration; lpcnt is visible in
 * the body. */
#define loop(a)		for(int lpcnt=1;lpcnt<=a;lpcnt++)
/*tool end*/

#ifdef TEST
	#include <assert.h>
	#include "*SH_bn*.h"	//*SH_co*	*
	#include "hcut.h"
	#include "msgp.h"
	#include "laptime.h"
	#define qu(...)		Qsub(__VA_ARGS__)
	#define Qsub(...)	#__VA_ARGS__
#endif

#include "*SH_bn*.h"
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>		//exit()
#include <errno.h>
#include <unistd.h>	//getpid()	fd, STDERR_FILENO.  FILE* ... stdout stream.
#include <stdint.h>	//intptr_t type

#include <regex.h>

static int ccmt_flg=0;
static char* ccmt_search(char* str, int vflg){
	//rtnbuff
	char* res = strdup(str);
	STOP( res==NULL, "fatal: strcpy() failed");
	size_t reslen=strlen(res);
	memset(res, 0, reslen+1);

	//re_compile:
	int flg = REG_EXTENDED;
	const char* ptnc = "/[*]([^*]*[/]|[*]*[^*/])*[*]+/";
	if(ccmt_flg== -1){ ptnc = "/[*](([^*]*[/])*([*]*[^*/])*)*[*]+/"; }
	// ptn:
	// yx((a|b|y)*y|x*(a|b))*x+y
	// yx(  [^x]*[y]  |  [x]*[^xy] )*  x+y		>> ERE only
	// yx(  ([^x]*[y]){0,1}  ([x]*[^xy]){0,1} )*  x+y	>> conv to BRE
	// yx(  ([^x]*[y])*  ([x]*[^xy])* )*  x+y		>> use repeat op '|' >> *

	const char* ptnl = "\"([\\].|[^\"])*\"";
	const char* ptncc = "//[^\n]*\n";
	
	// regex valid test 
	// https://math.stackexchange.com/questions/2482191/how-do-you-draw-a-dfa-from-a-regular-expression
	// https://cs.stackexchange.com/questions/311/deriving-the-regular-expression-for-c-style-comments
	// https://cyberzhg.github.io/toolbox/nfa2dfa?regex=KChiKikoYWIpKGIqKSkq

	//switch ERE<>BRE
	if(ccmt_flg==1){
		flg = 0;
//	/[*] (  [^*]*[/]  |  [*]*[^*/]  )*  [*]+/
//	/[*]  \(  \([^*]*[/]\)*  \([*]*[^/*]\)  \)*  [*]\{1,\}/
		ptnc = "/[*]\\(\\([^*]*[/]\\)*\\([*]*[^/*]\\)*\\)*[*]\\{1,\\}/";
		ptnl = "\"\\(\\([\\].\\)*[^\"\\]*\\)*\"";
	}
	if(ccmt_flg==2){
		flg = 0;
//	/[*] \( \([^*]*[/]\)\{0,\}  \([*]*[^/*]\)\{0,\} \)\{0,\} [*]\{1,\}/
		ptnc = "/[*]\\(\\([^*]*[/]\\)\\{0,\\}\\([*]*[^/*]\\)\\{0,\\}\\)\\{0,\\}[*]\\{1,\\}/";
		ptnl = "\"\\(\\([\\].\\)\\{0,1\\}[^\"\\]\\{0,1\\}\\)*\"";
	}
ccmt_flg?
printf("mode, %d-v%d\n", ccmt_flg, vflg),puts(ptnc),puts(ptnl):0;	//dbg

	regex_t cobj, lobj, ccobj;
	int rc =  regcomp(&cobj, ptnc, flg);
	rc +=  regcomp(&lobj, ptnl, flg);
	rc +=  regcomp(&ccobj, ptncc, flg);
	STOP( rc, "fatal: reptn comp failed");

	//search
	regmatch_t robj_c[1], robj_l[1], robj_cc[1];
	flg=0;
	char* np = str;
	int ee = strlen(str) +1;
	char* wptr = res;
	for(;;){
		//1:posbuff size, [0] only
		char* rptr = np;
		int cp = regexec(&cobj, rptr, 1, robj_c, flg) ? ee: robj_c[0].rm_so;
		int lp = regexec(&lobj, rptr, 1, robj_l, flg) ? ee: robj_l[0].rm_so;
		int ccp= regexec(&ccobj, rptr, 1, robj_cc,flg) ? ee: robj_cc[0].rm_so;
//dbg(cp, lp, ccp, ee*3);		
		//code only, break
		if(cp+lp+ccp==ee*3){
//dbg(vflg, rptr, wptr-1);
			if(vflg){ strcpy(wptr, rptr); }
			break;
		}
		//hit
		char* p;
		regoff_t sz;
		//lit
		if(lp<cp && lp<ccp){
			p = rptr + robj_l[0].rm_eo;	//skip
			sz = 0;
		}
		// lcmt
		else if(ccp<cp){
			p = rptr + robj_cc[0].rm_so;	//hit start
			sz = robj_cc[0].rm_eo - robj_cc[0].rm_so;
		}
		// mcmt
		else{
			p = rptr + robj_c[0].rm_so;	//hit start
			sz = robj_c[0].rm_eo  - robj_c[0].rm_so;
		}
		np = p+sz;
		if(vflg==0){
			strncpy(wptr, p, sz);
		}else{
//dbg(p, rptr);
			int nflg = sz && ccp<cp;
			sz = p - rptr;
			strncpy(wptr, rptr, p-rptr);
			if(nflg){ wptr[sz]='\n'; sz++; }	// //..\n eats \n for maincode
		}
		wptr = wptr + sz;
	}
	regfree(&cobj);
	regfree(&lobj);
	regfree(&ccobj);
	return res;
}

/*
 * mload: read a whole stream into a NUL-terminated heap buffer.
 *
 * fp    : already opened stream to read until EOF (left open), or NULL.
 * fname : file to open-read-close, or NULL.
 *         exactly one of fp / fname must be given.
 * rtn   : malloc()ed string, caller free()s it.
 *         NULL + errno on error (EINVAL: both or neither of fp/fname given,
 *         ENOMEM: allocation failed, EIO: read error, or whatever fopen()
 *         reported).
 */
static char* mload(FILE* fp, const char* fname){
	if( (fp!=NULL) == (fname!=NULL) ){
		errno = EINVAL;
		return NULL;
	}

	if(fp==NULL){
		fp = fopen(fname, "r");
		if(fp==NULL){ return NULL; }	// errno comes from fopen()
	}

	size_t msz = 128;
	size_t cur = 0;
	int en = 0;		// pending errno value, 0 == success
	char* p = malloc(msz);
	if(p==NULL){ en = ENOMEM; }

	while(p!=NULL){
		// always keep the last byte free for the terminator
		size_t n = fread(p+cur, 1, msz-1-cur, fp);
		cur += n;
		if( feof(fp) ){ break; }
		if( ferror(fp) ){ en = EIO; break; }
		if(cur == msz-1){
			// full: double the buffer, keeping the old one valid on failure
			char* tmp = (msz > SIZE_MAX/2) ? NULL : realloc(p, msz*2);
			if(tmp==NULL){ en = ENOMEM; break; }
			p = tmp;
			msz = msz*2;
		}
	}

	if(en){
		free(p);
		p = NULL;
	}else{
		p[cur] = 0;
	}
	if(fname){ fclose(fp); }
	if(en){ errno = en; }	// set last so fclose() cannot overwrite it
	return p;
}


/*
 * ccmt_impl: common entry behind the ccmt front ends.
 *
 * srcp : source, interpreted according to mode.
 * vflg : passed through to ccmt_search() (0: comments, !0: comments removed).
 * mode : NULL / "p" / "str" >> srcp is the source string itself
 *        "fp"               >> srcp is an opened FILE*, read to EOF
 *        "file"             >> srcp is a filename, open-read-close
 *        anything else stops the program via STOP().
 * rtn  : result of ccmt_search(), or NULL when the source could not be loaded.
 */
char* ccmt_impl(void* srcp, int vflg, const char* mode){
	int as_string = (mode==NULL) || !strcmp(mode, "p") || !strcmp(mode, "str");
	if(as_string){
		return ccmt_search(srcp, vflg);
	}

	// stream / file modes: pull the text into memory first
	char* text = NULL;
	if( !strcmp(mode, "fp") ){
		text = mload(srcp, NULL);
	}
	else if( !strcmp(mode, "file") ){
		text = mload(NULL, srcp);
	}
	else{
		STOP(1, "invalid ag2: str/p, fp, file only");
	}

	if(text==NULL){
		return NULL;
	}
	char* found = ccmt_search(text, vflg);
	free(text);
	return found;
}

#ifdef TEST
/* t_0: passes errno and 0 to eq_i.
 * NOTE(review): HCUT_ADD and eq_i are not defined in this file (presumably
 * hcut.h); presumably an equality check that errno is still clear. */
HCUT_ADD(t_0) {
	eq_i(errno, 0);
}
#endif


#ifdef TEST_
/* Prints a sample source, then the ccmt and ccmt_v results for it: once with
 * the current ccmt_flg and once more for each of the flags -1, 1 and 2. */
HCUT_ADD(t_*SH_bn*) {
	eq_i(errno, 0);

	char* s = "abc \n \"hw\" //1.0\"nest\"\n 2.\n /*3. \n*/\n 4.\n";
	puts(s);

	// flags applied after the first round, in this order
	static const int next_flg[] = { -1, 1, 2 };
	const size_t nswitch = sizeof next_flg / sizeof next_flg[0];

	for(size_t round=0; ; round++){
		char* got = ccmt(s);
		if(got){
			puts(got);
			free(got);
		}else{
			puts("nohit");
		}

		got = ccmt_v(s);
		if(got){
			puts(got);
			free(got);
		}else{
			puts("nohit");
		}

		if(round==nswitch){ break; }
		ccmt_flg = next_flg[round];
	}
}
#endif

#ifdef TEST_
/* Prints the source file name, then the comments ccmt extracts from that
 * file in "file" mode ("nohit" when NULL comes back). */
HCUT_ADD(t_stdin) {
	puts("*SH_bn*.c");
	char* cmts = ccmt("*SH_bn*.c", "file");
	if(cmts){
		puts(cmts);
		free(cmts);
	}else{
		puts("nohit");
	}
}
#endif


#ifdef TEST_
/* t_speed: timing stub; it does not call any ccmt function.
 * NOTE(review): laptime() and dbg() are not defined in this file (presumably
 * laptime.h / msgp.h); their exact semantics are not visible here. */
HCUT_ADD(t_speed) {
	laptime(0);
	laptime("Tree");
	dbg(1);
}
#endif

/*SH_SMP
#include "*SH_bn*.h"
int main(){
	char* s = "abc\n \"hw//\" //1.0\"a\"\n 2.\n /*3. \n*""/\n 4.\n";
	puts(s);
	char* ret = ccmt(s);
	ret ? puts(ret), free(ret), 1 : puts("nohit");
	ret = ccmt_v(s);
	ret ? puts(ret), free(ret), 1 : puts("nohit");
	return 0;
}
//	~$ gcc smpl.c *SH_bn*.c

//SH_SMPE*/

/* Test runner entry, compiled only in TEST builds.
 * NOTE(review): t_ccmnt is not defined anywhere in this file (the only test
 * compiled under TEST is t_0); the inline comment suggests the argument list
 * is rewritten by an external shell/sed step, so keep the line layout as is. */
#ifdef TEST
HCUT_RUN("stderr", 1,	/* keep newline. use for SH sed edit, -t test.*/
t_ccmnt);
#endif

/*
 change log
 --
2022-03-20  Momi-g	<dmy@dmy.dmy>

	* *SH_bn*.c (ccmt_impl): add ere/bre regex, min-dfa check@doc_see

2022-03-16  Momi-g	<dmy@dmy.dmy>

	* *SH_bn*.c (all): init

*/
