#include <stdio.h>
#include <stdlib.h>

#define OFFSET_FILE      "morogram.offset.bin"
#define POINTER_FILE     "morogram.pointer.bin"
#define COINCIDENCE_FILE "morogram.coincidence.bin"
#define UTF8TEMP_FILE    "morogram.utf8"

/*
 * Read one fixed-width, big-endian unsigned integer from a table file.
 *
 *   fp       stream opened for reading (binary mode)
 *   pointer  zero-based record index; the record starts at byte
 *            pointer * offset
 *   offset   width of one record in bytes
 *
 * Returns the decoded value, or -1 when the seek fails or fewer than
 * `offset` bytes are available at that position.  Every successfully read
 * value is built from bytes 0..255 only, so a negative result always means
 * "no such record".
 *
 * The previous implementation ignored fseek() failures and added the EOF
 * marker (-1) into the accumulator, which then got left-shifted as a negative
 * number (undefined behaviour) and produced width-dependent garbage.
 */
long read_offset(FILE *fp, long pointer, int offset){
	unsigned long value = 0;	/* unsigned: shifting into the top bit stays defined */
	int byte;
	int i;

	if(fseek(fp, pointer * offset, SEEK_SET) != 0){
		return -1;
	}
	for (i = 0; i < offset; i++){
		byte = fgetc(fp);
		if(byte == EOF){
			return -1;	/* truncated or missing record */
		}
		value = (value << 8) | (unsigned long) byte;
	}
	return (long) value;
}

/*
 * Store one fixed-width, big-endian integer into a table file.
 *
 *   fp       stream opened for writing or update (binary mode)
 *   pointer  zero-based record index; the record starts at byte
 *            pointer * offset
 *   item     value to store; only its low `offset` bytes are written
 *   offset   width of one record in bytes
 *
 * Returns the number of bytes actually written: `offset` on success, fewer
 * (possibly 0) when the seek or a write fails, and 0 when offset <= 0.
 *
 * The record is written most significant byte first after a single seek.
 * The previous implementation repositioned the stream before every byte,
 * ignored fseek()/fputc() failures and always reported `offset` bytes.
 */
int write_offset(FILE *fp, long pointer, long item, int offset){
	/* Work on the unsigned bit pattern so shifting never touches a sign bit. */
	const unsigned long bits = (unsigned long) item;
	const int long_bits = (int) (sizeof bits * 8);
	int written = 0;
	int shift;

	if(offset <= 0){
		return 0;
	}
	if(fseek(fp, pointer * offset, SEEK_SET) != 0){
		return 0;
	}
	for (shift = (offset - 1) * 8; shift >= 0; shift -= 8){
		/* Bytes above the width of long are padding; a shift that wide is undefined. */
		int byte = (shift < long_bits) ? (int) ((bits >> shift) & 0xffUL) : 0;

		if(fputc(byte, fp) == EOF){
			break;
		}
		written++;
	}
	return written;
}

/*
 * Print `gram` consecutive code points to stdout as UTF-8.
 *
 *   fp       table of 4-byte big-endian code points, read via read_offset()
 *   pointer  index of the first code point to print
 *   gram     number of code points to print
 *
 * Code points up to 0xEFFFF are encoded as 1 to 4 UTF-8 bytes.  Code points
 * in 0xF0000..0x10FFFF are not encoded; they are printed as the text
 * "&Mnnnnnn;" where nnnnnn is (code point - 0xEFFFF), zero-padded to six
 * digits (presumably an external character reference -- confirm with the
 * morogram front end).  Values above 0x10FFFF print nothing.
 *
 * Negative values, which read_offset() yields when a record cannot be read,
 * are skipped instead of being emitted as a stray byte.
 */
void print_ucs4(FILE *fp, long pointer, long gram){
	long i;		/* same type as gram, so the comparison never truncates */
	long dummy;

	for (i = 0; i < gram; i++){
		dummy = read_offset(fp, pointer + i, 4);

		if(dummy < 0){
			continue;
		}
		if(dummy <= 0x7F){
			putc((int) dummy, stdout);
		} else if(dummy <= 0x7FF){
			putc((int) (0xC0 |  (dummy >> 6)),          stdout);
			putc((int) (0x80 |  (dummy        & 0x3F)), stdout);
		} else if(dummy <= 0xFFFF){
			putc((int) (0xE0 |  (dummy >> 12)),         stdout);
			putc((int) (0x80 | ((dummy >>  6) & 0x3F)), stdout);
			putc((int) (0x80 |  (dummy        & 0x3F)), stdout);
		} else if(dummy <= 0xEFFFF){
			putc((int) (0xF0 |  (dummy >> 18)),         stdout);
			putc((int) (0x80 | ((dummy >> 12) & 0x3F)), stdout);
			putc((int) (0x80 | ((dummy >>  6) & 0x3F)), stdout);
			putc((int) (0x80 |  (dummy        & 0x3F)), stdout);
		} else if(dummy <= 0x10FFFF){
			/* %ld: the argument is a long; "%d" here was undefined behaviour. */
			fprintf(stdout, "&M%06ld;", dummy - 0xEFFFF);
		}
	}
}

/*
 * Print `gram` consecutive code points to stdout as raw 4-byte big-endian
 * values (most significant byte first), without any re-encoding.
 *
 *   fp       table of 4-byte code points, read via read_offset()
 *   pointer  index of the first code point to print
 *   gram     number of code points to print
 */
void print_ucs4raw(FILE *fp, long pointer, long gram){
	int n;

	for (n = 0; n < gram; n++){
		const long code = read_offset(fp, pointer + n, 4);
		int shift;

		/* Emit the four bytes from the high-order end down. */
		for (shift = 24; shift >= 0; shift -= 8){
			fputc((int) (code >> shift) & 0xff, stdout);
		}
	}
}

/*
 * Write a progress message to stderr exactly as given.
 *
 * Returns what fprintf() returns: the number of characters written, or a
 * negative value on output error.
 *
 * The text is passed as an argument to a fixed "%s" format rather than used
 * as the format itself, so a '%' inside the message is printed literally
 * instead of being interpreted as a conversion specification.
 */
int noise(char *str){
	return fprintf(stderr, "%s", str);
}

/*
 * Report a fatal problem and terminate the program.
 *
 * Writes a blank line followed by the message, indented by one tab and
 * framed with "***", to stderr, then exits with status 255.  Never returns.
 */
void error(char *str){
	enum { FATAL_EXIT_STATUS = 255 };

	(void) fprintf(stderr, "%s%s%s", "\n\t*** ", str, " ***\n");
	exit(FATAL_EXIT_STATUS);
}



/*
 * Entry point of the morogram back end.
 *
 * Usage: <program> length gram_min gram_max frequency_min [frequency_max]
 *
 *   length         number of 4-byte big-endian code points in OFFSET_FILE.
 *                  A negative value selects raw 4-byte output written to
 *                  UTF8TEMP_FILE (stdout is reopened onto it); its absolute
 *                  value is then used as the length.
 *   gram_min/max   range of n-gram lengths to report (longest first)
 *   frequency_min  smallest frequency to report
 *   frequency_max  largest frequency to report (defaults to length)
 *
 * Processing steps:
 *   1. Write POINTER_FILE as the identity table 0..length-1.
 *   2. Comb-sort that table on disk so the suffixes of the code-point
 *      sequence it points to are in ascending order.
 *   3. Write COINCIDENCE_FILE: for each adjacent pair of sorted suffixes,
 *      the number of leading code points they share.
 *   4. For every gram length, walk runs of adjacent suffixes sharing at
 *      least `gram` code points; the run length is the frequency.  Each
 *      reported line is "frequency<TAB>n-gram<TAB>gram\n".
 *
 * Exits with status 8 on a wrong argument count and via error() (status
 * 255) when a work file cannot be opened.  Returns 0 otherwise.
 */
int main(int argc, char *argv[]){
	int offset = 4, sort_sw, swap, at_end;
	long length_input_file, gram_min, gram_max, frequency_min, frequency_max;
	long i, j, k, pi, pj, max, sort_gap, data1 = 0, data2 = 0;
	long coincidence_num = 0, max_coincidence_num = 0;
	long gram, frequency;
	/* Output routine; a typed pointer, since a function pointer does not portably fit in void*. */
	void (*func)(FILE *, long, long);
	FILE *fpo, *fpp, *fpc;

	if(argc != 5 && argc != 6){
		fprintf(stderr,"%s\n","This program is internally used by morogram.");
		exit(8);
	}

	/* Choose the output mode and the record width (offset) of the work files. */
	length_input_file = atol(argv[1]);

	if(length_input_file < 0){
		func = print_ucs4raw;
		length_input_file = -length_input_file;

		if(freopen(UTF8TEMP_FILE,"wb",stdout) == NULL){
			error("can't open temporary output file");
		}
	}else{
		func = print_ucs4;
	}

	/* Pointers and coincidence counts never exceed length, so small inputs get narrow records. */
	if(length_input_file <= 256){
		offset = 1;
	} else if(length_input_file <= 65536){
		offset = 2;
	}

	fprintf(stderr, "\toffset length is %d.\n", offset);

	/* Minimum gram, maximum gram, minimum frequency and maximum frequency. */
	gram_min = atol(argv[2]);
	gram_max = atol(argv[3]);
	frequency_min = atol(argv[4]);
	/* argv[argc] is guaranteed to be NULL, so argv[5] is safe to inspect when argc == 5. */
	frequency_max = argv[5] == NULL ? length_input_file : atol(argv[5]);

	/* Create the pointer table: entry i initially points at position i. */
	noise("\tcreating pointer table...");
	if((fpp = fopen(POINTER_FILE, "wb")) == NULL){
		error("can't create pointer table");
	}
	for (i = 0; i < length_input_file; i++){
		write_offset(fpp, i, i, offset);
	}
	fclose(fpp);
	noise("done.\n");



	/* First Stage (1): sort the pointer table by the suffixes it points to. */

	if((fpo = fopen(OFFSET_FILE, "rb")) == NULL){
		error("can't open offset table");
	}
	if((fpp = fopen(POINTER_FILE, "r+b")) == NULL){
		error("can't open pointer table");
	}

	/* Comb Sort (reference: http://www.ffortune.net/comp/slib/sort/combsort.htm) */
	noise("\tsorting pointer table...");
	/* Fewer than two entries: nothing to sort.  A single entry used to be
	   compared against the non-existent entry 1. */
	if(length_input_file >= 2){
		sort_gap = length_input_file - 1;
		do {
			/* Shrink the gap by 1.3; gaps of 9 and 10 are replaced by 11. */
			sort_gap = sort_gap * 10 / 13;
			if(sort_gap == 0){
				sort_gap = 1;
			} else if(sort_gap == 9 || sort_gap == 10){
				sort_gap = 11;
			}
			sort_sw = 0;
			for (i = 0; i + sort_gap < length_input_file; i++){
				j = i + sort_gap;
				pi = read_offset(fpp, i, offset);
				pj = read_offset(fpp, j, offset);

				/* Only the part covered by the shorter suffix can be compared. */
				max = length_input_file - (pi < pj ? pj : pi);
				for (k = 0; k < max; k++){
					data1 = read_offset(fpo, pi + k, 4);
					data2 = read_offset(fpo, pj + k, 4);
					if(data1 != data2){
						break;
					}
				}
				if(k < max){
					/* A differing code point decides the order. */
					swap = data1 > data2;
				} else {
					/* One suffix is a prefix of the other: the shorter one
					   (the larger pointer) sorts first.  Without this
					   tie-break the short suffix could stay between two
					   suffixes sharing a longer prefix and split their run. */
					swap = pi < pj;
				}
				if(swap){
					write_offset(fpp, i, pj, offset);
					write_offset(fpp, j, pi, offset);
					sort_sw = 1;
				}
			}
			/* Keep going until a pass with gap 1 makes no exchange. */
		} while ((sort_sw == 1) || (sort_gap > 1));
	}
	fclose(fpp);
	noise("done.\n");



	/* First Stage (2): count shared leading code points of adjacent suffixes. */

	if((fpp = fopen(POINTER_FILE, "rb")) == NULL){
		error("can't open pointer table");
	}
	if((fpc = fopen(COINCIDENCE_FILE, "wb")) == NULL){
		error("can't create coincidence table");
	}

	noise("\tcounting coincidence number of characters...");
	for (i = 0; i < length_input_file - 1; i++){
		coincidence_num = 0;
		pi = read_offset(fpp, i    , offset);
		pj = read_offset(fpp, i + 1, offset);
		max = length_input_file - (pi < pj ? pj : pi);
		for (k = 0; k < max; k++){
			data1 = read_offset(fpo, pi + k, 4);
			data2 = read_offset(fpo, pj + k, 4);
			if(data1 != data2){
				break;
			}
			coincidence_num++;
		}
		write_offset(fpc, i, coincidence_num, offset);
		if(coincidence_num > max_coincidence_num){
			max_coincidence_num = coincidence_num;
		}
	}

	fclose(fpc);
	fprintf(stderr, "done.\n\tlargest coincidence number is %ld.\n", max_coincidence_num);

	/* An n-gram longer than the largest coincidence occurs only once, so it
	   cannot reach a minimum frequency above 1. */
	if((frequency_min > 1) && (gram_max > max_coincidence_num)){
		gram_max = max_coincidence_num;
	}

	/* Second Stage: report the frequency of every n-gram, longest first. */

	noise("\tcalculating ");
	if((fpc = fopen(COINCIDENCE_FILE, "rb")) == NULL){
		error("can't open coincidence table");
	}

	for (gram = gram_max; gram >= gram_min; gram--){
		fprintf(stderr, "% 10ld-gram frequency.", gram);
		i = 0;	/* first sorted entry of the current run */
		j = 0;	/* next coincidence entry to examine */
		while (i < length_input_file){
			/* A run continues while adjacent suffixes share at least
			   `gram` code points.  The table holds length - 1 entries, so
			   the last suffix always ends its run; the table is never
			   read past its end. */
			frequency = 0;
			do {
				at_end = (j >= length_input_file - 1);
				if(!at_end){
					coincidence_num = read_offset(fpc, j, offset);
				}
				frequency++;
				j++;
			} while (!at_end && (coincidence_num >= gram));

			if((frequency_min <= frequency) && (frequency <= frequency_max)){
				pi = read_offset(fpp, i, offset);
				/* Skip suffixes too short to contain a full n-gram. */
				if((length_input_file - pi) >= gram){
					fprintf(stdout, "%ld\t", frequency);
					func(fpo, pi, gram);
					fprintf(stdout, "\t%ld\n", gram);
				}
			}
			i += frequency;
		}
		/* Back up over the 26-column progress text so the next one overwrites it. */
		noise("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b");
	}
	noise("\n");
	fclose(fpo);
	fclose(fpp);
	fclose(fpc);
	fclose(stdout);

	return 0;
}
