/*
mpaligner is program to align string and string.
Copyright (C) 2010, 2011 Keigo Kubo

mpaligner is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
any later version.

mpaligner is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with mpaligner.  If not, see <http://www.gnu.org/licenses/>.

Implement the main function of the minimum pattern alignment algorithm
date:   2010/9/16
author: Keigo Kubo
affiliation: Nara Institute of Science and Technology (NAIST)
e-mail: keigo-k{@}is.naist.jp   << Please transform {@} into @
*/

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "mpAlign.h"

/*
 * Print the command-line synopsis and the description of every option
 * to stderr. The function only writes text; it does not terminate the
 * program, so callers decide whether to exit afterwards.
 */
void usage(void){
	/* Synopsis: every option accepted by main() is listed here. */
	fprintf(stderr,"usage: "
		"./mpaligner -i <string> [-o <string>] [-p <string>] [-ai <string>]\n"
		"            [-ao <string>] [-s] [-h <int>] [-dx] [-dy] [-dp <float>]\n"
		"            [-rx <int>] [-ry <int>] [-is <char>] [-os <char>]\n"
		"            [-oj <char>] [-dc <char>] [-t <int or float>]\n"
		"            [-no_double_alignment] [-f_nbest <int>]\n"
		"            [-n or -s_nbest <int>]\n");

	/* Per-option help. Defaults quoted here mirror the initial values of
	   the TOTAL_INFO structure set up in main(). */
	fprintf(stderr,"options:\n"
		"  -i <string>\n    input training file name.\n\n"
		"  -o <string>\n    output alignments file name. (default <input file name>.align)\n\n"
		"  -p <string>\n    previous knowledge file name. \n\n"
		"  -ai <string>\n    input align model file name.\n\n"
		"  -ao <string>\n    output align model file name. (default <input file name>.model)\n\n"
		"  -s\n    print score in output file. (default don't print)\n\n"
		"  -h <int>\n    hash size. (default 100000000)\n\n"
		"  -dx\n    allow to delete part string X. (default don't allow)\n\n"
		"  -dy\n    allow to delete part string Y. (default don't allow)\n\n"
		"  -dp <float>\n    penalty value of deletion. (default 0.75)\n\n"
		"  -rx <int>\n    restrict length of part string X. (default 0: 0 is no restrict.)\n\n"
		"  -ry <int>\n    restrict length of part string Y. (default 0: 0 is no restrict.)\n\n"
		"  -is <char>\n    separate char in input file. (default ' ')\n\n"
		"  -os <char>\n    separate char in output file. (default '|')\n\n"
		"  -oj <char>\n    join char in output file. This is multi-byte character code measures.(default ':')\n\n"
		"  -dc <char>\n    deletion char. (default '_')\n\n"
		"  -t <int or float>\n    Threshold for the end of the training.\n"
		"    if it's more than 1, it's the number of iteration of the training.\n"
		"    if it's less than 1, it's threshold that is in change values of a\n"
		"    parameter by training. if a total of change values of a parameter\n"
		"    is less than it, the training is end. (default 0.1)\n\n"
		"  -no_double_alignment\n    only first alignment.\n\n"
		"  -f_nbest <int>\n    n-best output of first alignment. (default 1)\n\n"
		"  -n or -s_nbest <int>\n    n-best output of double alignment. (default 1)\n\n");
}

/* Print MESSAGE and the usage text to stderr, then exit with failure. */
static void mainFailWithUsage(const char *message){
	fputs(message,stderr);
	usage();
	exit(EXIT_FAILURE);
}

/* Report OPTION as unknown, print the usage text and exit with failure. */
static void mainFailUnknownOption(const char *option){
	fprintf(stderr,"Error unknown option:%s\n",option);
	usage();
	exit(EXIT_FAILURE);
}

/*
 * Advance *CURSOR to the argument following the current option and return
 * it. When the argument list ends there, MISSING_MESSAGE is reported and
 * the program exits, so the cursor never moves past the NULL terminator.
 */
static char *mainOptionValue(char ***cursor, const char *missing_message){
	(*cursor)++;
	if(**cursor==NULL){
		mainFailWithUsage(missing_message);
	}
	return **cursor;
}

/*
 * Return a newly allocated string holding BASE followed by SUFFIX.
 * The size is derived from both strings (plus the terminating NUL), so the
 * buffer always fits. Exits with failure when memory cannot be obtained.
 * The caller owns the result and must free() it.
 */
static char *mainDefaultFileName(const char *base, const char *suffix){
	size_t size=strlen(base)+strlen(suffix)+1;
	char *name=malloc(size);

	if(name==NULL){
		fprintf(stderr,"Don't get memory in malloc.\nYou must need more memory.\n");
		exit(EXIT_FAILURE);
	}
	strcpy(name,base);
	strcat(name,suffix);
	return name;
}

/*
 * Program entry point.
 *
 * Parses the command line into a TOTAL_INFO structure, initialises the COP
 * hash, then either trains a model from the input file (no -ai given) and
 * writes it out, or reads an existing model (-ai given). In both cases the
 * input file is aligned afterwards via requireAlignments(). Elapsed wall
 * time is printed to stderr at the end.
 *
 * Returns EXIT_SUCCESS when the run completes; every error path terminates
 * through exit(EXIT_FAILURE).
 */
int main(int argc, char **argv){
	time_t time1, time2;
	time(&time1);

	TOTAL_INFO info ={NULL,     // input file
		NULL,     // output file
		NULL,     // previous knowledge file
		NULL,	    // input align model file
		NULL,     // output align model file	
		0,        // print Score
		100000000, // COP hash size
		0,	  // COP sqrt hash size because hash is 2 order hash.
		NULL,     // pair data
		0,        // allow to delete X 
		0,        // allow to delete Y
		0.75,      // penalty value of deletion.
		0,        // restrict length of part string X
		0,        // restrict length of part string Y
		0,        // max length of X
		0,        // max length of Y
		0,	  // maximum length
		' ',      // separate char of input file
		'|',      // separate char of output file
		':',      // join char of output file
		"_",      // deletion char
		0,        // total of update value by training
		0,        // total of change value by training
		0,		  // lowest value of para
		0.1,     // Threshold for the end of the training
		1,		  // double alignment
		1,		  // first n-best
		1};       // second n-best

	(void)argc; /* the argument list is walked up to its NULL terminator */

	/* Skip the program name, unless the argument vector is empty. */
	if(*argv!=NULL){
		argv++;
	}

	while(*argv!=NULL){
		const char *arg=*argv;   /* whole argument, used in error messages */
		const char *opt;         /* option name without the leading '-' */

		if(arg[0]!='-'){
			mainFailUnknownOption(arg);
		}
		opt=arg+1;

		switch(opt[0]){
		case 'i':
			if(opt[1]=='s'){
				/* -is: only the first character of the value is used */
				info.input_sepchar=*mainOptionValue(&argv,
					"Don't set input separate char after -is option.\nPlease set input separate char after -is option.\n");
			}else{
				info.input_file=mainOptionValue(&argv,
					"Don't set input file after -i option.\nPlease set input file after -i option.\n");
			}
			break;
		case 'o':
			if(opt[1]=='s'){
				info.output_sepchar=*mainOptionValue(&argv,
					"Don't set output separate char after -os option.\nPlease set output separate char after -os option.\n");
			}else if(opt[1]=='j'){
				info.output_joinchar=*mainOptionValue(&argv,
					"Don't set output join char after -oj option.\nPlease set output join char after -oj option.\n");
			}else{
				info.output_file=mainOptionValue(&argv,
					"Don't set output file after -o option.\nPlease set output file after -o option.\n");
			}
			break;
		case 'p':
			info.previous_knowledge_file=mainOptionValue(&argv,
				"Don't set previous knowledge file after -p option.\nPlease set previous knowledge file after -p option.\n");
			break;
		case 'a':
			if(opt[1]=='i'){
				info.input_align_file=mainOptionValue(&argv,
					"Don't set input align model file after -ai option.\nPlease set input align model file after -ai option.\n");
			}else if(opt[1]=='o'){
				info.output_align_file=mainOptionValue(&argv,
					"Don't set output align model file after -ao option.\nPlease set output align model file after -ao option.\n");
			}else{
				mainFailUnknownOption(arg);
			}
			break;
		case 's':
			if(opt[1]=='\0'){
				/* plain -s: print scores in the output file */
				info.printScore=1;
			}else if(!strcmp(opt+1,"_nbest")){
				info.second_n_best=atoi(mainOptionValue(&argv,
					"Don't set n of n-best of double alignment after -s_nbest option.\nPlease set n of n-best of double alignment with int after -s_nbest option.\n"));
			}else{
				mainFailUnknownOption(arg);
			}
			break;
		case 'h':
			info.hash_size=atoi(mainOptionValue(&argv,
				"Don't set hash size after -h option.\nPlease set hash size with int after -h option.\n"));
			break;
		case 'd':
			if(opt[1]=='x'){
				info.delX=1;
			}else if(opt[1]=='y'){
				info.delY=1;
			}else if(opt[1]=='p'){
				info.del_penalty=atof(mainOptionValue(&argv,
					"Don't set deletion penalty after -dp option.\nPlease set deletion penalty with float after -dp option.\n"));
			}else if(opt[1]=='c'){
				/* -dc keeps the whole value string, not just one char */
				info.del_char=mainOptionValue(&argv,
					"Don't set deletion char after -dc option.\nPlease set deletion char after -dc option.\n");
			}else{
				mainFailUnknownOption(arg);
			}
			break;
		case 'r':
			/* A missing value used to be ignored silently, after which the
			   loop stepped past the end of argv; it is now a reported error. */
			if(opt[1]=='x'){
				info.restrictX=atoi(mainOptionValue(&argv,
					"Don't set restrict length of part string X after -rx option.\nPlease set restrict length of part string X with int after -rx option.\n"));
			}else if(opt[1]=='y'){
				info.restrictY=atoi(mainOptionValue(&argv,
					"Don't set restrict length of part string Y after -ry option.\nPlease set restrict length of part string Y with int after -ry option.\n"));
			}else{
				mainFailUnknownOption(arg);
			}
			break;
		case 't':
			info.threshold_eot=atof(mainOptionValue(&argv,
				"Don't set threshold for the end of the training after -t option.\nPlease set threshold for the end of the training after -t option.\n"));
			break;
		case 'n':
			if(opt[1]=='\0'){
				/* -n is a short form of -s_nbest */
				info.second_n_best=atoi(mainOptionValue(&argv,
					"Don't set n of n-best of double alignment after -n option.\nPlease set n of n-best of double alignment with int after -n option.\n"));
			}else if(!strcmp(opt+1,"o_double_alignment")){
				info.double_alignment=0;
			}else{
				mainFailUnknownOption(arg);
			}
			break;
		default:
			if(!strcmp(opt,"f_nbest")){
				info.first_n_best=atoi(mainOptionValue(&argv,
					"Don't set n of n-best of first alignment after -f_nbest option.\nPlease set n of n-best of first alignment with int after -f_nbest option.\n"));
			}else{
				/* report the option including its leading '-' */
				mainFailUnknownOption(arg);
			}
			break;
		}
		argv++;
	}

	/* The COP hash is initialised with the square root of the requested
	   size in both dimensions. */
	info.sqrt_hash_size=(COP_HASH_VALUE) sqrt((double)info.hash_size);
	COP_init(info.sqrt_hash_size,info.sqrt_hash_size);

	if(info.input_file==NULL){
		fprintf(stderr,"Please set option for -i or -a.\n\n");
		usage();
		exit(EXIT_FAILURE);
	}

	if(info.input_align_file==NULL){
		// Training with input file
		readInputFile(&info);

		if(info.previous_knowledge_file!=NULL){
			readPreviousKnowledge(&info);
		}

		training(&info);

		if(info.output_align_file!=NULL){
			writeAlignToFile(&info);
		}else{
			/* Default model name: <input file name>.model */
			info.output_align_file=mainDefaultFileName(info.input_file,".model");
			writeAlignToFile(&info);
			free(info.output_align_file);
			info.output_align_file=NULL; /* info is still used below */
		}
	}else{
		// Use the align model given with -ai instead of training
		readInputFile(&info);

		fprintf(stderr,"Read align model.\n");
		readAlignFromFile(&info);
		if(info.previous_knowledge_file!=NULL){
			readPreviousKnowledge(&info);
		}
	}

	// Align input file
	if(info.output_file!=NULL){
		requireAlignments(&info);
	}else{
		/* Default alignment output name: <input file name>.align */
		info.output_file=mainDefaultFileName(info.input_file,".align");
		requireAlignments(&info);
		free(info.output_file);
		info.output_file=NULL;
	}
	COP_destroy();
	time(&time2);
	fprintf(stderr,"finish: %f (sec)\n",difftime(time2,time1));
	return EXIT_SUCCESS;
}

