#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2009 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Mapping.pm,v 1.1.1.1 2002/04/02 20:25:45 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#

package G::Tools::Mapping;

use SubOpt;
use G::Tools::Blast;
use G::Tools::Repeat;
use G::Messenger;

use strict;
use base qw(Exporter);
use SelfLoader;

# Names exported by default to any package that uses this module.
# The subs themselves are defined after __DATA__ and are compiled on first
# call through SelfLoader.
our @EXPORT = qw(
		 _mask_repeat_for_mapping
		 _foreach_mask_repeat_for_mapping
		 _cutquery_for_mapping
		 _blast_db_for_mapping
		 _formatdb_for_mapping
		 _blast_for_mapping
		 _jstat_for_mapping
		 _blastpointer_for_mapping
		 _foreach_blastpointer_for_mapping
		 _file_list_for_mapping
);

__DATA__

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::


###################################################
#  mapping tools                                  #
###################################################
#mask_repeat
#cdna=$ARGV[2]
# Run the repeat masker on one sequence file and put the masked result in
# its place.
#
#   $cdna : file name of the sequence, as seen from inside $work
#   $work : directory to change into before masking
#   @_    : any further arguments are forwarded untouched to _repeatmasker
#           (imported; presumably from G::Tools::Repeat -- confirm)
#
# On success the unmasked input is kept as "$cdna.original" and the masked
# output "$cdna.masked" is renamed to $cdna.  Returns 1 when the swap was
# done, 0 otherwise.
sub _mask_repeat_for_mapping{
    my $cdna=shift;
    my $work=shift;

    # Deliberately unchecked: _foreach_mask_repeat_for_mapping calls this
    # repeatedly with the same directory, and with a relative path every
    # call after the first fails although the process already is in the
    # right place.
    chdir($work);
    _repeatmasker($cdna,@_);

    # Only swap the files when a masked file was actually produced.
    # Renaming unconditionally would move the input to ".original" and
    # leave nothing under the name later steps look for.
    my $masked=$cdna.'.masked';
    return 0 unless -e $masked;

    unless(rename($cdna,$cdna.'.original')){
        warn "_mask_repeat_for_mapping: cannot rename $cdna to $cdna.original: $!\n";
        return 0;
    }
    unless(rename($masked,$cdna)){
        warn "_mask_repeat_for_mapping: cannot rename $masked to $cdna: $!\n";
        # Put the input back so the file name stays usable.
        rename($cdna.'.original',$cdna);
        return 0;
    }

    return 1;
}


# Mask repeats in every "*.fst" file of a directory.
#
#   $cdna_dir : directory holding the query files
#   $lib      : forwarded as the third argument of
#               _mask_repeat_for_mapping for every file
#
# Returns nothing useful; a directory that cannot be read is reported with
# a warning and skipped.
sub _foreach_mask_repeat_for_mapping{
    my $cdna_dir=shift;
    my $lib=shift;

    my $dh;
    unless(opendir($dh,$cdna_dir)){
        warn "_foreach_mask_repeat_for_mapping: cannot read directory $cdna_dir: $!\n";
        return;
    }
    # Take the listing up front: masking creates additional files in the
    # same directory.
    my @file=readdir($dh);
    closedir($dh);

    foreach my $entry (@file){
        # Anchored like the other *_for_mapping routines, so by-products
        # such as "x.fst.original" are not masked again on a second run.
        next unless $entry=~/\.fst$/;
        _mask_repeat_for_mapping($entry,$cdna_dir,$lib);
    }

    return;
}


#cutquery
#$query_first=$ARGV[0]
# Split a multi-FASTA file into one file per record.
#
#   $cdna : path of the multi-FASTA input
#   $work : prefix for the output files; record N is written to
#           "${work}N.fst", so a directory must be given with its
#           trailing slash
#
# Records are numbered from 1 in input order.  Text before the first
# header line belongs to no record and is dropped.  Returns 1 when the
# input could be read, 0 otherwise.
sub _cutquery_for_mapping{
    my $cdna=shift;
    my $work=shift;
    my $filenumber=0;

    my $in;
    unless(open($in,'<',$cdna)){
        warn "_cutquery_for_mapping: cannot read $cdna: $!\n";
        return 0;
    }

    my $out;
    while(my $row=<$in>){
        if($row=~/^>/){
            # A header line starts the next output file.
            if($out){
                close($out) or warn "_cutquery_for_mapping: error closing $work$filenumber.fst: $!\n";
                undef $out;
            }
            $filenumber++;
            my $path="$work$filenumber.fst";
            my $fh;
            if(open($fh,'>',$path)){
                $out=$fh;
            }
            else{
                warn "_cutquery_for_mapping: cannot write $path: $!\n";
            }
        }

        # print, not printf: the line is data, and a '%' in a header must
        # not be interpreted as a format directive.
        print {$out} $row if $out;
    }

    if($out){
        close($out) or warn "_cutquery_for_mapping: error closing $work$filenumber.fst: $!\n";
    }
    close($in);

    return 1;
}


###################################################
## $database = $ARGV[0]  |  $query = $ARGV[1]
## usage:perl blast_db.pl seq-file where_to_cut
# Cut sequence files into fixed-size chunks for use as a BLAST database.
#
#   first argument  : one FASTA file, or a pattern containing '*' that is
#                     expanded to a list of files
#   second argument : directory that receives the output
#   limit => N      : chunk length in residues (default 10000)
#
# For every input file "<dir>/<base>" a file "<work_dir>/<base>.db" is
# written.  Each record is emitted as consecutive chunks of at most
# "limit" residues; chunk n gets the header "<first header token>_n",
# numbering from 1 for every record.
#
# Returns a reference to the list of files written.  Inputs or outputs
# that cannot be opened are reported with a warning and skipped.
sub _blast_db_for_mapping{
    &opt_default(limit=>10000);
    my @args=opt_get(@_);

    my $data_file=shift @args;
    my $work_dir=shift @args;
    my $limit=opt_val('limit');
    my @filename;

    # A limit below 1 can never be filled and would make the chunk loop
    # below spin forever.
    $limit=int($limit || 0);
    if($limit<1){
        warn "_blast_db_for_mapping: invalid limit, falling back to 10000\n";
        $limit=10000;
    }

    # Expand wildcards in-process instead of shelling out to ls.
    my @file=($data_file!~/\*/) ? ($data_file) : glob($data_file);

    foreach my $k (@file){
        my $in;
        unless(open($in,'<',$k)){
            warn "_blast_db_for_mapping: cannot read $k: $!\n";
            next;
        }

        # Base name of the input.  Also correct for a path without any
        # '/', where substr() at rindex() == -1 would return only the
        # last character of the name.
        (my $base=$k)=~s{.*/}{}s;
        my $db=$work_dir.'/'.$base.'.db';

        my $out;
        unless(open($out,'>',$db)){
            warn "_blast_db_for_mapping: cannot write $db: $!\n";
            close($in);
            next;
        }
        push(@filename,$db);

        my $label='';   # first token of the current header line
        my $i=1;        # number of the chunk being filled
        my $len=0;      # residues already written to that chunk

        while(my $row=<$in>){
            if($row=~/\>/){
                # New record: restart numbering and fill level so that
                # chunk coordinates are relative to this record.
                ($label)=split(/\s/,$row);
                $label='' unless defined $label;
                $i=1;
                $len=0;
                print {$out} $label,"_$i\n";
                next;
            }

            $row=~tr/\r\n//d;
            # Hand the line out piece by piece; a single line may span
            # any number of chunks.
            while(length($row)){
                if($len>=$limit){
                    $i++;
                    $len=0;
                    print {$out} $label,"_$i\n";
                }
                my $piece=substr($row,0,$limit-$len,'');
                print {$out} $piece,"\n";
                $len+=length($piece);
            }
        }

        close($out) or warn "_blast_db_for_mapping: error closing $db: $!\n";
        close($in);
    }

    return \@filename;
}


###################################################
## $database=$ARGV[0]
## usage:perl formatdb.pl databasedirectory 
# Call _formatdb once for every database file.
#
#   first argument : reference to a list of database file names
#   remaining args : forwarded unchanged to every _formatdb call
sub _formatdb_for_mapping{
    my $db_list=shift;

    # @_ now holds only the extra arguments and is passed through as is.
    _formatdb($_,@_) for @{$db_list};
}


##################################################
## $database = $ARGV[0] | $query = $ARGV[1]
## usage:perl blast.pl databasedirectory querylist

# Run _blast for every "*.fst" query file in a directory.
#
#   $work_dir  : directory holding the query files; the process changes
#                into it and stays there
#   $filenames : reference to the list of database files
#
# Every query "<q>.fst" is searched against all databases at once and the
# report is requested as "<q>.fst.blast" (options -v 20 -b 20 -qr on are
# passed to _blast as they are; their meaning is defined by _blast).
#
# NOTE(review): the database names are forwarded exactly as given.  After
# the chdir below relative names would be looked up inside $work_dir --
# confirm that callers pass absolute paths.
sub _blast_for_mapping{
    my $work_dir=shift;
    my $filenames=shift;

    my $dh;
    unless(opendir($dh,$work_dir)){
        warn "_blast_for_mapping: cannot read directory $work_dir: $!\n";
        return;
    }
    my @query=grep { /\.fst$/ } readdir($dh);
    closedir($dh);

    # Database list in the form "db1\ db2\ " including the quotes.
    my $data='"'.join('',map { $_."\\ " } @{ $filenames || [] }).'"';

    # The query names are relative to $work_dir, so running from anywhere
    # else cannot work.
    unless(chdir($work_dir)){
        warn "_blast_for_mapping: cannot change into $work_dir: $!\n";
        return;
    }

    # Named loop variable: the callee is free to use $_ without touching
    # the query list.
    foreach my $query_file (@query){
        _blast($data,$query_file,-o=>"$query_file.blast",-v=>20,-b=>20,-qr=>'on');
    }

    return;
}


# Block until the output of the external "jstat" command no longer shows
# work for the current user.
#
# The user name comes from "whoami".  A poll counts as busy when any line
# of the jstat output
#   - starts with the user name and contains " blastall ",
#   - starts with the user name and mentions def_<user>, or
#   - reports jobs in the active queue def_<user>.
# Every poll is followed by a 60 second sleep, including the last one.
sub _jstat_for_mapping{
    my $who=qx!whoami!;
    $who=~tr/\n//d;

    my $busy;
    do{
        $busy=0;
        my $report=qx!jstat!;
        foreach my $row (split(/\n/,$report)){
            $busy=1 if $row=~/^${who}.*\sblastall\s.*/
                    || $row=~/^${who}.*def_$who.*/
                    || $row=~/jobs in queue def_${who}, queue is active,/;
        }
        sleep 60;
    }while($busy==1);
}
    

# Parse one BLAST text report and append one line per accepted subject hit
# to "gene_list.txt" in the current directory.
#
#   $filename : path of the BLAST report
#   $limit    : chunk length the database was cut with; subject positions
#               of chunk n are shifted by $limit * (n - 1), n being the
#               trailing "_<n>" of the subject name
#
# A subject is accepted when its line in the summary table shows an
# E-value of 0.0 or one written with an exponent above 100 ("e-101" and
# smaller).  Within a subject only HSPs on the strand of the first HSP are
# used, and an HSP is skipped when 50 or more positions of its query range
# are already covered by earlier HSPs of that subject.
#
# Output columns (tab separated):
#   query name, subject name without chunk suffix, lowest and highest
#   subject position, their span, strand, E-value of the first HSP, first
#   and last covered query position, covered/query-length ratio,
#   covered/span ratio, base name of the report.
#
# Unreadable input or unwritable output is reported with a warning and
# nothing is parsed.
sub _blastpointer_for_mapping{
    my $filename=shift;
    my $limit=shift;

    my $switch=0;    # 1: inside the summary table, 2: inside an accepted subject
    my $switch2=0;   # 1: between a "Query:" line and its "Sbjct:" line
    my $switch3=0;   # 1: current HSP is on another strand than the first one
    my $switch4=0;   # 1: the next line carries the query length
    my @ID;          # subject names accepted from the summary table
    my @aln;         # lines of the alignment row being collected
    my $seq='';      # coverage mask of the query: Y = free, N = covered
    my $len;         # query length
    my %qpos;        # query coordinates seen in the current HSP
    my %spos;        # shifted subject coordinates seen in the current HSP
    my %hash;        # data collected for the current subject
    my $q;           # name of the current subject
    my $name;        # name of the query
    my $chunk=1;     # chunk number of the current subject

    my $out;
    unless(open($out,'>>','gene_list.txt')){
        warn "_blastpointer_for_mapping: cannot append to gene_list.txt: $!\n";
        return;
    }
    my $in;
    unless(open($in,'<',$filename)){
        warn "_blastpointer_for_mapping: cannot read $filename: $!\n";
        close($out);
        return;
    }

    # Fold the HSP collected so far into the coverage mask and, unless it
    # overlaps earlier HSPs too much, remember its subject coordinates.
    my $commit_hsp=sub{
        if(%qpos){
            my @qcoord=sort{$a <=> $b}keys(%qpos);
            my $region=substr($seq,$qcoord[0]-1,abs($qcoord[-1]-$qcoord[0])+1);
            my $covered=($region=~tr/N/N/);
            if($covered<50){
                for(my $i=$qcoord[0]-1;$i<=$qcoord[-1]-1;$i++){
                    substr($seq,$i,1)="N";
                }
                foreach my $s (keys(%spos)){
                    $hash{$q}{pos}{$s}=1;
                }
            }
        }
        %spos=();
        %qpos=();
    };

    # Write the line for the subject collected so far, then forget it.
    my $flush_hit=sub{
        my $hit=($seq=~tr/N/N/);
        foreach my $id (sort keys(%hash)){
            next unless $hash{$id}{evalue};
            my @scoord=sort{$a <=> $b}keys(%{$hash{$id}{pos}});
            my $span=$scoord[-1]-$scoord[0]+1;
            my $ind=index($seq,"N")+1;
            my $rind=rindex($seq,"N")+1;
            print {$out} join("\t",
                              $name,
                              $hash{$id}{chromosome},
                              $scoord[0],
                              $scoord[-1],
                              $span,
                              $hash{$id}{strand},
                              $hash{$id}{evalue},
                              $ind,
                              $rind,
                              sprintf("%.2f",$hit/$len)."($hit/$len)",
                              sprintf("%.2f",$hit/$span)."($hit/$span)",
                              (split(/\//,$filename))[-1]),"\n";
        }
        %hash=();
    };

    while(my $text=<$in>){
        $text=~tr/\r\n//d;

        if($switch4==1){
            # Line after "Query=": "(1,234 letters)".  The thousands
            # separator has to go as well, otherwise the length reads as 1.
            $len=$text;
            $len=~tr/\(\) letters\n,//d;
            $switch4=0;
        }
        if($text=~/Query=/){
            $name=$text;
            $name=~s/Query= //;
            $switch4=1;
        }

        if($text=~/Sequences producing significant alignments\:/){
            $switch=1;
        }
        elsif($switch==1 && $text ne "" && $text!~/\>/){
            # Summary table line: name, score, E-value.
            if($text=~/(\S+)\s+\d+\s(.+)/){
                my($id,$value)=($1,$2);
                if($value=~m/e-/){
                    my $exponent=(split(/-/,$value))[1];
                    push(@ID,$id) if $exponent>100;
                }
                elsif($value==0){
                    push(@ID,$id);
                }
            }
        }

        if($text=~/^\>/){
            # A new subject starts: finish the previous one first.
            $commit_hsp->();
            $flush_hit->();
            $seq='Y' x ($len || 0);
            $switch=0;

            $q=$text;
            $q=~tr/\>\n //d;

            # The chunk number is the trailing "_<n>" of the name, so
            # names that contain underscores themselves keep working.
            my($token)=$text=~/^>\s*(\S+)/;
            $chunk=(defined($token) && $token=~/_(\d+)$/) ? $1 : 1;

            if(grep { $q eq $_ } @ID){
                $switch=2;
                my $chromosome=$text;
                $chromosome=~s/\>//;
                $chromosome=~s/_\d+(?=\s|$)//;
                $hash{$q}{chromosome}=$chromosome;
            }
        }
        elsif($switch==2){
            if($text=~/Expect \= /){
                # Start of an HSP: keep the E-value of the first one and
                # close the HSP before it.
                unless($hash{$q}{evalue}){
                    $hash{$q}{evalue}=(split(/ \= /,$text))[2];
                }
                $commit_hsp->();
            }
            if($text=~/Strand \= /){
                my $strand=(split(/ \/ /,$text))[1];
                $hash{$q}{strand}=$strand unless $hash{$q}{strand};
                $switch3=($hash{$q}{strand} eq $strand) ? 0 : 1;
            }
            if($switch3==0){
                if($text=~/Query:/ || $switch2==1){
                    $switch2=1;
                    push(@aln,$text);

                    if($text=~/Sbjct:/){
                        # Row complete: record both ends on the query and,
                        # shifted by the chunk offset, on the subject.
                        my @query_line=split(/\s+/,$aln[0]);
                        $qpos{$query_line[1]}=1;
                        $qpos{$query_line[3]}=1;
                        my @sbjct_line=split(/\s+/,$text);
                        $spos{$sbjct_line[1]+$limit*($chunk-1)}=1;
                        $spos{$sbjct_line[3]+$limit*($chunk-1)}=1;
                        @aln=();
                        $switch2=0;
                    }
                }
            }
        }
    }

    # The last subject of a report is not followed by another ">" line and
    # has to be written here.
    $commit_hsp->();
    $flush_hit->();

    close($in);
    close($out);
}


# Feed every "*.blast" report of a directory to _blastpointer_for_mapping.
#
#   first argument : directory holding the reports; the file name is
#                    appended directly, so it must end with a slash
#   limit => N     : chunk length handed on to the parser (default 10000)
#
# The directory entries are put in numeric order of their names before the
# reports are picked out.
sub _foreach_blastpointer_for_mapping{
    &opt_default(limit=>10000);
    my($dir)=opt_get(@_);
    my $limit=opt_val("limit");

    opendir(my $dh,$dir);
    my @entries=readdir($dh);
    closedir($dh);

    for my $report (grep { /\.blast$/ } sort{$a <=> $b}@entries){
        _blastpointer_for_mapping($dir.$report,$limit);
    }
}


# Write "ID_list.txt" with one line per query file.
#
#   $query : directory holding the "*.fst" query files
#
# ID_list.txt is created in the directory that is current on entry.  The
# process then changes into $query and stays there.  For every "*.fst"
# file the first line, stripped of '>' characters and the newline, is
# written followed by a tab and "<file name>.rst".
#
# A directory or output file that cannot be opened is reported with a
# warning and nothing is written; a query file that cannot be opened is
# reported and left out of the list.
sub _file_list_for_mapping{
    my $query=shift;

    my $dh;
    unless(opendir($dh,$query)){
        warn "_file_list_for_mapping: cannot read directory $query: $!\n";
        return;
    }
    my @file=readdir($dh);
    closedir($dh);

    my $out;
    unless(open($out,'>','ID_list.txt')){
        warn "_file_list_for_mapping: cannot write ID_list.txt: $!\n";
        return;
    }

    # The entries from readdir are bare names, so they are opened from
    # inside the directory.
    unless(chdir($query)){
        warn "_file_list_for_mapping: cannot change into $query: $!\n";
        close($out);
        return;
    }

    foreach my $entry (@file){
        next unless $entry=~/\.fst$/;

        my $in;
        unless(open($in,'<',$entry)){
            warn "_file_list_for_mapping: cannot read $entry: $!\n";
            next;
        }
        my $line=<$in>;
        close($in);

        $line='' unless defined $line;   # empty file: no header to report
        $line=~tr/>\n//d;
        print {$out} "$line\t${entry}.rst\n";
    }

    close($out) or warn "_file_list_for_mapping: error closing ID_list.txt: $!\n";
}

1;
