#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2009 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Pathway.pm,v 1.1.1.1 2002/04/02 20:25:38 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#

package G::Seq::PathwayAlignment;

use SubOpt;
use G::Messenger;
use G::DB::SDB;

use strict;
use base qw(Exporter);
use SelfLoader;

our @EXPORT = qw(
		 align_pathway
);


# Module-level lookup tables. All are undef until the corresponding
# load_* sub has run; each is a hash of hashes whose inner keys are
# EC numbers and whose inner values are occurrence counts.
my $gene2ec;     # first column of gene2ec.txt (looked up by sp_xref) => { EC => n }; set by load_gene2ec()
my $path2ec;     # pathway number => { EC => n }; set by load_enzyme2ec()
my $org2ec;      # lower-cased organism code => { EC => n }; set by load_enzyme2ec()
my $orgpath2ec;  # "organism:pathway" => { EC => n }; set by load_enzyme2ec()


=head1 NAME

    G::Seq::PathwayAlignment - Methods for aligning metabolic pathways

=head1 DESCRIPTION

    This class is a part of G-language Genome Analysis Environment, 
    collecting sequence analysis methods related to Pathway Alignment.

=cut


#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::


# get_ecvector(genome)
#
# Builds an EC-number occurrence table for a genome.  The argument is
# passed through opt_get()/opt_as_gb() to obtain the genome object.
# When no CDS carries an sp_xref value, annotate_with_swissprot() is
# run on the genome first.  Every EC number reachable through a CDS's
# sp_xref (via the gene2ec table) or listed in its EC_number field is
# counted once per occurrence.
#
# Returns a hash reference: EC number => count.
sub get_ecvector{
    my @argv = opt_get(@_);
    my $gb = opt_as_gb(shift @argv);

    load_gene2ec();

    # Look for the first CDS that already has a SwissProt cross reference.
    my $has_xref = 0;
    foreach my $cds ($gb->cds()){
        next unless(length $gb->{$cds}->{sp_xref});
        $has_xref = 1;
        last;
    }

    annotate_with_swissprot($gb) if(!$has_xref);

    my %vector;
    foreach my $cds ($gb->cds()){
        my $xref = $gb->{$cds}->{sp_xref};
        if(length $xref){
            # EC numbers attached to the cross-referenced entry
            $vector{$_} ++ foreach (keys %{$gene2ec->{$xref}});
        }

        my $ec_field = $gb->{$cds}->{EC_number};
        if(length $ec_field){
            # EC numbers annotated directly on the feature
            $vector{$_} ++ foreach (split(/\s+/, $ec_field));
        }
    }

    return \%vector;
}


# _send_ec_rows(\@items, $per_row)
#
# Private helper for align_pathway(): sends @items through msg_send,
# $per_row items to a line, each right-justified in an 11-character
# column behind an 8-space indent.
sub _send_ec_rows{
    my ($items, $per_row) = @_;

    my @rest = @{$items};
    while(scalar @rest){
        # The map is parenthesised so that the trailing newline is not
        # itself formatted with "%11s".
        msg_send("        ", (map {sprintf "%11s", $_} splice(@rest, 0, $per_row)), "\n");
    }
}


# align_pathway(\%query, options...)
#
# Compares a set of EC numbers with every entry of one of the enzyme
# tables filled by load_enzyme2ec() and ranks the entries by score.
#
#   $query - hash reference; only its keys (EC numbers) are used.
#
# Options (defaults set with opt_default, values read with opt_val):
#   db             "org" (default), "path" or "orgpath"; any other value
#                  selects the organism table
#   left_penalty   weight of each enzyme found only in the query (0.2)
#   right_penalty  weight of each enzyme found only in the entry (0.2)
#   output         "stdout" (default) sends a report through msg_send;
#                  any other value produces no report
#   cutoff         maximum number of hits kept (10)
#
# For every entry:
#   score    = common - (left * left_penalty + right * right_penalty)
#   identity = common / number of query enzymes * 100, formatted "%.2f"
#              ("0.00" when the query is empty)
#
# Entries are taken in descending score order (ties broken by entry name)
# until either the cutoff is reached or an entry with no enzyme in common
# is met.
#
# Returns an array reference of hit hashes with the keys entry,
# intersection, intersection_a, left, left_a, right, right_a, identity
# and score (the *_a values are array references of EC numbers, the
# others are counts or scalars).  Only real hits are returned.
sub align_pathway{
    require List::Compare;

    opt_default(db=>"org", left_penalty=>0.2, right_penalty=>0.2, output=>"stdout", cutoff=>"10");
    my @argv = opt_get(@_);
    my $query = shift @argv;

    my $db = opt_val("db");
    my $lp = opt_val("left_penalty");
    my $rp = opt_val("right_penalty");
    my $output = opt_val("output");
    my $cutoff = opt_val("cutoff");

    # The tables are undef until load_enzyme2ec() has run, so the database
    # has to be selected after loading, not before.
    load_enzyme2ec();

    my $dbref = $org2ec;
    if($db eq 'orgpath'){
        $dbref = $orgpath2ec;
    }elsif($db eq 'path'){
        $dbref = $path2ec;
    }

    my @query_ec = sort keys %{$query};
    my $query_size = scalar @query_ec;

    if($output eq 'stdout'){
        msg_send("\n\nGPACT: G-language Pathway Alignment and Comparison Tool\n");
        msg_send("                   Copyright 2005 G-language Project\n\n");
        msg_send("Database: $db\n");
        msg_send("Left Penalty:  $lp\n");
        msg_send("Right Penalty: $rp\n");
        msg_send("Showing Top $cutoff\n\n");
        msg_send("Query List: ", $query_size, " enzymes\n\n");
        msg_send("Summary:\n-----------------------------------------------------------------\n\n");
    }

    my $result = {};
    foreach my $entry (keys %{$dbref}){
        my $lc = List::Compare->new(\@query_ec, [sort keys %{$dbref->{$entry}}]);
        my @intersection = $lc->get_intersection();
        my @left = $lc->get_Lonly;
        my @right = $lc->get_Ronly;

        $result->{$entry} = {
            entry          => $entry,
            intersection   => scalar @intersection,
            intersection_a => \@intersection,
            left           => scalar @left,
            left_a         => \@left,
            right          => scalar @right,
            right_a        => \@right,
            # guard against an empty query (division by zero)
            identity       => $query_size
                ? sprintf("%.2f", scalar(@intersection) / $query_size * 100)
                : "0.00",
            score          => scalar(@intersection) - (scalar(@left) * $lp + scalar(@right) * $rp),
        };
    }

    # The name tie-break makes the ranking reproducible between runs.
    my @order = sort {
        $result->{$b}->{score} <=> $result->{$a}->{score} or $a cmp $b
    } keys %{$result};

    my $return = [];
    foreach my $entry (@order){
        last if(scalar @{$return} >= $cutoff);
        last if($result->{$entry}->{intersection} == 0);
        push(@{$return}, $result->{$entry});
    }

    if($output eq 'stdout'){
        my $rank = 0;
        foreach my $hit (@{$return}){
            $rank ++;
            msg_send(sprintf "Hit %3d: %s         score: %3s (%3.2f%s identity)  common: %3d   left: %3d   right:%3d\n",
                     $rank, $hit->{entry}, $hit->{score},
                     $hit->{identity}, '%', $hit->{intersection},
                     $hit->{left}, $hit->{right});
        }

        msg_send("\n\nResults:\n-----------------------------------------------------------------\n\n");

        $rank = 0;
        foreach my $hit (@{$return}){
            $rank ++;
            msg_send(sprintf "Hit %3d: %s            score: %s (%.2f%s identity)  left: %d   right:%d\n\n",
                     $rank, $hit->{entry}, $hit->{score},
                     $hit->{identity}, '%', $hit->{left},
                     $hit->{right});

            msg_send("    Intersection:\n");
            _send_ec_rows($hit->{intersection_a}, 7);

            msg_send("\n    Left only list:\n");
            _send_ec_rows($hit->{left_a}, 7);

            # NOTE(review): this list has always been printed 8 per row
            # while the other two use 7 -- kept as is, confirm intent.
            msg_send("\n    Right only list:\n");
            _send_ec_rows($hit->{right_a}, 8);

            msg_send("\n\n\n");
        }

        my $total = scalar @{$return};
        msg_send("Total $total hits.\n\n");
    }

    return $return;
}


# _gblaster($options)
#
# Runs "blastall $options" through the shell and parses its tabular
# output.  $options is interpolated into the command line unchanged, so
# it must come from trusted code.
#
# Each non-empty output line that is not a "#" comment is split on
# whitespace into the twelve tabular columns; columns 5 and 6 are
# discarded.  Blank and comment lines are skipped so that they do not
# show up as rows of undefined values.
#
# Returns a list of array references:
#   [query, subject, percent, length, qstart, qend, sstart, send, eval, score]
sub _gblaster{
    my $options = shift;

    my @result;
    foreach my $tmp (`blastall $options`){
        chomp($tmp);
        next if($tmp =~ /^\s*(?:#|$)/);

        my ($query, $subject, $percent, $length, undef, undef, $qstart, $qend,
            $sstart, $send, $eval, $score) = split(/\s+/, $tmp, 12);
        push(@result, [$query, $subject, $percent, $length, $qstart, $qend,
                       $sstart, $send, $eval, $score]);
    }

    return @result;
}


# sp_blast($seq)
#
# BLASTs a protein sequence against a local SwissProt database kept in
# /tmp/glang.  When /tmp/glang/swissprot is missing, the database is
# downloaded with wget, unpacked with gunzip and indexed with formatdb
# first; the working directory is restored afterwards.
#
# The sequence is written to /tmp/seq.fasta through to_fasta() and
# searched with _gblaster() (blastp, -m8 tabular output).
# NOTE(review): to_fasta() is not defined in this file -- presumably it
# is provided elsewhere in the package; confirm it is in scope here.
#
# Returns a hash reference keyed by hit rank (0 = first row reported by
# BLAST); each value is a hash with the keys identity, length, eval,
# score, gi, accession and entry.  The last three are the 2nd, 4th and
# 5th "|"-separated fields of the subject id.  The hash is empty when
# there are no hits.
#
# Dies when the database directory cannot be entered or when one of the
# database preparation commands fails.
sub sp_blast{
    my $seq = shift;

    my $dbdir = "/tmp/glang";

    # Filetest operators bind tighter than "eq", so the former
    # '-e "..." eq ""' test was inverted; test for existence directly.
    unless(-e "$dbdir/swissprot"){
        require Cwd;
        my $cwd = Cwd::getcwd();

        mkdir($dbdir, 0777) unless(-d $dbdir);
        chdir($dbdir) || die("ERROR - sp_blast(): cannot chdir to $dbdir: $!\n");

        my $failed;
        foreach my $cmd ("wget ftp://ftp.ncbi.nih.gov/blast/db/FASTA/swissprot.gz -O swissprot.gz",
                         "gunzip swissprot.gz",
                         "formatdb -i swissprot -p T"){
            if(system($cmd) != 0){
                $failed = $cmd;
                last;
            }
        }

        # Always go back before reporting a failure.
        chdir($cwd);
        die("ERROR - sp_blast(): \"$failed\" failed while preparing the SwissProt database.\n")
            if(defined $failed);
    }

    my $filename = to_fasta($seq, -filename=>"/tmp/seq.fasta");
    my @table = _gblaster("-p blastp -d /tmp/glang/swissprot -i $filename -m8");

    my $data = {};
    my $i = 0;

    foreach my $line (@table){
        # skip rows that do not carry a subject id
        next unless(defined $$line[1]);

        my @gi = split(/\|/, $$line[1]);
        $data->{$i} = {
            identity  => $$line[2],
            length    => $$line[3],
            eval      => $$line[8],
            score     => $$line[9],
            gi        => $gi[1],
            accession => $gi[3],
            entry     => $gi[4],
        };
        $i ++;
    }

    return $data;
}


# annotate_with_swissprot(genome, options...)
#
# Annotates every CDS of a genome with its best SwissProt BLAST hit.
# The argument is passed through opt_get()/opt_as_gb() to obtain the
# genome object.
#
# Option:
#   eval   e-value threshold (default "10e-25"); a hit is used only when
#          its e-value is less than or equal to this value
#
# For each CDS the stored translation is used, or the translated gene
# sequence when no translation is stored.  When sp_blast() reports a
# first hit within the threshold, the feature receives
#   sp_blast    - one-line description of the hit
#   sp_xref     - SwissProt entry name of the hit
#   sp_xref_ac  - SwissProt accession of the hit
# A CDS without any hit is left untouched.
# NOTE(review): translate() is not defined in this file -- presumably
# provided elsewhere in the package; confirm it is in scope here.
#
# Returns 1.
sub annotate_with_swissprot{
    opt_default(eval=>"10e-25");
    my @argv = opt_get(@_);
    my $gb = opt_as_gb(shift @argv);

    my $eval = opt_val("eval");

    foreach my $cds ($gb->cds()){
        my $seq = $gb->{$cds}->{translation};
        $seq = translate($gb->get_geneseq($cds)) unless(length $seq);

        my $result = sp_blast($seq);
        my $best = $result->{0};

        # Without a hit the e-value is undefined; compared numerically it
        # would count as 0 and pass every threshold, so skip explicitly.
        next unless(defined $best && defined $best->{eval} && length $best->{eval});
        next unless($best->{eval} <= $eval);

        $gb->{$cds}->{sp_blast} =
            sprintf("%.2f pct identical to sw:%s (%s) gi:%s e-val: %s score: %s",
                    $best->{identity}, $best->{accession},
                    $best->{entry}, $best->{gi},
                    $best->{eval}, $best->{score}
                    );
        $gb->{$cds}->{sp_xref} = $best->{entry};
        $gb->{$cds}->{sp_xref_ac} = $best->{accession};
    }

    return 1;
}



# get_enzyme_list($name)
#
# Returns the enzyme table (hash reference: EC number => count) stored
# for $name after making sure the tables are loaded.  The table is
# chosen from the form of $name:
#   contains ":"   organism/pathway combination ("organism:pathway")
#   digits only    pathway
#   anything else  organism
# Organism codes are stored lower-cased by load_enzyme2ec(); $name is
# looked up as given.
#
# Dies with an "ERROR - get_enzyme_list(): ..." message when no
# non-empty table is stored under $name.
sub get_enzyme_list{
    my $name = shift;

    load_enzyme2ec();

    my ($table, $kind);
    if($name =~ /:/){
        ($table, $kind) = ($orgpath2ec, 'organism/pathway combination');
    }elsif($name =~ /^\d+$/){
        ($table, $kind) = ($path2ec, 'pathway');
    }else{
        ($table, $kind) = ($org2ec, 'organism');
    }

    # defined(%hash) is a compile-time error on perl 5.22 and later; test
    # for a non-empty hash instead, without autovivifying the key.
    die("ERROR - get_enzyme_list(): $kind \"$name\" not available.\n")
        unless(defined $table
               && ref($table->{$name}) eq 'HASH'
               && %{$table->{$name}});

    return $table->{$name};
}


# load_gene2ec([$file])
#
# Fills the module-level gene2ec table and returns it (hash reference:
# first column => { EC number => count }).
#
# When the "GLANG_GENE2EC" store exists it is loaded with sdb_load() and
# $file is ignored.  Otherwise the table is parsed from $file, or from
# /tmp/gene2ec.txt (downloaded with wget when absent) if $file is not
# given or does not exist, and then saved with sdb_save().
#
# Input format: one record per line, "<name><TAB><EC numbers separated
# by whitespace>".  Lines without a tab or with an empty name are
# skipped.
#
# Dies when the download fails or the file cannot be opened.
sub load_gene2ec{
    my $file = shift;

    if(sdb_exists("GLANG_GENE2EC")){
        $gene2ec = sdb_load("GLANG_GENE2EC");
        return $gene2ec;
    }

    unless(defined $file && -e $file){
        $file = "/tmp/gene2ec.txt";
        unless(-e $file){
            my $status = system("wget", "http://www.g-language.org/data/gem/gene2ec.txt",
                                "-O", $file, "-q");
            if($status != 0){
                # wget -O leaves an empty file behind on failure; remove it
                # so it is not mistaken for a finished download later.
                unlink($file);
                die("ERROR - load_gene2ec(): download of gene2ec.txt failed (status $status).\n");
            }
        }
    }

    my %table;
    open(my $fh, '<', $file) || die("ERROR - load_gene2ec(): cannot open $file: $!\n");
    while(my $line = <$fh>){
        chomp($line);
        my ($sp, $ec) = split(/\t/, $line, 2);
        next unless(defined $sp && length $sp && defined $ec);

        # split(' ') also drops leading whitespace
        foreach my $splitec (split(' ', $ec)){
            $table{$sp}->{$splitec} ++;
        }
    }
    close($fh);

    $gene2ec = \%table;
    sdb_save($gene2ec, "GLANG_GENE2EC");

    return $gene2ec;
}



# load_enzyme2ec([$file])
#
# Fills the module-level tables path2ec, org2ec and orgpath2ec.
#
# When the "GLANG_ORGPATH2EC" store exists, all three tables are loaded
# with sdb_load() and $file is ignored.  Otherwise they are built from
# $file, or from /tmp/enzyme (downloaded with wget when absent) if $file
# is not given or does not exist, and then saved with sdb_save().
#
# Parsing of the flat file:
#   - a line starting with a word sets the current field key; lines
#     starting with whitespace continue the current field
#   - "ENTRY ... EC <number>" sets the current EC number; an ENTRY line
#     without an EC number clears it so that the following fields are
#     not attributed to the previous enzyme
#   - in PATHWAY fields, "PATH: <prefix><digits> " adds the EC number to
#     path2ec under <digits>
#   - in GENES fields, "<code>:" adds the EC number to org2ec under the
#     lower-cased <code>
# orgpath2ec receives, for every organism/pathway pair that shares at
# least one EC number, the shared EC numbers under "organism:pathway".
#
# Returns 1.  Dies when the download fails or the file cannot be opened.
sub load_enzyme2ec{
    my $file = shift;

    if(sdb_exists("GLANG_ORGPATH2EC")){
        $orgpath2ec = sdb_load("GLANG_ORGPATH2EC");
        $org2ec = sdb_load("GLANG_ORG2EC");
        $path2ec = sdb_load("GLANG_PATH2EC");
        return 1;
    }

    unless(defined $file && -e $file){
        $file = "/tmp/enzyme";
        unless(-e $file){
            my $status = system("wget", "ftp://ftp.genome.ad.jp/pub/kegg/ligand/enzyme",
                                "-O", $file, "-q");
            if($status != 0){
                # wget -O leaves an empty file behind on failure; remove it
                # so it is not mistaken for a finished download later.
                unlink($file);
                die("ERROR - load_enzyme2ec(): download of the enzyme file failed (status $status).\n");
            }
        }
    }

    my (%by_path, %by_org);
    my $key = '';
    my $entry;
    open(my $fh, '<', $file) || die("ERROR - load_enzyme2ec(): cannot open $file: $!\n");
    while(my $row = <$fh>){
        chomp($row);

        my $line = '';
        if($row =~ /^(\S+?)\s+(.*)/){
            $key = $1;
            $line = $2;
            if($key eq 'ENTRY'){
                # undef when the line carries no EC number
                ($entry) = $line =~ /EC\s+(.*\d)/;
            }
        }elsif($row =~ /^\s+(.*)/){
            $line = $1;
        }

        next unless(defined $entry);

        if($key eq 'PATHWAY' && $line =~ /PATH: .+?(\d+)\s/){
            $by_path{$1}->{$entry} ++;
        }elsif($key eq 'GENES' && $line =~ /(\S+)\:/){
            $by_org{lc($1)}->{$entry} ++;
        }
    }
    close($fh);

    # Intersect every organism with every pathway by direct hash lookup.
    my %by_orgpath;
    foreach my $org (keys %by_org){
        my $org_ec = $by_org{$org};
        foreach my $path (keys %by_path){
            foreach my $enz (grep { exists $org_ec->{$_} } keys %{$by_path{$path}}){
                $by_orgpath{"$org:$path"}->{$enz} ++;
            }
        }
    }

    $path2ec = \%by_path;
    $org2ec = \%by_org;
    $orgpath2ec = \%by_orgpath;

    sdb_save($orgpath2ec, "GLANG_ORGPATH2EC");
    sdb_save($org2ec, "GLANG_ORG2EC");
    sdb_save($path2ec, "GLANG_PATH2EC");

    return 1;
}



1;
