#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2013 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: EPCR.pm,v 1.1.1.1 2002/04/02 20:25:44 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#

package G::Tools::COGs;

use SubOpt;
use G::Messenger;

use strict;
use base qw(Exporter);
use autouse 'HTTP::Request::Common'=>qw(POST);
use SelfLoader;

our @EXPORT = qw(
	     cognitor
	     );

my $cogpath = '/db/genesys/cogs/';


__DATA__

#::::::::::::::::::::::::::::::
#          Perldoc
#::::::::::::::::::::::::::::::

=head1 NAME

  G::Tools::COGs - Interface to NCBI COGs database

=head1 DESCRIPTION

  This class is a part of G-language Genome Analysis Environment, 
  providing functions using NCBI COGs database.

=cut

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::

sub set_cogpath {
    $cogpath = shift;
    
    return $cogpath;
}

sub cognitor {
    require LWP::UserAgent;

    opt_default(hit=>3);
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $hit = opt_val("hit");

    my $ua = LWP::UserAgent->new();
    my $req = POST 'http://www.ncbi.nlm.nih.gov/COG/old/xognitor.cgi',
    [seq=>$gb->{SEQ}, hit=>$hit];
    my $content = $ua->request($req)->as_string;

    if ($content =~ /NO related COG/){
	#return '';
    }elsif($content =~ /<table/){
	$content =~ s/\///g;
	my (undef, $line, $undef) = split(/table/, $content, 3);
	substr($line, 0, 1) = '<';
	$line =~ s/<.*?>/ /g;
	my (undef, @lines) = split(/\n/, $line);
	
	my $i = 0;
	foreach (@lines){
	    s/^\s*//g;
	    s/\s*$//g;
	    if (/(\d+)\s+proteins/){
		$lines[$i] = $1;
	    }elsif(/BeTs to\s+(\d+)\s+clades/){
		$lines[$i] = $1;
	    }elsif(/pet-score:\s+(\d+)/){
		$lines[$i] = $1;
	    }
	    
	    $i ++;
	}
	
	return ($lines[2], $lines[1], $lines[3], $lines[0], $lines[4], $lines[5]);
	#       COGID      FUNCTION   PRODUCT    HITS       BeTs       pet-score
    }
}

sub dignitor{
    my $id = time . '-' . int(rand() * 100000);

    open(DIG, '>/tmp/dignitor-' . $id . '.lst');
    my $gene = shift;
    my $translation = shift;

    open(OUT, '>/tmp/in-' . $id . '.seq');
    print OUT "\>$gene\n$translation\n";
    close(OUT);
    
    my @result = _gblaster('-p blastp -d ' . $cogpath . 'COG/COGall -i /tmp/in-' . $id . '.seq -m8 -a 2');
    
    foreach my $tmp (@result){
	my ($query, $subject, $percent, $length, $qstart, $qend, $sstart, $send, 
	    $eval, $score) = @{$tmp};
	
	printf DIG "%s - %s (%d %s)  %d\.\.%d  %d\.\.%d\n", $query, $subject, $score, $eval,
	$qstart, $qend, $sstart, $send;
    }
    close(DIG);

    my $command = $cogpath . 'zugnitor /tmp/dignitor-' . $id . '.lst ' . $cogpath . 'COG/COGs.txt 2>/dev/null';
    my @result = `$command`;

    unlink('/tmp/dignitor-' . $id . '.lst');
    unlink('/tmp/in-' . $id . '.seq');
    my @coglist = ();
    foreach my $line (@result){
	last unless($line =~ /^\s/);

	if ($line =~ /(COG\d+)/){
	    push(@coglist, $1);
	}
    }

    if (wantarray()){
	return @coglist;
    }else{
	return shift @coglist;
    }
	
}


1;
