#!/usr/bin/env perl

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2007 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Annotation.pm,v 1.1 2002/07/30 17:44:27 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#
# written by Kazuharu Arakawa <gaou@sfc.keio.ac.jp> at
# G-language Project, Institute for Advanced Biosciences, Keio University.
#


# Function-style interface to the GlimmerM gene finder: the two exported
# subroutines run the external program and turn its output into a G::IO
# object.
package G::Tools::GlimmerM;

use strict;
use G::Messenger;
use G::IO;

use vars qw($VERSION @ISA @EXPORT @EXPORT_OK @INC $AUTOLOAD);

# The subroutines of this module live after the __DATA__ marker and are
# compiled by SelfLoader the first time they are called.
use SelfLoader;

require Exporter;

# NOTE(review): AutoLoader is named here although this file loads
# SelfLoader, not AutoLoader -- confirm whether this entry is still needed.
@ISA = qw(Exporter AutoLoader);
# Both public subroutines are exported by default.
@EXPORT = qw(
	     run_glimmerM
	     annotate_with_glimmerM
	     );

__DATA__

#::::::::::::::::::::::::::::::
#   Let the code begin...
#::::::::::::::::::::::::::::::

# run_glimmerM($this, $file, $out [, $program [, $train]])
#
# Runs the GlimmerM executable through the shell and redirects its
# standard output into $out.
#
#   $this    - accepted as the first argument but not used here
#   $file    - sequence file handed to GlimmerM
#   $out     - file that receives GlimmerM's standard output
#   $program - executable path, relative to the current directory
#              (default 'bin/glimmer_linux')
#   $train   - training directory passed with -d (default 'train/')
#
# Returns the raw status from system() (0 on success). A failure is also
# reported with warn() so that it does not pass unnoticed when the caller
# ignores the return value.
#
# NOTE(review): the arguments are interpolated unquoted into a shell
# command line, so paths containing whitespace or shell metacharacters
# are interpreted by the shell. Pass trusted values only.
sub run_glimmerM {
    my $this = shift;
    my $file = shift;
    my $out = shift;
    my $program = shift || 'bin/glimmer_linux';
    my $train = shift || 'train/';

    my $status = system("./$program $file orfs -d $train > $out");

    if ($status == -1) {
        # system() itself failed: the command could not be started at all.
        warn "run_glimmerM: failed to start './$program': $!\n";
    } elsif ($status & 127) {
        # Low 7 bits of the wait status hold the terminating signal.
        warn sprintf("run_glimmerM: './%s' was terminated by signal %d\n",
                     $program, $status & 127);
    } elsif ($status != 0) {
        # High byte of the wait status holds the exit code.
        warn sprintf("run_glimmerM: './%s' exited with status %d\n",
                     $program, $status >> 8);
    }

    return $status;
}

# annotate_with_glimmerM($file, $out)
#
# Builds a G::IO object from a FASTA file and a GlimmerM result file.
#
#   $file - FASTA file holding the sequence
#   $out  - GlimmerM output file (for example one written by run_glimmerM)
#
# Returns the G::IO object with:
#   LOCUS->{id}          first word of the FASTA header line
#   SEQ                  the sequence letters, lower-cased
#   CDS0 / FEATURE0      placeholder entries ({dummy} = 1)
#   CDSn / FEATUREn      one pair per predicted gene, n = 1, 2, ...
#                        (on, direction, gene, start, end, join)
#
# Dies with a descriptive message if either file cannot be opened.
sub annotate_with_glimmerM {
    my $file = shift;
    my $out = shift;

    my $this = G::IO->new("blessed");

    open(my $fasta, '<', $file)
        || die("annotate_with_glimmerM: cannot open FASTA file '$file': $!\n");
    while (my $line = <$fasta>) {
        if ($line =~ /^\>/) {
            # Header line: the first word after '>' becomes the locus id.
            $line =~ s/\>//;
            my @header = split ' ', $line;
            $this->{LOCUS}->{id} = $header[0];
        } else {
            # Sequence line: keep letters only, stored in lower case.
            $line =~ s/[^a-zA-Z]//g;
            $this->{SEQ} .= lc($line);
        }
    }
    close($fasta);

    # Placeholder entries at index 0; predicted genes are numbered from 1.
    $this->{"CDS0"}->{dummy} = 1;
    $this->{"FEATURE0"}->{dummy} = 1;

    open(my $glimmer, '<', $out)
        || die("annotate_with_glimmerM: cannot open GlimmerM output '$out': $!\n");

    # Skip the report preamble, up to and including the first line that
    # contains a '#'.
    while (my $line = <$glimmer>) {
        last if $line =~ /\#/;
    }

    my $count = 0;      # number of the gene currently being filled in
    my $in_gene = 0;    # true while consecutive exon rows belong to one gene
    while (my $line = <$glimmer>) {
        chomp $line;
        $line =~ s/^\s+//;

        # A blank line ends the current gene. The counter is advanced only
        # when the next exon row is seen, so repeated blank lines cannot
        # leave gaps in the CDSn / FEATUREn numbering.
        if ($line eq '') {
            $in_gene = 0;
            next;
        }

        # Columns used: [2] strand, [3] exon type, [4] and [5] exon range.
        my @col = split ' ', $line;

        # Ignore rows that are not exon records (too few columns or
        # non-numeric coordinates) instead of recording bogus ranges.
        next unless @col >= 6
            && $col[4] =~ /^\d+$/
            && $col[5] =~ /^\d+$/;

        my ($strand, $type, $from, $to) = @col[2 .. 5];

        unless ($in_gene) {
            # First exon row of a new gene: create its CDS/FEATURE pair.
            $in_gene = 1;
            $count++;

            my $direction = ($strand eq '-') ? 'complement' : 'direct';
            my $gene = sprintf "gene%05d", $count;

            $this->{"CDS$count"}->{feature} = $count;
            $this->{"FEATURE$count"}->{cds} = $count;
            $this->{"FEATURE$count"}->{type} = 'CDS';
            for my $key ("CDS$count", "FEATURE$count") {
                $this->{$key}->{on} = 1;
                $this->{$key}->{direction} = $direction;
                $this->{$key}->{gene} = $gene;
            }
        }

        my $is_single = ($type eq 'Single');
        for my $key ("CDS$count", "FEATURE$count") {
            $this->{$key}->{start} = $from
                if $type eq 'Initial' || $is_single;
            $this->{$key}->{end} = $to
                if $type eq 'Terminal' || $is_single;

            # Multi-exon genes accumulate their ranges in {join}; every
            # range, including the last one, is followed by a comma.
            $this->{$key}->{join} .= sprintf "%d..%d,", $from, $to
                unless $is_single;
        }
    }
    close($glimmer);

    return $this;
}



# Destructor: releases the contents of the hash behind the object.
sub DESTROY {
    my ($self) = @_;

    undef %$self;
}



#::::::::::::::::::::::::::::::
#          Perldoc
#::::::::::::::::::::::::::::::

1;

__END__

=head1 NAME

G::Tools::GlimmerM - GlimmerM gene prediction interface for G-language GAE

=head1 SYNOPSIS

    use G::Tools::GlimmerM;

   
=head1 DESCRIPTION

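Runs the GlimmerM gene finder on a sequence file (C<run_glimmerM>) and
converts the exon table it writes into CDS and FEATURE entries of a
G::IO object built from the FASTA sequence (C<annotate_with_glimmerM>).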

=head1 AUTHOR

Kazuharu Arakawa, gaou@sfc.keio.ac.jp

=head1 SEE ALSO

perl(1).

=cut



