#!/usr/bin/env perl

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2007 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Operon.pm,v 1.2 2002/07/30 17:40:56 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#


package G::Seq::Operon;

use SubOpt;
use G::Messenger;

use LWP::Simple;

use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);


use SelfLoader;

require Exporter;

# Inherit import() from Exporter so that `use G::Seq::Operon;` exports names.
@ISA = qw(Exporter);
# Names exported into the caller's namespace by default.
@EXPORT = qw(
	     set_operon
);


__DATA__

=head1 NAME

G::Seq::Operon - Add RegulonDB operon annotation to E. coli genome data

=head1 SYNOPSIS

  use G::Seq::Operon;
  set_operon($gb);

=head1 DESCRIPTION

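This module exports set_operon(), which retrieves operon information from
RegulonDB and records, for each CDS of the given genome, the name of the
operon the gene belongs to and the rank order of the gene within that operon.

It currently only works for E. coli (accessions U00096 and NC_000913).
See the set_operon entry below for details.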

=head1 AUTHOR

Kazuharu Arakawa, gaou@sfc.keio.ac.jp

=head1 SEE ALSO

perl(1).

=cut


######################################
#         Main Methods               #
######################################


=head2 set_operon

  Name: set_operon   -   set operon information from RegulonDB

  Description:
    This program retrieves the operon information from RegulonDB, and adds this
    to the given genome data. !!!This method currently only works for E.coli!!!

    Two attributes are added to each CDS hash.
        $genome->{$cds}->{operon}
    contains the name of the operon to which the gene belongs, and 
        $genome->{$cds}->{operonN}
    contains the rank order of the gene within the operon.

  Usage:
    set_operon($gb);

 Options:
   None.

  References:
   1. Salgado H et al. (2006) "RegulonDB (version 5.0): Escherichia coli K-12 transcriptional
      regulatory network, operon organization, and growth conditions", Nucleic Acids Res. 
      1;34(Database issue):D394-7

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070829-01 patched to match latest version of RegulonDB format
               (patch by Hiroyuki Nakamura <t04632hn@sfc.keio.ac.jp>)
   20061003-01 updated to use data from RegulonDB
   20020207-01 initial posting

=cut


sub set_operon {
    # Annotate the CDS entries of an E. coli K-12 genome with operon
    # membership read from RegulonDB's OperonSet.txt.
    #
    # Arguments:
    #   $gb - genome object; it must provide gene2id() and cds(), and carry
    #         its accession in $gb->{LOCUS}->{id}.
    #
    # Effects on $gb:
    #   $gb->{$cds}->{operon}  - operon name, set for genes of operons that
    #                            are listed with two or more genes
    #   $gb->{$cds}->{operonN} - 1-based rank of the gene within its operon,
    #                            ordered by start coordinate (ascending for
    #                            'forward' operons, descending otherwise);
    #                            0 for every CDS that has no operon
    #
    # Returns 1 in every case.  Dies when no local copy of OperonSet.txt
    # exists after the download attempt, or when that copy cannot be opened.
    # For accessions other than U00096 / NC_000913 nothing is annotated and
    # a message is emitted through msg_error().

    # opt_get() comes from SubOpt; the first value it returns is taken as
    # the genome object.
    my @args = opt_get(@_);
    my $gb = shift @args;

    my $accession = $gb->{LOCUS}->{id};
    unless ($accession eq 'U00096' || $accession eq 'NC_000913') {
        msg_error("No Operon data for this species.\n\n");
        return 1;
    }

    my $url = "http://regulondb.ccg.unam.mx:80/data/OperonSet.txt";
    my $cache_file = $ENV{HOME} . '/.glang/data/OperonSet.txt';

    # The status returned by mirror() is not inspected: when the download
    # fails, a copy left behind by an earlier run is still used.
    mirror($url, $cache_file);
    die("setOperon: cannot retrieve data from RegulonDB.") unless(-e $cache_file);

    # Three-argument open with a lexical handle: the path can never be
    # interpreted as an open mode, and no global FILE handle is clobbered.
    open(my $fh, '<', $cache_file)
        or die("setOperon: cannot open $cache_file: $!");

    # Counts the "Columns:" line plus the "\t(N) ..." column descriptions
    # seen so far.  Records are parsed only while the count is exactly 5,
    # i.e. after the "Columns:" line and four column descriptions.
    # NOTE(review): this is tied to the four-column layout of the file;
    # confirm against the current RegulonDB release.
    my $header_marks = 0;

    # A lexical line variable keeps the caller's $_ untouched.
    while (my $line = <$fh>) {
        chomp $line;

        if ($line =~ /^Columns\:/ || $line =~ /^\t\(\d\)\s/) {
            $header_marks++;
            next;
        }

        # Everything outside the record section is passed on to the
        # messenger unchanged.
        unless ($header_marks == 5) {
            msg_error($line, "\n");
            next;
        }

        my ($operon, $num, $direction, $genes) = split(/\t/, $line, 4);
        next unless($num >= 2);

        # Start coordinate of every gene of this operon that could be
        # mapped onto a CDS of the genome.
        my %start_of;
        foreach my $genepair (split(/,/, $genes)) {
            my ($gene, $locustag) = split(/\|/, $genepair, 2);

            # Look up by locus tag first, then fall back to the gene name.
            my $cds = $gb->gene2id($locustag);
            $cds = $gb->gene2id($gene) unless(length $cds);
            next unless $cds;

            $gb->{$cds}->{operon} = $operon;
            $start_of{$cds} = $gb->{$cds}->{start};
        }

        # Rank genes along the operon: ascending start for 'forward',
        # descending start for any other direction.
        my $sign = $direction eq 'forward' ? 1 : -1;
        my $rank = 0;
        foreach my $cds (sort { $sign * ($start_of{$a} <=> $start_of{$b}) } keys %start_of) {
            $gb->{$cds}->{operonN} = ++$rank;
        }
    }
    close($fh);

    # CDSs that were not assigned to any operon get rank 0.
    foreach my $cds ($gb->cds()) {
        $gb->{$cds}->{operonN} = 0 unless(length $gb->{$cds}->{operon});
    }

    return 1;
}






1;

