#!/usr/bin/env perl

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2007 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Primitive.pm,v 1.1 2002/07/30 17:44:27 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#

package G::Seq::ImaGene;

use SubOpt;
use G::Messenger;
use G::Tools::Graph;
use Statistics::Descriptive;

use strict;

use SelfLoader;

require Exporter;

our @ISA = qw(Exporter);
our @EXPORT = qw(
	     ma_normalize
	     ma_filter
	     ma_rfilter
);

__DATA__


#:::::::::::::::::::::::::::::::::
#       Perldoc
#:::::::::::::::::::::::::::::::::


=head1 NAME

  G::Seq::ImaGene - Analysis methods related to ImaGene

=head1 DESCRIPTION

    This class is a part of G-language Genome Analysis Environment, 
    collecting sequence analysis methods related to ImaGene.

=cut


#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::




# _read_master([PATH])
#
# Loads the tab-separated master table and returns two hash references,
# both keyed by the first column (the array spot ID, "maid" in this file):
#   1. ID => second column ("ecid")
#   2. ID => third column  ("jwid")
# Any further columns on a row are ignored.
#
# PATH is optional; when omitted (as every existing caller does) the
# historical hard-coded location is used. Dies with the path and the OS
# error if the file cannot be opened.
sub _read_master{
    my $path = shift;
    $path = '/home/gaou/mori_data_0805/glang/MasterREFE4K10NEW.txt'
        unless defined $path && length $path;

    my (%ma2ec, %ma2jw);

    # Lexical handle + 3-arg open: no clobbering of a global FILE handle and
    # no mode injection through the file name.
    open(my $fh, '<', $path) or die("$path: $!");
    while(my $row = <$fh>){
        chomp($row);
        my ($maid, $ecid, $jwid) = split(/\t/, $row, 4);

        # Blank lines (or rows with an empty first column) carry no ID.
        next unless defined $maid && length $maid;

        $ma2ec{$maid} = $ecid;
        $ma2jw{$maid} = $jwid;
    }
    close($fh);

    return (\%ma2ec, \%ma2jw);
}


#my $filename = "EC030805pgiCy370_data.txt";
#my $file = ma_normalize($filename);
#ma_filter($file);
#ma_rfilter($file);

# ma_normalize(FILENAME)
#
# Reads an ImaGene-style text export and writes "<name>-norm.txt", where
# <name> is taken from the "Control image file: <name>.tif" header line
# (falling back to the input file's base name when that header is absent).
#
# Input handling:
#   * data rows are the lines starting with a digit that follow the
#     "Begin Extracted Data" marker;
#   * rows are whitespace-separated; column 5 is the spot ID, columns 9/10
#     are read as $cy3s/$cy3b and columns 21/22 as $cy5s/$cy5b
#     (presumably signal/background -- confirm against the ImaGene format).
#
# Output rows (tab separated): ID, master column 2, master column 3, the
# four raw values, log2((cy3s-cy3b)/(cy5s-cy5b)), log10 of the product of
# the same two differences. When the ratio is not strictly positive --
# including a zero denominator, which used to abort the run with a
# division-by-zero error -- the two log columns are written as "-".
# Spots whose ID has no third-column entry in the master table are skipped.
#
# Side effects: draws two plots through _UniMultiGrapher (R-I plot, and
# first-vs-second replicate log ratio) and launches "display" on each.
#
# Returns the name of the normalized file.
sub ma_normalize{
    my ($ma2ec, $ma2jw) = _read_master();
    my $filename = shift;

    my $name;
    my $in_data = 0;
    my %spots;     # spot ID => list of [cy3s, cy3b, cy5s, cy5b], in file order

    open(my $in, '<', $filename) or die("$filename: $!");
    while(<$in>){
        if(/^Control image file:\s+(.*?)\.tif/){
            $name = $1;
        }elsif(/^Begin Extracted Data/){
            $in_data = 1;
        }elsif(/^\d/ && $in_data){
            my ($id, @signal) = (split(/\s+/, $_, 24))[4, 8, 9, 20, 21];
            push(@{ $spots{defined $id ? $id : ''} }, \@signal);
        }
    }
    close($in);

    # Without the header line the output used to be called "-norm.txt";
    # derive a usable name from the input file instead.
    unless(defined $name){
        $name = $filename;
        $name =~ s{^.*/}{};
        $name =~ s{\.[^.]*$}{};
    }

    my $normfile = "$name-norm.txt";
    open(my $out, '>', $normfile) or die("$normfile: $!");

    my (@R, @I);     # x (log10 product) and y (log2 ratio) of the R-I plot
    my %valid;       # spot ID => log2 ratios of its usable replicates
    my %invalid;     # spot ID => 1 when at least one replicate was unusable

    foreach my $id (sort keys %spots){
        my $jwid = $ma2jw->{$id};
        next unless defined $jwid && $jwid ne '';

        foreach my $spot (@{ $spots{$id} }){
            my ($cy3s, $cy3b, $cy5s, $cy5b) = @$spot;
            my $cy3 = $cy3s - $cy3b;
            my $cy5 = $cy5s - $cy5b;

            # Test the denominator first so the ratio is never evaluated
            # with $cy5 == 0.
            if($cy5 != 0 && $cy3 / $cy5 > 0){
                my $logn = log($cy3 / $cy5) / log(2);
                my $logm = log($cy3 * $cy5) / log(10);

                printf {$out} "%s\t%s\t%s\t%.4f\t%.4f\t%.4f\t%.4f\t%f\t%f\n",
                    $id, $ma2ec->{$id}, $jwid,
                    $cy3s, $cy3b, $cy5s, $cy5b,
                    $logn, $logm;

                push(@R, $logm);
                push(@I, $logn);
                push(@{ $valid{$id} }, $logn);
            }else{
                printf {$out} "%s\t%s\t%s\t%.4f\t%.4f\t%.4f\t%.4f\t-\t-\n",
                    $id, $ma2ec->{$id}, $jwid,
                    $cy3s, $cy3b, $cy5s, $cy5b;

                $invalid{$id} = 1;
            }
        }
    }
    close($out) or die("$normfile: $!");

    _UniMultiGrapher(\@R, \@I, -title=>"R-I plot", -x=>'log 10 (R*G)', -y=>'log 2 (R/G)',
                     -filename=>"R-Iplot$name.png", -style=>"points");

    # $name comes from the data file, so quote it for the shell; the
    # trailing "&" (background viewer) still needs the shell form of system.
    (my $plotpath = "graph/R-Iplot$name.png") =~ s/'/'\\''/g;
    system("display '$plotpath' &");

    # Replicate plot: first against second log ratio of each spot ID. IDs
    # with any unusable replicate, or fewer than two usable ones, cannot
    # form a pair and are left out (previously this depended on replicate
    # order and could push an empty second value).
    my (@ONE, @TWO);
    foreach my $id (sort keys %valid){
        next if $invalid{$id};
        my ($one, $two) = @{ $valid{$id} };
        next unless defined $two;
        push(@ONE, $one);
        push(@TWO, $two);
    }

    _UniMultiGrapher(\@ONE, \@TWO, -style=>"points");
    system("display graph/graph.png &");

    return $normfile;
}
    

# ma_filter(FILE [, -xmax => N, -xmin => N, -ymax => N, -ymin => N])
#
# Copies to a new file those rows of a normalized table (nine tab-separated
# columns, as written by ma_normalize) whose
#   column 8 ($logn, the y value) lies in [ymin, ymax] and
#   column 9 ($logm, the x value) lies in [xmin, xmax].
# Rows whose log columns are "-" and rows with fewer than nine columns are
# dropped. All four bounds default to +/-1024.
#
# The output name is FILE with every "-norm" replaced by "-filter", or
# FILE with "-filter" appended when it contains no "-norm".
#
# Returns (output file name, reference to a list of "ID logn" strings for
# the rows kept).
sub ma_filter{
    # The default key must be spelled "ymin" like the other three: it is
    # looked up below with opt_val("ymin").
    opt_default(xmax=>1024, xmin=>-1024, ymax=>1024, ymin=>-1024);
    my @args = opt_get(@_);
    my $file = shift @args;
    my $xmax = opt_val("xmax");
    my $xmin = opt_val("xmin");
    my $ymax = opt_val("ymax");
    my $ymin = opt_val("ymin");

    my $newfile = $file;
    if ($newfile =~ /-norm/){
        $newfile =~ s/-norm/-filter/g;
    }else{
        $newfile .= '-filter';
    }

    open(my $in, '<', $file) or die("$file: $!");
    open(my $out, '>', $newfile) or die("$newfile: $!");

    my @result;
    while(my $line = <$in>){
        my $row = $line;
        chomp($row);
        my ($maid, $logn, $logm) = (split(/\t/, $row, 9))[0, 7, 8];

        next unless defined $logm;                   # malformed / short row
        next if ($logn eq '-' || $logm eq '-');      # no usable log values

        # x bounds apply to $logm, y bounds to $logn.
        if($logn >= $ymin && $logn <= $ymax && $logm >= $xmin && $logm <= $xmax){
            push(@result, "$maid $logn");
            print {$out} $line;
        }
    }
    close($in);
    close($out) or die("$newfile: $!");

    return ($newfile, \@result);
}



# ma_rfilter(FILE [, -z => N])
#
# Replicate filter over a normalized table (nine tab-separated columns, as
# written by ma_normalize / ma_filter). Rows whose log columns are "-" and
# rows with fewer than nine columns are ignored.
#
# Pass 1: for each usable row whose ID (column 1) equals the ID of the
#         remembered previous row, record ratio{ID} = previous logn /
#         current logn (column 8). A row with a different ID becomes the
#         new remembered row.
# Then the mean and standard deviation of all recorded ratios are taken
# with Statistics::Descriptive.
# Pass 2: walking the rows the same way, a matching pair is written out
#         (current row first, then the remembered row) when
#         ratio{ID} - mean >= z * sd. z defaults to 1.
#
# NOTE(review): the test is one-sided (only ratios above the mean are
# selected) and it selects the deviating pairs rather than the consistent
# ones; kept as originally written -- confirm this is the intended filter.
#
# The output name is FILE with "-norm" (or else "-filter") replaced by
# "-rfilter", or FILE with "-rfilter" appended. Returns the output name.
sub ma_rfilter{
    opt_default(z=>1);
    my @args = opt_get(@_);
    my $file = shift @args;
    my $z = opt_val("z");

    my $newfile = $file;
    if ($newfile =~ /-norm/){
        $newfile =~ s/-norm/-rfilter/g;
    }elsif($newfile =~ /-filter/){
        $newfile =~ s/-filter/-rfilter/g;
    }else{
        $newfile .= '-rfilter';
    }

    open(my $in, '<', $file) or die("$file: $!");
    open(my $out, '>', $newfile) or die("$newfile: $!");

    # ---- pass 1: collect the replicate ratios ----
    my %ratio;
    my $prev = '';
    while(my $line = <$in>){
        my $row = $line;
        chomp($row);
        my ($maid, $logn, $logm) = (split(/\t/, $row, 9))[0, 7, 8];
        next unless defined $logm;
        next if ($logn eq '-' || $logm eq '-');

        # The previous row's fields must come from $prev; parsing the
        # current line here made every row "match itself" with ratio 1.
        my ($pmaid, $plogn) = (split(/\t/, $prev, 9))[0, 7];

        if (defined $pmaid && $pmaid eq $maid){
            # A zero log ratio cannot serve as the denominator.
            $ratio{$maid} = $plogn / $logn if $logn != 0;
        }else{
            $prev = $line;
        }
    }

    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(values %ratio);
    my $mean = $stat->mean();
    my $sd = $stat->standard_deviation();
    $sd = 0 unless defined $sd;

    # ---- pass 2: emit the pairs beyond the threshold ----
    seek($in, 0, 0) or die("$file: $!");
    $prev = '';
    while(my $line = <$in>){
        my $row = $line;
        chomp($row);
        my ($maid, $logn, $logm) = (split(/\t/, $row, 9))[0, 7, 8];
        next unless defined $logm;
        next if ($logn eq '-' || $logm eq '-');

        my ($pmaid) = split(/\t/, $prev, 2);

        # exists() also guarantees that $mean is defined at this point.
        if (defined $pmaid && $pmaid eq $maid && exists $ratio{$maid}
            && ($ratio{$maid} - $mean) >= $z * $sd){
            print {$out} $line, $prev;
        }else{
            $prev = $line;
        }
    }

    close($in);
    close($out) or die("$newfile: $!");

    return $newfile;
}

1;
