#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2014 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: G.pm,v 1.4 2002/07/30 17:40:56 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#
# written by Kazuharu Arakawa <gaou@sfc.keio.ac.jp> at
# G-language Project, Institute for Advanced Biosciences, Keio University.
#

package G;
require 5.008;

use strict;
use base qw(G::IO Exporter);

# import core extensions
use SubOpt;
use Rcmd;

# import Rcmd classes
use Rcmd::Clustering;
use Rcmd::Normality;
use Rcmd::Multivariate;
use Rcmd::Summary;

# import messenger
use G::Messenger;

# import shell functions
use G::Shell::EUtils;
use G::Shell::Help;

# import core modules
use G::DynamicLoader;

# import db classes
use G::DB::SDB;
use G::DB::BDB;

# import "odyssey" classes
use G::Seq::AminoAcid;
use G::Seq::Codon;
use G::Seq::Consensus;
use G::Seq::GCskew;
use G::Seq::Operon;
use G::Seq::OverLapping;
use G::Seq::Primitive;
use G::Seq::PatSearch;
use G::Seq::Tandem;
use G::Seq::Util;
use G::Seq::Align;
use G::Seq::GenomeMap;
use G::Tools::Graph;
use G::Tools::COGs;
use G::Tools::GPAC;
use G::Tools::GOA;
use G::Tools::Statistics;
use G::Tools::GMap;
use G::Tools::WebServices;

# Default export list.
# Starts with the functions defined in this file and then appends, in the
# order written below, the @EXPORT list of each imported module, so that
# everything is re-exported from G through Exporter.
our @EXPORT = (
    qw(load opt_list method_list),

    # "odyssey" functions
    @G::Seq::AminoAcid::EXPORT,
    @G::Seq::Codon::EXPORT,
    @G::Seq::Consensus::EXPORT,
    @G::Seq::GCskew::EXPORT,
    @G::Seq::Operon::EXPORT,
    @G::Seq::OverLapping::EXPORT,
    @G::Seq::Primitive::EXPORT,
    @G::Seq::PatSearch::EXPORT,
    @G::Seq::Tandem::EXPORT,
    @G::Seq::Util::EXPORT,
    @G::Seq::Align::EXPORT,
    @G::Seq::GenomeMap::EXPORT,
    @G::Tools::Graph::EXPORT,
    @G::Tools::COGs::EXPORT,
    @G::Tools::GPAC::EXPORT,
    @G::Tools::GOA::EXPORT,
    @G::Tools::Statistics::EXPORT,
    @G::Tools::GMap::EXPORT,
    @G::Tools::WebServices::EXPORT,

    # shell, database, messenger and option-handling functions
    @G::Shell::EUtils::EXPORT,
    @G::Shell::Help::EXPORT,
    @G::DB::SDB::EXPORT,
    @G::DB::BDB::EXPORT,
    @G::Messenger::EXPORT,
    @SubOpt::EXPORT,

    # Rcmd classes
    @Rcmd::Clustering::EXPORT,
    @Rcmd::Multivariate::EXPORT,
    @Rcmd::Summary::EXPORT,
    @Rcmd::Normality::EXPORT,

    # plugin functions are appended last (intended to override all others)
    @G::DynamicLoader::EXPORT,
);

# The package version is the version declared by G::IO.
our $VERSION = $G::IO::VERSION;

# Make sure the per-user data directory ~/.glang/data/ exists.
# mkdir() is not recursive: a single mkdir() of the nested path fails
# silently when ~/.glang does not exist yet, so the parent is created first.
# Nothing is attempted when HOME is unset, which would otherwise target
# "/.glang/data/" at the file system root.
# Failures remain non-fatal (best effort).
if(defined $ENV{HOME} && length $ENV{HOME}){
    for my $dir ($ENV{HOME} . '/.glang', $ENV{HOME} . '/.glang/data/'){
        mkdir($dir) unless -d $dir;
    }
}

# load - exported shortcut for the G constructor.
#   Arguments: passed through unchanged to G->new().
#   Returns:   whatever G->new() returns (the new G instance).
sub load{
    # Explicit class-method call. The indirect-object form "new G(...)" is
    # parsed as a plain function call as soon as a sub named "new" becomes
    # visible in this package, which is easy to trigger with this many imports.
    return G->new(@_);
}

# opt_list - run a function in "option list" mode and return opt_val().
#   $sub : name of the function to call, either unqualified (resolved in
#          this package) or fully qualified ("Pkg::name").
#   Any further arguments stay in @_ and are visible to the called function.
#   Returns: the result of opt_val().
#   NOTE(review): presumably the called function registers its options while
#   list mode is on and opt_val() reports them -- confirm in SubOpt.
sub opt_list{
    my $sub = shift;

    SubOpt::opt_default();
    SubOpt::set_opt_list(1);

    # Only a plain (optionally package-qualified) identifier is accepted.
    # The name used to be interpolated into a string eval, which executed
    # arbitrary Perl code embedded in $sub.
    if(defined $sub && $sub =~ /\A(?:\w+::)*[A-Za-z_]\w*\z/){
        # Errors raised by the call (including an undefined function) are
        # deliberately ignored so that list mode is always switched off again.
        eval{
            no strict 'refs';
            &{$sub};    # "&name;" form: the callee shares the current @_
        };
    }

    SubOpt::set_opt_list(0);

    return opt_val();
}

# method_list - list function or method names known to G.
#   $opt eq 'system' : sorted names of the system-level functions below.
#   $opt eq 'gb'     : sorted, fixed list of G instance method names.
#   anything else    : sorted contents of @EXPORT, without the system-level
#                      names and without cum_gcskew and _UniMultiGrapher.
sub method_list{
    my $opt = shift;

    # Lookup table of the system-level function names.
    my %system = map { $_ => 1 } qw/
        p puts say readFile writeFile
        opt_as_gb opt_default opt_get opt_list opt_val
        msg_ask_interface msg_error msg_send msg_gimv msg_interface
        msg_percent msg_progress msg_set_gimv msg_system_console
        msg_term_console
        sdb_exists sdb_load sdb_save _sdb_path _set_sdb_path
        db_dbi db_exists db_load db_path db_save db_set_path
        pass_send pass_get
    /;

    if($opt eq 'system'){
        my @system_names = keys %system;
        return sort @system_names;
    }

    if($opt eq 'gb'){
        my @instance_methods = qw/
            output seq seq_info find getseq get_gbkseq get_cdsseq
            get_geneseq feature cds tRNA rRNA intergenic gene
            disable_pseudogenes next_feature
            next_cds previous_feature previous_cds startcodon stopcodon
            before_startcodon
            after_startcodon before_stopcodon after_stopcodon
            around_startcodon around_stopcodon
            get_exon get_exons get_intron pos2feature pos2gene gene2id
            next_locus clone del_key
            reverse_strand relocate_origin disclose set_essentiality
        /;
        return sort @instance_methods;
    }

    # Default: every exported name that is neither a system-level function
    # nor one of the two names excluded explicitly.
    my @visible = grep {
        !($system{$_} || $_ eq 'cum_gcskew' || $_ eq '_UniMultiGrapher')
    } @EXPORT;

    return sort @visible;
}

#::::::::::::::::::::::::::::::
#          Perldoc
#::::::::::::::::::::::::::::::

1;

__END__

=head1 NAME

G - G-language Genome Analysis Environment core interface module

=head1 SYNOPSIS

 use G;                          # Imports G-language GAE module 
   
 $gb = new G("ecoli.gbk");       # Creates G's instance as $gb 
 $gb =  load("ecoli.gbk");       # this line is same as the above.
                                 # At the same time, read in ecoli.gbk. 
                                 # Read the annotation and sequence 
                                 # information 
                                 # See DESCRIPTION for details
   
 $gb->seq_info();                # Prints the basic sequence information.

 find_ori_ter($gb);              # Give $gb as the first argument to 
                                 # most of the analysis functions


=head1 DESCRIPTION

 The G-language GAE fully supports most sequence databases.

 Stored annotation information:

 LOCUS  
         $gb->{LOCUS}->{id}              -accession number 
         $gb->{LOCUS}->{length}          -length of sequence  
         $gb->{LOCUS}->{nucleotide}      -type of sequence ex. DNA, RNA  
         $gb->{LOCUS}->{circular}        -1 when the genome is circular.
                                          otherwise 0
         $gb->{LOCUS}->{type}            -type of species ex. BCT, CON  
         $gb->{LOCUS}->{date}            -date of accession 

 HEADER  
         $gb->{HEADER}  
         $gb->{DEFINITION}
         $gb->{ACCESSION}
         $gb->{SOURCE}
         $gb->{ORGANISM}

         $gb->{TAXONOMY}->{all}          -same as $gb->{TAXONOMY}->{1}
         $gb->{TAXONOMY}->{domain}       -same as $gb->{TAXONOMY}->{2}
         $gb->{TAXONOMY}->{phylum}       -same as $gb->{TAXONOMY}->{3}
         $gb->{TAXONOMY}->{class}        -same as $gb->{TAXONOMY}->{4}
         $gb->{TAXONOMY}->{order}        -same as $gb->{TAXONOMY}->{5}
         $gb->{TAXONOMY}->{family}       -same as $gb->{TAXONOMY}->{6}
         $gb->{TAXONOMY}->{genus}
         $gb->{TAXONOMY}->{species}

 COMMENT  
         $gb->{COMMENT}  

 FEATURE  
         Each FEATURE is numbered(FEATURE1 .. FEATURE1172), and is a 
         hash structure that contains all the keys of Genbank.   
         In other words,  in most cases, FEATURE$i's hash at least 
         contains the information listed below:
         $gb->{FEATURE$i}->{start}  
         $gb->{FEATURE$i}->{end}  
         $gb->{FEATURE$i}->{direction}
         $gb->{FEATURE$i}->{join}
         $gb->{FEATURE$i}->{note}  
         $gb->{FEATURE$i}->{type}        -CDS,gene,RNA,etc.
         $gb->{FEATURE$i}->{feature}     -same as $i

         To analyze each FEATURE, write: 

         foreach my $feature ($gb->feature()){
               print $gb->{$feature}->{type}, "\n";
         }  

         In the same manner, to analyze all CDS, write:  
 
         foreach my $cds ($gb->cds()){
               print $gb->{$cds}->{gene}, "\n";
         }

         Feature or gene information can also be accessed with CDS numbers:
         $gb->{CDS$i}->{start}

         or with locus_tags or gene names (for CDS, tRNA, and rRNA)
         $gb->{thrL}->{start}
         $gb->{b0001}->{start}

 BASE COUNT  
         $gb->{BASE_COUNT}  

 SEQ  
         $gb->{SEQ}              -sequence data following "ORIGIN" 

         or
 
         $gb->seq()

=head1 Supported methods of G-language Genome Analysis Environment

=cut

=head2 $gb = new G("genome file")

     Name: $gb = new G("genome file")   -   create a G instance

     see "help load" for more information.

=cut

=head2 load

     Name: load   -   load genome databases

         This function is used to load genome databases into memory.
         First option is the filename of the database. Default format is
         the GenBank database. Database format is guessed from the extensions.
         (eg. .gbk => GenBank, .fasta => FASTA, .embl => EMBL)
         Most of the major sequence formats are supported, including
         Fasta, Fastq, GenBank, EMBL, Swiss-Prot, GCG, PIR, and so on.

         Flatfile can be gzipped. If the file extension ends with ".gz",
         load() can automatically handle it as compressed file.

         There are also several sample bacterial genomes included in the system.
         $eco   = load("ecoli");    # Escherichia coli K12 MG1655 - NC_000913
         $bsub  = load("bsub");     # Bacillus subtilis           - NC_000964
         $mgen  = load("mgen");     # Mycoplasma genitalium       - NC_000908
         $cyano = load("cyano");    # Synechococcus sp.           - NC_005070
         $pyro  = load("pyro");     # Pyrococcus furiosus         - NC_003413
         $bbur  = load("bbur");     # Borrelia burgdorferi B31    - NC_001318
         $plasF = load("plasmidf"); # Plasmid F                   - NC_002483

         Data can be automatically downloaded from public databases using
         Uniform Sequence Address (USA) keys.
         http://emboss.sourceforge.net/docs/themes/UniformSequenceAddress.html
         Currently supported database keys are: 
            swiss, genbank, embl, refseq 
         eg. 
            $gb = load("embl:xlrhodop");
            $gb = load("genbank:AY063336")
            $gb = load("swiss:ROA1_HUMAN")

         For bacteria, name of the species, its abbreviations, and NCBI 
         taxonomy IDs can also be used.
         All of the following examples load E.coli K12 genome (NC_000913).
         eg. 
            $gb = load("Escherichia coli");
            $gb = load("e.coli k12");
            $gb = load("511145");

         Subsequent arguments work as options. Multiple options can be given 
         in any order.

           'no msg'                  suppresses all STDOUT messages printed
                                     when loading a database, including the
                                     copyright info and sequence statistics.

           'no cache'                suppresses the use of database caching.
                                     By default, databases are cached for
                                     optimized performance. (since v.1.6.4)

           'force cache'             rebuilds database cache.

           'multiple locus'          this option merges multiple loci in the 
                                     database and load the information
                                     as G-language instance.
                                     
                                     By supplying a number with this option, 
                                     eg. 'multiple locus 50'
                                     merged sequence contains 50 'N's between
                                     the contigs. By default, no 'N' is inserted.

           'longest ORF annotation'  this option predicts genes with longest ORF
                                     algorithm (longest frame from start codon
                                     to stop codon, with more than 17 amino 
                                     acids) and annotates the sequence.

           'Fasta'              this option loads a Fasta format database.
           'Fastq'              this option loads a FastQ format database.
           'EMBL'               this option loads an EMBL format database.

=cut


=head2 method_list

   Name: method_list   -   get the list of available G-language GAE functions

   Description:
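         Returns a sorted array of available method names.
         With no argument, returns the exported analysis functions.
         When 'gb' is supplied as an argument, returns an array of API-related
         method names (methods of the G instance).
         When 'system' is supplied as an argument, returns the names of the
         system-level functions (eg. say, msg_send, opt_get, db_load).

         eg. @methods    = method_list();         # contains more than 100 analysis functions
             @APImethods = method_list('gb');     # contains the API-related methods.
             @sysmethods = method_list('system'); # contains the system-level functions.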

   REST: 
      http://rest.g-language.org/method_list 

=cut

=head2 $gb->next_locus()

   Name: $gb->next_locus()   -   read the next locus and update the G instance

   Description:
         Reads the next locus.
         the G instance is then updated. 
         Load G instance with "no cache" option to use this feature.

         eg. 
           do{
  
           }while($gb->next_locus());
           #  Enables multiple loci analysis.        

   REST: 
      http://rest.g-language.org/NC_000913/next_locus

=cut

=head1 SEE ALSO

G::IO::Handler

=head1 AUTHOR

Kazuharu Arakawa, gaou@sfc.keio.ac.jp

=cut



