#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2009 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#

# Literature tools of the G-language GAE. The names listed in @EXPORT
# below are exported to callers by default.
package G::Tools::Literature;

# Project-local modules.
# NOTE(review): opt_default/opt_get/opt_val, which PubMedSearch calls,
# presumably come from SubOpt -- confirm against that module.
use SubOpt;
use G::Messenger;

use strict;
# Inherit from Exporter so the names in @EXPORT are imported by callers.
use base qw(Exporter);
# The subroutines live after the __DATA__ marker; SelfLoader compiles
# them on first call instead of at module load time.
use SelfLoader;

# Subroutines exported into the caller's namespace by default.
our @EXPORT = qw(PubMedSearch KeySearch);

__DATA__

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::


sub PubMedSearch{
    require LWP::Simple;
    require LWP::UserAgent;
    require HTTP::Cookies;
    
    my $time=time;
    &opt_default(limit=>500, dir=>'PUBMED'.$time, key=>'');
    my @args=opt_get(@_);
    
    my $query=shift @args;
    my $limit=opt_val('limit');
    my $dir=opt_val('dir');
    my $key=opt_val('key');

    my $com;
    my @date;
    my $req;
    my $res;
    my $ua;
    my $i;
    my $frag;
    my $frag2;
    my @line;
    my @line2;
    my @line3;
    my $url;
    my $url2;
    my $tmp;
    my $tmp2;
    my $head;
    my $abst;
    my $abstract;
    my $title;
    my $authors;
    my $affiliation;
    my $journal;
    my $pmid;
    my $download=0;
    my $invalid=0;
    my $permit=0;
    my $nopdf=0;
    my $notfound=0;
    
    $tmp=-d "$dir";
    if($tmp == 1){
	print "\"$dir\" : The directory has already existed.\n";
	return;
    }

    @date=localtime($time);
    $date[5]=$date[5]+1900;
    $date[4]=$date[4]+1;

    print '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print "Query word is \"$query\".\n";
    print "Search limit is \"$limit\".\n";
    print "Key word is \"$key\".\n" if($key);
    print "---------------------------------\n";

    mkdir("$dir",0777);
    open(LOG, ">$dir/$dir".'.log');

    print LOG '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print LOG "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print LOG "Query: $query\n";
    print LOG "Limit: $limit\n";
    print LOG "Key: $key\n" if($key);
    print LOG "Directory: $dir\n";
    print LOG "Log file: $dir\/$dir\.log\n";
    print LOG "Key file: $dir\/$dir\.key\n" if($key);
    print LOG "---------------------------------\n\n";

    $query =~ tr/ /+/;

    $ua = LWP::UserAgent->new;
    $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt", autosave => 1));
    
    $com='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?SUBMIT=y&DB=PubMed&cmd=&term='.$query.'&dispmax='.$limit;
    $req = HTTP::Request->new(GET => $com);
    $res = $ua->request($req);
    
    unless($res->is_success){
	print "Error occured: PubMed isn't available.\n";
	print "---------------------------------\n";
	return;
    }

    @line=split(/\n/,$res->as_string);
    foreach(@line){
	if(/\<td width\=\"100\%\"\>\<font size\=\"\-1\"\>\<a href\=\"(.*)\"\>.*/){
	    $i++;
	    $frag = 0;
	    $frag2 = 0;
	    $journal = "";
	    $title = "";
	    $authors = "";
	    $affiliation = "";
	    $abstract = "";
	    $pmid = "";
	    
	    $url=$1;
	    $url =~ s/amp\;//g;
	    $req = HTTP::Request->new(GET => "$url");
	    $abst = $ua->request($req);

#######################################################
#  Abstract                                           #
#######################################################
	    
	    if($abst->content =~ /\<input name\=\"uid\" type\=\"checkbox\" value\=\"\d+\"\>\<b\>1\: \<\/b\>(.*)\<\/td\>/){
		$journal=$1;
		if($journal =~ /Error occured\: cannot get document summary/){
		    $journal = 'Error occured! cannot get document summary';
		    print "$i\.txt: Not found $url.\n";
		    print "---------------------------------\n";
		    
		    open(TXT, ">$dir/$i".'.txt');
		    print TXT "$journal\n";
		    close(TXT);

		    print LOG "\[PAPER $i\]\n";
		    print LOG "Journal: $journal\n";
		    print LOG "URL: $url\n";
		    print LOG "FILE: $i\.txt\n";
		    print LOG "State: Not found the abstract.\n";
		    print LOG "---------------------------------\n\n";
		    $notfound++;
		    
		    $frag = 1;
		    $frag = 2;
		    next;
		}
	    }
	    if($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)\<br\>\<br\>(PMID\: .*)\<\/dd\>\n/){
		$title=$1;
		$authors=$2;
		$affiliation=$3;
		$abstract=$4;
		$pmid=$5;
	    }
	    elsif($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)(PMID\: .*)\<\/dd\>\n/){
                $title=$1;
                $authors=$2;
                $affiliation=$3;
                $abstract=$4;
                $pmid=$5;
            }
	    
	    print LOG "\[PAPER $i\]\n";
	    print LOG "Journal: $journal\n";
	    print LOG "Title: $title\n";
	    print LOG "Authors: $authors\n";
	    print LOG "$pmid\n";

	    @line2=split(/\n/,$abst->content);
	    foreach(@line2){
		if(/\<dd\>\<SPAN\>\<a href\=\"(.*)\" OnClick.*/){
		    $frag2 = 1;
		    $url=$1;
		    $url='http://www.ncbi.nlm.nih.gov:80'.$url;
		    
#######################################################
#  Direct Access                                      #
#######################################################
		    
		    if($url =~ /\.pdf$/){
			$req = HTTP::Request->new(GET => "$url");
			$res = $ua->request($req);
			
			if($res->is_success){
			    $frag = 1;
			    unless($res->content =~ /^\%PDF.*/){
				print "$i\.txt: Not permitted in $url. Only abstract is saved.\n";
				print "---------------------------------\n";

				open(TXT, ">$dir/$i".'.txt');
				print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				close(TXT);
				print LOG "URL: $url\n";
				print LOG "FILE: $i\.txt\n";
				print LOG "State: Not permitted. Only abstract is saved.\n";
				print LOG "---------------------------------\n\n";
				$permit++;
				last;
			    }
			    
			    open(PDF, ">$dir/$i".'.pdf');
			    print PDF $res->content;
			    close(PDF);
			    
			    print "$i\.pdf: Download from $url was successful.\n";
			    print "---------------------------------\n";

			    print LOG "URL: $url\n";
			    print LOG "FILE: $i\.pdf\n";
			    print LOG "State: Download successfully.\n";
			    print LOG "---------------------------------\n\n";

			    $download++;
			}
			last;
		    }

#######################################################
#  Access to the site                                 #
#######################################################
		    
		    $url =~ s/amp\;//g;
		    $url =~ s/amp\%3[Bb]//g;
		    $req = HTTP::Request->new(HEAD => "$url");
		    $head = $ua->request($req);

		    $url =  $head->{_request}->{_uri};
		    $url =~ s/amp\;//g;
		    $url =~ s/amp\%3[Bb]//g;

		    $req = HTTP::Request->new(GET => "$url");
		    $res = $ua->request($req);

		    unless($res->is_success){
			print "$i\.txt: Not found $url.\n";
			print "---------------------------------\n";
			
			open(TXT, ">$dir/$i".'.txt');
			print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
			close(TXT);

			print LOG "URL: $url\n";
			print LOG "FILE: $i\.txt\n";
			print LOG "State: Not found the page. Only abstract is saved.\n";
			print LOG "---------------------------------\n\n";
			$notfound++;
			
			$url="not found";
			$frag = 1;
		    }
		    
#######################################################
#  Springer                                           #
#######################################################
		    
		    if($url =~ /\.springer\./){
			my $spfrag;

			$url =~ s/index.html//;
			@line3=split(/\n/, $res->content);
			foreach(@line3){
			    if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){
				$url2=$url.$1;
				$req = HTTP::Request->new(GET => "$url2");
				$res = $ua->request($req);
				last;
			    }
			    elsif(/Otherwise click \<a href\=\"\.\.\/\.\.(.*)\"\>here\!\<\/a\>\<\/p\>/){
				$tmp = $1;
				$url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/;

				$req = HTTP::Request->new(GET => "$url");
				$res = $ua->request($req);

				$url =~ s/index.html//;
				@line2=split(/\n/, $res->content);
				foreach(@line2){
				    if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){
					$url2=$url.$1;
					$req = HTTP::Request->new(GET => "$url2");
					$res = $ua->request($req);
					last;
				    }
				}
				last;
			    }
			    elsif(/\<a href\=\"\.\.\/\.\.(.*\.pdf)\"\>PDF/ || /\<a HREF\=\"\.\.\/\.\.(.*\.pdf)\"\>Article in PDF format/){
				$spfrag =1;
				$tmp=$1;
				$url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/;

				$req = HTTP::Request->new(GET => "$url");
				$res = $ua->request($req);
				
				if($res->is_success){
				    $frag = 1;
				    unless($res->content =~ /^\%PDF.*/){
					print "$i\.txt: Not permitted in $url.\n";
					print "---------------------------------\n";
					
					open(TXT, ">$dir/$i".'.txt');
					print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
					close(TXT);
					print LOG "URL: $url\n";
					print LOG "FILE: $i\.txt\n";
					print LOG "State: Not permitted. Only abstract is saved.\n";
					print LOG "---------------------------------\n\n";
					$permit++;
					last;
				    }
				    
				    open(PDF, ">$dir/$i".'.pdf');
				    print PDF $res->content;
				    close(PDF);
				    
				    print "$i\.pdf: Download from $url was successful.\n";
				    print "---------------------------------\n";
				    
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.pdf\n";
				    print LOG "State: Download successfully.\n";
				    print LOG "---------------------------------\n\n";
				    $download++;
				    last;
				}
			    }
			}
			
			last if($spfrag == 1);

			if($res->content =~ /.*\"(.*\.pdf)\".*/){
			    $url2 = $url.$1;
			    $req = HTTP::Request->new(GET => "$url2");
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url2.\n";
				    print "---------------------------------\n";
				    
				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url2\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url2 was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url2\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

#######################################################
#  Springer-ny                                        #
#######################################################
		    
		    if($url =~ /\.springer-ny\./){
			$url =~ s/index\.html//;
			$url2=$url.'paper/index.html';
			
			$req = HTTP::Request->new(GET => "$url2");
			$res = $ua->request($req);
			
			if($res->is_success){
			    $frag = 1;
			    unless($res->content =~ /^\%PDF.*/){
				print "$i\.txt: Not permitted in $url2.\n";
				print "---------------------------------\n";

				open(TXT, ">$dir/$i".'.txt');
				print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				close(TXT);
				print LOG "URL: $url2\n";
				print LOG "FILE: $i\.txt\n";
				print LOG "State: Not permitted. Only abstract is saved.\n";
				print LOG "---------------------------------\n\n";
				$permit++;
				last;
			    }
			    
			    open(PDF, ">$dir/$i".'.pdf');
			    print PDF $res->content;
			    close(PDF);
			    
			    print "$i\.pdf: Download from $url2 was successful.\n";
			    print "---------------------------------\n";

			    print LOG "URL: $url2\n";
			    print LOG "FILE: $i\.pdf\n";
			    print LOG "State: Download successfully.\n";
			    print LOG "---------------------------------\n\n";
			    $download++;
			}
		    }

#######################################################
#  Catchword                                          #
#######################################################
		    
		    if($url =~ /\.catchword\./){
			my $catchword;
			if($res->content =~ /SRC\=\"(.*)\" NAME\=\"toolbar\"/){
			    $url='http://www.catchword.com/'.$1;
			
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);
			    
			    @line3=split('\n', $res->content);
			    foreach(@line3){
				if($res->content =~ /\<a href\=\"(.*)\"alt\=\"full document\"/){
				    $catchword=1;
				    $url='http://www.catchword.com/'.$1;
				    
				    $req = HTTP::Request->new(GET => "$url");
				    $res = $ua->request($req);
			    	    
				    if($res->is_success){
					$frag = 1;
					unless($res->content =~ /^\%PDF.*/){
					    print "$i\.txt: Not permitted in $url.\n";
					    print "---------------------------------\n";
					    
					    open(TXT, ">$dir/$i".'.txt');
					    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
					    close(TXT);
					    print LOG "URL: $url\n";
					    print LOG "FILE: $i\.txt\n";
					    print LOG "State: Not permitted. Only abstract is saved.\n";
					    print LOG "---------------------------------\n\n";
					    $permit++;
					    last;
					}
			    
					open(PDF, ">$dir/$i".'.pdf');
					print PDF $res->content;
					close(PDF);
					
					print "$i\.pdf: Download from $url was successful.\n";
					print "---------------------------------\n";
					
					print LOG "URL: $url\n";
					print LOG "FILE: $i\.pdf\n";
					print LOG "State: Download successfully.\n";
					print LOG "---------------------------------\n\n";
					$download++;
				    }
				}
			    }
			    if($catchword == 0){
				$frag = 1;
				print "$i\.txt: Not permitted in $url.\n";
				print "---------------------------------\n";
				
				open(TXT, ">$dir/$i".'.txt');
				print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				close(TXT);
				print LOG "URL: $url\n";
				print LOG "FILE: $i\.txt\n";
				print LOG "State: Not permitted. Only abstract is saved.\n";
				print LOG "---------------------------------\n\n";
				$permit++;
			    }
			}
		    }
		    
#######################################################
#  InterScience                                       #
#######################################################
		    
		    elsif($url =~ /\.interscience\./){
			if($res->as_string =~ /.*\"(.*\.pdf)\".*/){
			    $url2 = 'http://www3.interscience.wiley.com'.$1;
			    
			    $req = HTTP::Request->new(GET => "$url2");
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url2.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url2\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url2 was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url2\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

#######################################################
#  Wiley                                              #
#######################################################
		    
		    if($url =~ /doi\.wiley\.com/){
			if($res->content =~ /\<h1\>Error\<\/h1\>/){
			    print "$i\.txt: Not found $url.\n";
			    print "---------------------------------\n";
			    
			    open(TXT, ">$dir/$i".'.txt');
			    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
			    close(TXT);
			    
			    print LOG "URL: $url\n";
			    print LOG "FILE: $i\.txt\n";
			    print LOG "State: Not found the page. Only abstract is saved.\n";
			    print LOG "---------------------------------\n\n";
			    $notfound++;
			    
			    $url="not found";
			    $frag = 1;
			}	    
		    }
		    
#######################################################
#  Synergy                                            #
#######################################################
		    
		    elsif($url =~ /\.blackwell-synergy\./){
			if($res->as_string =~ /\<a href\=\"javascript\:newWindow\(\'(.*\.x\/pdf)\'.*/){
			    $url2 = 'http://www.blackwell-synergy.com'.$1;
			    
			    $req = HTTP::Request->new(GET => "$url2");
			    $res = $ua->request($req);
			    
			    if($res->as_string =~ /\<a href\=\"(.*pdf.*)\"\>/){
				$req = HTTP::Request->new(GET => "$1");
				$tmp = $1;
				$res = $ua->request($req);
				
				if($res->is_success){
				    $frag = 1;
				    unless($res->content =~ /^\%PDF.*/){
					print "$i\.txt: Not permitted in $tmp.\n";
					print "---------------------------------\n";

					open(TXT, ">$dir/$i".'.txt');
					print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
					close(TXT);
					print LOG "URL: $tmp\n";
					print LOG "FILE: $i\.txt\n";
					print LOG "State: Not permitted. Only abstract is saved.\n";
					print LOG "---------------------------------\n\n";
					$permit++;
					last;
				    }
				    
				    open(PDF, ">$dir/$i".'.pdf');
				    print PDF $res->content;
				    close(PDF);
				    
				    print "$i\.pdf: Download from $tmp was successful.\n";
				    print "---------------------------------\n";

				    print LOG "URL: $tmp\n";
				    print LOG "FILE: $i\.pdf\n";
				    print LOG "State: Download successfully.\n";
				    print LOG "---------------------------------\n\n";
				    $download++;
				}
			    }
			}
		    }
		    
#######################################################
#  EMBO                                               #
#######################################################
		    
		    elsif($url =~ /\/\/emboj\./){
			if($res->as_string =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/){
			    $url = 'http://emboj.oupjournals.org'.$1;
			    $url =~ s/content/reprint/;
			    $url = $url.'.pdf';
			    
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

#######################################################
#  JVMS                                               #
#######################################################

                    elsif($url =~ /\/\/jvms\./){
                        if($res->content =~ /\<a href\=\"(.*)\"\>PDF/){
                            $url = 'http://jvms.jstage.jst.go.jp'.$1;
	
                            $req = HTTP::Request->new(GET => "$url");
                            $res = $ua->request($req);

                            if($res->is_success){
                                $frag = 1;
                                unless($res->content =~ /^\%PDF.*/){
                                    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";
				    
                                    open(TXT, ">$dir/$i".'.txt');
                                    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
                                    close(TXT);
                                    print LOG "URL: $url\n";
                                    print LOG "FILE: $i\.txt\n";
                                    print LOG "State: Not permitted. Only abstr\
act is saved.\n";
                                    print LOG "--------------------------------\
-\n\n";
                                    $permit++;
                                    last;
                                }

                                open(PDF, ">$dir/$i".'.pdf');
                                print PDF $res->content;
                                close(PDF);

                                print "$i\.pdf: Download from $url was successful.\n";
                                print "---------------------------------\n";

                                print LOG "URL: $url\n";
                                print LOG "FILE: $i\.pdf\n";
                                print LOG "State: Download successfully.\n";
                                print LOG "---------------------------------\n\n";
				$download++;
                            }
                        }
                    }

#######################################################
#  J Biol Chem, J Clinical Inv and Neurology          #
#######################################################

		    elsif($url =~ /\/\/(www\.jbc\.org)/ || $url =~ /\/\/(www\.jci\.org)/ || $url =~ /\/\/(www\.neurology\.org)/ || $url =~ /\/\/(circ\.ahajournals\.org)/ || $url =~ /\/\/(www\.pnas\.org)/ || $url =~ /\/\/(www\.fasebj\.org)/ || $url =~ /\/\/(www\.jneurosci\.org)/ || $url =~ /\/\/(bioinformatics\.oupjournals\.org)/){
			$tmp=0;
			$tmp2=$1;
			if($res->content =~ /window\.location \= \"(.*)\"\;/){
			    $url='http://'.$tmp2.$1;
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);
			    $tmp=1;
			}
			elsif($res->content =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/ || $res->content =~ /\<A HREF\=\"(.*)\"\>Screen \(PDF\)/){
			    $url='http://'.$tmp2.$1.'.pdf';
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);
			    $tmp=1;
			}

			if($tmp == 1){
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

#######################################################
#  Nature                                             #
#######################################################

		    elsif($url =~ /\/\/www\.nature\.com/){
			if($res->content =~ /Full text.*\"(.*)\"\>PDF/){
			    $url='http://www.nature.com'.$1;
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);

			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

#######################################################
#  Portlandpress                                      #
#######################################################

		    elsif($url =~ /\/\/cs\.portlandpress\.com/){
			if($res->content =~ /\<A class\=\"sidelinks\" HREF\=\"(.*\.pdf)\"\>\<img src/){
			    $url='http://cs.portlandpress.com'.$1;
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);

			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

#######################################################
#  Elsevier                                           #
#######################################################
		    
		    elsif($url =~ /\/\/linkinghub\.elsevier\.com/){
			if($res->content =~ /\<a HREF\=\"(.*)\"\>\<img border.*src\=\"http\:\/\/www\.sciencedirect\.com\//){
			    $tmp = $1;
			    $tmp =~ s/amp\;//g;
			    $req = HTTP::Request->new(GET => "$tmp");
			    $res = $ua->request($req);
			}

			if($res->content =~ /.*\"(.*\.pdf)\".*/){
			    $req = HTTP::Request->new(GET => "$1");
			    $tmp = $1;
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $tmp.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $tmp\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $tmp was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $tmp\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
			else{
			    $frag=1;
			    
			    print "$i\.txt: Not permitted in $tmp.\n";
			    print "---------------------------------\n";
			    
			    open(TXT, ">$dir/$i".'.txt');
			    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
			    close(TXT);
			    print LOG "URL: $tmp\n";
			    print LOG "FILE: $i\.txt\n";
			    print LOG "State: Not permitted. Only abstract is saved.\n";
			    print LOG "---------------------------------\n\n";
			    $permit++;
			    last;
			} 
		    }

#######################################################
#  ScienceDirect and Others                           #
#######################################################
		    
		    else{
			if($res->as_string =~ /.*\"(.*\.pdf)\".*/){
			    $tmp = $1;
			    $tmp =~ s/UADB\/xppview\/// if($url =~ /\.acs\.org\//);
			    $req = HTTP::Request->new(GET => "$tmp");
			    $res = $ua->request($req);

			    if($res->is_success){
				if($res->content =~ /^\%PDF.*/){
				    $frag = 1;
				    open(PDF, ">$dir/$i".'.pdf');
				    print PDF $res->content;
				    close(PDF);
				    
				    print "$i\.pdf: Download from $tmp was successful.\n";
				    print "---------------------------------\n";

				    print LOG "URL: $tmp\n";
				    print LOG "FILE: $i\.pdf\n";
				    print LOG "State: Download successfully.\n";
				    print LOG "---------------------------------\n\n";
				    $download++;
				}
				else{
				    if($url =~ /(http\:\/\/.*?)\/.*/){
					$tmp=$1.$tmp;
				    }
				    $req = HTTP::Request->new(GET => "$tmp");
				    $res = $ua->request($req);
				    
				    if($res->is_success){
					$frag = 1;
					unless($res->content =~ /^\%PDF.*/){
					    print "$i\.txt: Not permitted in $tmp.\n";
					    print "---------------------------------\n";

					    open(TXT, ">$dir/$i".'.txt');
					    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
					    close(TXT);
					    print LOG "URL: $tmp\n";
					    print LOG "FILE: $i\.txt\n";
					    print LOG "State: Not permitted. Only abstract is saved.\n";
					    print LOG "---------------------------------\n\n";
					    $permit++;
					    last;
					}
					
					open(PDF, ">$dir/$i".'.pdf');
					print PDF $res->content;
					close(PDF);
					
					print "$i\.pdf: Download from $tmp was successful.\n";
					print "---------------------------------\n";

					print LOG "URL: $tmp\n";
					print LOG "FILE: $i\.pdf\n";
					print LOG "State: Download successfully.\n";
					print LOG "---------------------------------\n\n";
					$download++;
				    }
				}
			    }
			}
			elsif($res->as_string =~ /.*\"(http\:\/\/.*pdf.*)\".*/){
			    $req = HTTP::Request->new(GET => "$1");
			    $tmp = $1;
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $tmp.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $tmp\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $tmp was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $tmp\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }
		}
	    }
	    if($frag == 0 & $frag2 == 1){
		if($url =~ /.*\.sciencedirect\..*/){
		    print "$i\.txt: Not permitted in $url.\n";
		    print "---------------------------------\n";
		    
		    open(TXT, ">$dir/$i".'.txt');
		    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
		    close(TXT);
		    print LOG "URL: $url\n";
		    print LOG "FILE: $i\.txt\n";
		    print LOG "State: Not permitted. Only abstract is saved.\n";
		    print LOG "---------------------------------\n\n";
		    $permit++;
		}
		else{
		    print "$i\.txt: Invalid format from $url.\n";
		    print "---------------------------------\n";

		    open(TXT, ">$dir/$i".'.txt');
		    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
		    close(TXT);
		    print LOG "URL: $url\n";
		    print LOG "FILE: $i\.txt\n";
		    print LOG "State: Invalid format. Only abstract is saved.\n";
		    print LOG "---------------------------------\n\n";
		    $invalid++;
		}
	    }
	    elsif($frag == 0 & $frag2 == 0){
		print "$i\.txt: There is no PDF document.\n";
		print "---------------------------------\n";

		open(TXT, ">$dir/$i".'.txt');
		print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
		close(TXT);
		print LOG "URL:\n";
		print LOG "FILE: $i\.txt\n";
		print LOG "State: There is no PDF document. Only abstract is saved.\n";
		print LOG "---------------------------------\n\n";
		$nopdf++;
	    }
	}
    }
    print "Searched from ".$i." papers.","\n";
    print "Downloaded: $download\n";
    print "Not permitted: $permit\n";
    print "Not found: $notfound\n";
    print "Invalid format: $invalid\n";
    print "No PDF: $nopdf\n";
    if($i-$nopdf != 0){
	print sprintf("%d",$download/($i-$nopdf)*100)."\% of papers have been downloaded.\n\n";
    }
    else{
	print "0% of papers have been downloaded.\n\n";
    }

    print "Directory is \"$dir\".\n";
    print "Log file is \"$dir\/$dir\.log\"\n";
    print "Key file is \"$dir\/$dir\.key\"\n" if($key);

    print LOG "Total: $i\n";
    print LOG "Downloaded: $download\n";
    print LOG "Not permitted: $permit\n";
    print LOG "Not found: $notfound\n";
    print LOG "Invalid format: $invalid\n";
    print LOG "No PDF: $nopdf\n";
    if($i-$nopdf != 0){
	print LOG "Accuracy: ".sprintf("%d",$download/($i-$nopdf)*100)."\%\n";
    }
    else{
	print LOG "Accuracy: 0%\n";
    }
    close(LOG);

    if($key){
	KeySearch($dir, $key);
    }	
}


# KeySearch(dir, key)
#
# Counts occurrences of "key" in every document found in directory "dir"
# and writes a ranked report to "dir/dir.key".
#
#   dir : directory to scan. Files ending in ".pdf" are converted with
#         PDFtoTEXT first; every other file (except *.log and *.key) is
#         read as-is.
#   key : pattern handed to WordCount for each document.
#
# Returns the number of documents examined (0 when the directory or the
# report file cannot be opened; a warning is emitted in that case).
sub KeySearch{
    &opt_default();
    my @args=opt_get(@_);

    my $dir=shift @args;
    my $key=shift @args;
    my @txts;
    my %hash;
    # Counters start at 0 so the report prints numbers, not empty strings,
    # when a category has no files.
    my $i=0;
    my $pdf=0;
    my $txt=0;

    my $dh;
    unless(opendir($dh, $dir)){
	warn "KeySearch: cannot open directory \"$dir\": $!\n";
	return 0;
    }
    my @files=readdir($dh);
    closedir($dh);

    my $keyfile="$dir/$dir".'.key';
    my $out;
    unless(open($out, '>', $keyfile)){
	warn "KeySearch: cannot write \"$keyfile\": $!\n";
	return 0;
    }

    foreach my $name (@files){
	next if($name eq '.' || $name eq '..');
	next if($name =~ /\.log$/);
	next if($name =~ /\.key$/);

	# Anchored on purpose: PDFtoTEXT only rewrites a trailing ".pdf",
	# so names such as "x.pdf.bak" must be treated as plain text.
	if($name =~ /\.pdf$/){
	    push(@txts, PDFtoTEXT("$dir/$name"));
	    $pdf++;
	}
	else{
	    push(@txts, "$dir/$name");
	    $txt++;
	}
    }

    foreach my $path (@txts){
	# Normalise "no hits"/unreadable to 0 so the numeric sort and the
	# report below never see an undefined value.
	$hash{$path}=WordCount($path, $key) || 0;
	$i++;
    }

    print $out '**************************************************************'."\n".'**** Key Search (1.00)  Key Word Count from PDF Documents ****'."\n".'**************************************************************'."\n\n";

    print $out "Key: $key\n";
    print $out "Directory: $dir\n";
    print $out "Paper: $i (PDF: $pdf files   TXT: $txt files)\n";
    print $out "---------------------------------\n\n";

    # Highest count first; entries are listed without the "dir/" prefix.
    foreach my $path (sort{$hash{$b} <=> $hash{$a}}keys(%hash)){
	my $label=substr($path, length($dir)+1);
	print $out "$label: $hash{$path}\n";
    }
    close($out) or warn "KeySearch: error closing \"$keyfile\": $!\n";

    return $i;
}


# PDFtoTEXT(pdf)
#
# Runs the external "pdftotext" command on the given PDF file and returns
# the name of the text file: the input path with a trailing ".pdf"
# replaced by ".txt". The path is returned even when the conversion
# fails; a warning is emitted in that case.
sub PDFtoTEXT{
    &opt_default();
    my @args=opt_get(@_);

    my $pdf=shift @args;

    # List form of system() bypasses the shell, so paths containing spaces
    # or shell metacharacters are passed to pdftotext verbatim.
    my $status=system('pdftotext', $pdf);
    if($status != 0){
	warn "PDFtoTEXT: pdftotext failed for \"$pdf\" (status $status)\n";
    }

    $pdf=~s/\.pdf$/\.txt/;

    return $pdf;
}
    

# WordCount(file, query)
#
# Returns the number of non-overlapping matches of "query" in "file",
# counted line by line. "query" is interpreted as a regular expression.
#
# Returns 0 when there is no match, when "query" is empty or undefined,
# or when the file cannot be opened (a warning is emitted in that case).
sub WordCount{
    &opt_default();
    my @args=opt_get(@_);

    my $file=shift @args;
    my $query=shift @args;
    my $count=0;

    # An empty pattern makes Perl reuse the last successfully matched
    # regex, which would give an unrelated count; treat it as "no hits".
    return 0 unless(defined($query) && length($query));

    my $fh;
    unless(open($fh, '<', $file)){
	warn "WordCount: cannot open \"$file\": $!\n";
	return 0;
    }

    while(my $line=<$fh>){
	# A global match in list context yields every match; assigning it
	# to the empty list in scalar context gives the number of matches.
	my $hits=()=$line=~/$query/g;
	$count+=$hits;
    }
    close($fh);

    return $count;
}

1;
