# antenna.rb $Revision: 1.35 $
# 
#
# Copyright (C) 2004  Michitaka Ohno <elpeo@mars.dti.ne.jp>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
# USA.

require 'net/http'
require 'timeout'
require 'time'
require 'cgi'
require 'nkf'

# Version of rAntenna; interpolated into the generator string and the
# User-Agent header. Frozen so the shared constant cannot be mutated in place.
RANTENNA_VERSION = '0.0.3'.freeze

# Antenna polls a configured list of web pages for their last-modified time
# and publishes the result as an RSS 1.0 (RDF) document.
#
# The configuration file and the plugin scripts are plain Ruby source that is
# eval'd inside the instance, so they can read and assign instance variables.
# The class itself reads @urls, @rdf_path, @rdf_url, @title, @antenna_url,
# @copyright and @link_format.
#
# Each entry of @urls is an array: [0] is written as the item title, [1] as
# the item creator, [2] is the URL used as hash key and link, and [3], when
# present, is fetched instead of [2].
class Antenna
	# Loads the configuration and selects the RDF character encoding.
	#
	# conf:: path of the configuration file, resolved relative to the
	#        directory of this source file; defaults to 'antenna.conf'.
	#
	# Raises whatever opening or evaluating the configuration file raises.
	def initialize( conf = nil )
		@generator = "Powered by rAntenna #{RANTENNA_VERSION} and Ruby #{RUBY_VERSION}"

		@dir = File::dirname( __FILE__ )

		# SECURITY: the configuration file is executed as Ruby code with full
		# access to this object. It must never come from an untrusted source.
		# eval (rather than a parser) is kept because the file assigns the
		# instance variables directly.
		eval( untainted( File::open( File.expand_path( conf||'antenna.conf', @dir ) ){|f| f.read } ) )

		@urls ||= []
		@rdf_path ||= 'index.rdf'

		# uconv is optional: with it the RDF is written as UTF-8, without it
		# the text is passed through unchanged and declared as EUC-JP.
		begin
			require 'uconv'
			@rdf_encoding = 'UTF-8'
			@rdf_encoder = Proc::new {|s| Uconv.euctou8( s ) }
			@rdf_decoder = Proc::new {|s| Uconv.u8toeuc( s ) }
		rescue LoadError
			@rdf_encoding = 'EUC-JP'
			@rdf_encoder = Proc::new {|s| s }
			@rdf_decoder = Proc::new {|s| s }
		end

		@last_modified = {}
		@last_detected = {}
		@content_length = {}
		@auth_url = {}
	end

	# Runs the input plugins, then asks every remaining http:// URL in @urls
	# for its modification time and stores it in @last_modified (together
	# with @content_length, @last_detected and @auth_url).
	#
	# URLs that already have an entry in @last_modified are skipped, as are
	# URLs that are not plain http://. A page that fails or takes longer than
	# ten seconds is skipped; the remaining pages are still visited.
	def go_round
		# Plugins are eval'd in this method's scope; the block variable keeps
		# the name `file` because plugin code can see it.
		Dir::glob( File.join( @dir, 'plugin', 'input_*.rb' ) ).sort.each do |file|
			eval( untainted( File::open( untainted( file ) ){|f| f.read } ) )
		end

		limittime = 10
		request_header = {'User-Agent' => "rAntenna #{RANTENNA_VERSION}"}

		# Net::HTTP.version_1_1 is not defined by every net/http release, so
		# it is only called where it exists. The requests below work with
		# either return convention of Net::HTTP#get.
		Net::HTTP.version_1_1 if Net::HTTP.respond_to?( :version_1_1 )

		@urls.each do |item|
			url = item[2]
			next if @last_modified[url]
			next unless %r[^http://([^/]+)(/.*)?$] =~ (item[3]||url)
			path = $2||'/'
			host, port = $1.split( /:/ )
			port = '80' unless /^[0-9]+$/ =~ port
			begin
				Timeout.timeout( limittime ) do
					Net::HTTP.start( untainted( host ), port.to_i ) do |http|
						response = http.head( path, request_header )
						if response['Last-Modified'] then
							record_update( url, Time.parse( response['Last-Modified'] ).localtime, response )
						else
							# No header: download the page and look for a date in its text.
							response, = http.get( path, request_header )
							lm = get_last_modified( response.body )
							record_update( url, lm, response ) if lm
						end
					end
				end
			rescue StandardError, Timeout::Error
				# Best effort: an unreachable, slow or unparsable page is left
				# without a date. The rescue wraps the timeout so that an expired
				# timer is handled here too, and it no longer catches Exception,
				# so Interrupt and SystemExit still stop the program.
			end
		end
	end

	# Writes the RDF document and then runs the output plugins.
	#
	# file:: output path, resolved relative to the directory of this source
	#        file; defaults to @rdf_path.
	#
	# If the output file already exists, the dates it contains are used for
	# URLs that have no entry in @last_modified yet. Items are ordered newest
	# first; URLs without a date come last and get no <dc:date>.
	def output( file = nil )
		output_file = File.expand_path( file||@rdf_path, @dir )

		restore_last_modified( output_file ) if File.exist?( output_file )

		sorted_urls = @urls.sort do |a, b|
			atime = @last_modified[a[2]]
			btime = @last_modified[b[2]]
			if atime && btime then
				btime <=> atime
			elsif atime then
				-1
			elsif btime then
				1
			else
				0
			end
		end

		# Every interpolated value is XML-escaped; URLs routinely contain '&',
		# which would otherwise make the document malformed.
		r = ""
		r << <<-RDF
<?xml version="1.0" encoding="#{@rdf_encoding}"?>
<?xml-stylesheet href="index.xsl" type="text/xsl" media="screen"?>

<rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/">
<channel rdf:about="#{xml_escape( @rdf_url )}">
<title>#{xml_escape( @title )}</title>
<link>#{xml_escape( @antenna_url )}</link>
<description>#{xml_escape( @title )}</description>
<dc:date>#{rdf_time( Time.now )}</dc:date>
<dc:language>ja</dc:language>
<dc:rights>#{xml_escape( @copyright )}</dc:rights>
<dc:publisher>#{xml_escape( @generator )}</dc:publisher>
<items>
<rdf:Seq>
RDF

		sorted_urls.each do |item|
			next unless item
			r << "<rdf:li rdf:resource=\"#{xml_escape( get_link( item[2] ) )}\"/>\n"
		end

		r << <<-RDF
</rdf:Seq>
</items>
</channel>
RDF

		sorted_urls.each do |item|
			next unless item
			linkurl = xml_escape( get_link( item[2] ) )
			r << <<-RDF
<item rdf:about="#{linkurl}">
<title>#{xml_escape( item[0] )}</title>
<link>#{linkurl}</link>
<description/>
<dc:creator>#{xml_escape( item[1] )}</dc:creator>
RDF
			if @last_modified[item[2]] then
				r << "<dc:date>#{rdf_time( @last_modified[item[2]] )}</dc:date>\n"
			end
			r << "</item>\n"
		end

		r << "</rdf:RDF>\n"

		File::open( output_file, "w" ) do |f|
			f.print @rdf_encoder.call( r )
		end

		# Plugins are eval'd in this method's scope and can therefore see the
		# locals above (output_file, sorted_urls, r) and the block variable
		# `file`; those names are kept for that reason.
		Dir::glob( File.join( @dir, 'plugin', 'output_*.rb' ) ).sort.each do |file|
			eval( untainted( File::open( untainted( file ) ){|f| f.read } ) )
		end
	end

	# Returns the link to publish for +url+.
	#
	# Without @link_format, or without a known modification time for +url+,
	# the URL itself is returned. Otherwise %url% and %antenna_url% in
	# @link_format are replaced by +url+ and @antenna_url, and the rest of
	# the format is expanded by Time#strftime with the modification time.
	def get_link( url )
		return url unless @link_format && @last_modified[url]
		format = @link_format.gsub( /%(antenna_url|url)%/ ) do
			# Double every '%' so strftime reproduces the inserted text literally.
			($1 == 'url' ? url : @antenna_url.to_s).gsub( /%/, '%%' )
		end
		@last_modified[url].strftime( format )
	end

	# Inverse of get_link: returns the URL that +link+ was generated from.
	#
	# date:: the modification time that was used to build the link; when nil,
	#        every strftime directive of @link_format matches any text.
	#
	# Returns +link+ unchanged when @link_format is not set, when +link+ does
	# not match the format, or when the format contains no %url%.
	def get_unlink( link, date = nil )
		return link unless @link_format
		# Build the pattern piece by piece so that literal text and formatted
		# date values are regexp-escaped while strftime directives (including
		# flagged ones such as %-d) are left intact.
		re = @link_format.split( /(%(?:antenna_url|url)%)/ ).map do |part|
			case part
			when '%url%' then '(.+)'
			when '%antenna_url%' then Regexp.escape( @antenna_url.to_s )
			else strftime_pattern( part, date )
			end
		end.join
		matched = Regexp.new( "\\A#{re}\\z" ).match( link )
		(matched && matched[1]) || link
	end

	# Guesses the modification time of an HTML page from its text.
	#
	# str:: the page source. It is converted with NKF ('-m0 -e') and every
	#       tag except <meta> is replaced by ';' before the lines are scanned.
	#
	# A <meta http-equiv="last-modified" content="..."> value ends the scan
	# as soon as it is found. Otherwise each candidate line is joined with
	# its neighbours and searched for a date and a time of day; the latest
	# combination found wins.
	#
	# Returns a local Time, or nil when nothing was recognised. Exceptions
	# from Time.parse and Time.local (for example on out-of-range values)
	# are not caught here.
	def get_last_modified( str )
		lm = nil
		data = NKF::nkf( '-m0 -e', str ).gsub( /<(?!meta)[^>]*>/im, ';' ).split( /[\r\n]+/ )
		data.each_index do |i|
			if /http-equiv=\"?last-modified.+content=\"([^\"]+)\"/i =~ data[i] then
				date = Time.parse( $1 ).localtime
				lm = date if lm.nil? || date > lm
				break
			# NOTE(review): this pattern starts with an empty alternative, so
			# as written it matches every line. Text before the first '|'
			# looks like it was lost, possibly non-ASCII characters dropped by
			# an encoding conversion. Confirm against an intact copy.
			elsif /|update|modified/i =~ data[i] then
				# data[-1] is the LAST line in Ruby, so the first line must not
				# look at index i-1.
				before = i > 0 ? data[i-1] : ''
				s = before + data[i] + (data[i+1]||'')
				ymd = scan_date( s )
				next unless ymd
				hms = scan_time( s )
				next unless hms
				date = Time.local( *(ymd + hms) )
				lm = date if lm.nil? || date > lm
			end
		end
		lm
	end

	private

	# Returns +obj+ untainted where Object#untaint exists, and +obj+ itself
	# on Ruby versions that no longer define that method.
	def untainted( obj )
		obj.respond_to?( :untaint ) ? obj.untaint : obj
	end

	# Stores the result of a successful check of +url+. +response+ is the
	# HTTP response the Content-Length header is read from.
	def record_update( url, modified, response )
		@last_modified[url] = modified
		@content_length[url] = response['Content-Length'].to_i
		@last_detected[url] = Time.now
		@auth_url[url] = @antenna_url
	end

	# Reads the RDF file at +path+ and fills @last_modified, for URLs that
	# have no entry yet, from each <dc:date> that follows a <link>.
	def restore_last_modified( path )
		buf = File::open( path ){|f| f.read }
		begin
			buf = @rdf_decoder.call( buf )
		rescue StandardError
			# Undecodable content is scanned as it was read.
		end
		linkurl = nil
		buf.scan( /<([A-Za-z:]+)>([^<]*)<\/\1>/ ) do |tag|
			if tag[0] == 'link' then
				# Links are XML-escaped when written, so undo that here.
				linkurl = CGI::unescapeHTML( tag[1] )
			elsif linkurl && tag[0] == 'dc:date' then
				begin
					date = Time.parse( tag[1] ).localtime
				rescue ArgumentError
					# An unreadable date in the old file is ignored.
					next
				end
				url = get_unlink( linkurl, date )
				@last_modified[url] ||= date
			end
		end
	end

	# Escapes +value+ for use in XML text or attribute values; nil becomes
	# the empty string.
	def xml_escape( value )
		CGI::escapeHTML( value.to_s )
	end

	# Formats +time+ the way the RDF <dc:date> elements are written.
	def rdf_time( time )
		time.strftime( '%Y-%m-%dT%H:%M:%S' )
	end

	# Turns a strftime format fragment into regexp source: literal text is
	# escaped, '%%' becomes '%', and every other directive becomes the
	# escaped value formatted from +date+, or '.+' when +date+ is nil.
	def strftime_pattern( text, date )
		text.gsub( /%[-_0^:#]*[0-9]*[EO]?[A-Za-z%+]|[^%]+|%/ ) do |token|
			if token == '%%' then
				'%'
			elsif token.length > 1 && token[0, 1] == '%' then
				date ? Regexp.escape( date.strftime( token ) ) : '.+'
			else
				Regexp.escape( token )
			end
		end
	end

	# Two-digit years are taken to be in the 2000s; other values are
	# returned unchanged.
	def expand_year( text )
		text.to_i < 100 ? (text.to_i + 2000) : text
	end

	# Year to assume for a date given without one: the current year, or the
	# previous year when +month+ is later than the current month.
	def latest_year_of( month )
		now = Time.now
		month.to_i > now.month ? now.year - 1 : now.year
	end

	# Searches +s+ for a calendar date and returns [year, month, day], or
	# nil when no pattern matches. The first matching pattern wins.
	#
	# NOTE(review): the first two patterns appear to be damaged. The
	# non-ASCII character left in the first one, and the bare gaps between
	# the digit groups, suggest multibyte separator characters were lost in
	# an encoding conversion. As written, the second pattern matches any run
	# of two or more digits. Restore them from an intact copy and confirm.
	def scan_date( s )
		if /([0-9]+)ǯ\s*([0-9]+)\s*([0-9]+)/ =~ s then
			[expand_year( $1 ), $2, $3]
		elsif /([0-9]+)\s*([0-9]+)/ =~ s then
			[latest_year_of( $1 ), $1, $2]
		elsif /([0-9]+)\/([0-9]+)\/([0-9]+)/ =~ s then
			[expand_year( $1 ), $2, $3]
		elsif /([0-9]+)\/([0-9]+)/ =~ s then
			[latest_year_of( $1 ), $1, $2]
		elsif /([0-9]+)-([A-Z][a-z]+)-([0-9]+)/ =~ s then
			[expand_year( $3 ), $2[0,3], $1]
		elsif /([0-9]+)-([0-9]+)-([0-9]+)/ =~ s then
			[expand_year( $1 ), $2, $3]
		elsif / ([0-9]{1,2}) ([A-Z][a-z]{2}) ([0-9]{4}) / =~ s then
			[$3, $2, $1]
		end
	end

	# Searches +s+ for a time of day and returns [hour, minute, second], or
	# nil when no pattern matches. The first matching pattern wins.
	#
	# NOTE(review): the first and last patterns look damaged in the same way
	# as those in scan_date. Confirm against an intact copy.
	def scan_time( s )
		if /([0-9]+)\s*([0-9]+)ʬ/ =~ s then
			[$1, $2, 0]
		elsif /([0-9]+):([0-9]+):([0-9]+)/ =~ s then
			[$1, $2, $3]
		elsif /([0-9]+):([0-9]+)/ =~ s then
			[$1, $2, 0]
		elsif /([0-9]+)([0-9]+)/ =~ s then
			[$1, $2, 0]
		end
	end
end

