#!/usr/bin/env perl

#
# rss2imap - an IMAP-based RSS aggreagor
#
# Copyright (C) 2004 Taku Kudo <taku@chasen.org>
#               2005 Yoshinari Takaoka <mumumu@mumumu.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of the
# GPL2, GNU General Public License version 2.
#
# $Id: rss2imap,v 1.2 2005/09/19 08:35:48 mumumu-org Exp $
#

require 5.008_000;
use strict;
use Mail::IMAPClient;
use LWP::UserAgent;
use XML::RSS;
use HTTP::Date;
use Encode;
use Encode::Guess qw/euc-jp shift-jis utf8 jis/;
use Jcode;


package Unicode;
{
    sub to_utf7
    {
	my $s = shift;
	utf8::decode ($s); # set utf8 flag
	$s = Encode::encode ("UTF-7", $s);
	$s =~ s/\+([^\/&\-]*)\/([^\/\-&]*)\-/\+$1,$2\-/g;
	$s =~ s/&/&\-/g;
	$s =~ s/\+([^+\-]+)?\-/&$1\-/g;
	$s =~ s#/#,#g; # '/' cannot be used in courier/dovecot
	return $s;
    } 

    sub to_utf8 
    {
	my $s = shift;
	Encode::from_to ($s, 'euc-jp', 'utf8');
	utf8::decode ($s); # set utf8 flag
	return $s;
    }

    sub to_mime
    {
	return Jcode->new ( Encode::encode ('euc-jp', shift) )->mime_encode;
    }
}


package RSS2IMAP;
{
    our $VERSION = "0.21";
    
    our $DEFAULT_GLOABL_CONFIG = {
	'user'                 => $ENV{USER},
	'host'                 => "localhost",
	'port'                 => 143,
        'interval'             => 30,
	'last-modified-folder' => 'RSS.last-modified',
	'prefix'               => undef,     # if you use courier-imap, set this parameter to "INBOX"
	'cram-md5'             => undef,
	'use-ssl'              => undef,
    };
    
    our $DEFAULT_SITE_CONFIG = {
	'folder'        => 'RSS.%{channel:title}',
	'type'          => 'items',
	'subject'       => '%{item:title}',
	'from'          => '<%{channel:title}>',
	'expire-unseen' => undef,
	'expire'        => -1,
	'expire-folder' => undef,
	'sync'          => undef,
    };
    
    
    sub new {
	my $this = shift;
	my (%config) = @_;
	bless \%config, $this;
    }
    
    
    sub connect {

	my $this = shift;
	my $ssl_sock = undef;
        
	if ( $this->{'use-ssl'} ) {
	    
	    eval 'use IO::Socket::SSL';
            if($@) {
	        print "you specify use SSL but dont install IO::Socket::SSL.\n";
		print "please install it via cpan.\n";
                exit();
	    }
	    
            $ssl_sock = IO::Socket::SSL->new("$this->{host}:$this->{port}")
            or die "could not connect to the imap server over ssl.";
        }

	my $imap = Mail::IMAPClient->new(
					 Socket   => ( $ssl_sock ? $ssl_sock : undef ),
					 Server   => $this->{host},
					 User     => $this->{user},
					 Port     => $this->{port},
					 Password => $this->{password},
					 Peek	  => 1,
					 Authmechanism => ($this->{'cram-md5'} ? "CRAM-MD5" : undef));

        die "imap client initialize failed. maybe you dont specify proper option...\n" if(!$imap);

        if ($this->{debug}) {
	    $imap->Debug(1);
	    $imap->Debug_fh();
	}

	if ( $this->{'use-ssl'} ) {
	    $imap->State(1);    #connected
	    $imap->login();     #if ssl enabled, login required because it is bypassed.
	}

        #   authentication failure. sorry.
	if( !$imap->IsAuthenticated() ) {
	    print "Authentication failure, sorry.\n";
	    print "connected to : $this->{host}:$this->{port}\n";
	    exit();
	}
	                                                       
	$this->{imap} = $imap;
	die "$@ $this->{user}\@$this->{host}\n" unless ($imap);
	
    }
    
    
    sub connect_test {
	my $this = shift;
	$this->connect ();
	$this->{imap}->close ();
    }


    sub run {

	my $this = shift;
	my $site_config_list = $this->parse_url_list();
	$this->connect ();	
	
	for my $site_config (@{$site_config_list}) {

	    $this->{site_config} = $site_config;

	    for my $url (@{$site_config->{url}}) {
		my $rss = $this->get_rss ($url);
		next unless ($rss);
		$this->send   ($rss);
		$this->expire ($rss);
	    }
	}
	
	$this->{imap}->close ();
    }


    sub get_rss {

	my $this = shift;
	my $link  = shift;
	my $imap = $this->{imap};
	my $folder = $this->get_real_folder_name ($this->{'last-modified-folder'});
	
	$this->select ($folder);
	my $message_id = sprintf ('%s@%s', $link, $this->{host});
	my @search = $imap->search ("UNDELETED HEADER message-id \"$message_id\" HEADER x-rss-aggregator \"rss2imap-checker\"");
	
	if ($@) {
	    warn "WARNNING: $@\n";
	}

	my $ua = new LWP::UserAgent;
	$ua->agent ("rss2imap/$VERSION");

	if ($this->{proxy}) {
	    $ua->proxy(['http','ftp'], $this->{proxy});
	}
	
	my $request = HTTP::Request->new('GET' => $link);
	
	if (my $latest = $this->get_latest_date (\@search))  {
	    $request->header('If-Modified-Since' => HTTP::Date::time2str ($latest));
	}

	my $response = $ua->request($request);
	
	if ($response->code eq '304') {
	    print "skip $link \n";
	    return;
	}
	
	unless ($response->is_success) {
	    warn "WARNING: connection error $link\n";
	    return;
	}
	
	$imap->delete_message (@search); # delete other messages;

	my $content = $response->content;
	my $header = substr ($content, 0, 100);
	unless ($header =~ /encoding="utf-8"/i) {
	    $content =~ s/<\?xml.*?\?>/<\?xml version="1.0" encoding="utf-8"\?>/;	
	    $content = Unicode::to_utf8 ($content);
	}
	my $rss = new XML::RSS;
	eval { $rss->parse($content); };
	warn "WARNNING: $@\n" if ($@);
	return unless ($rss);
	
	print "modified $link\n";
	
	# copy session information
	$rss->{'rss2imap:last-modified'} = HTTP::Date::time2str ($response->last_modified);
	$rss->{'rss2imap:message-id'}    = $message_id;
	$rss->{'rss2imap:rss-link'}      = $link;
	
	return $rss;
    }


    sub send {

	my $this = shift;
	my $rss  = shift;
	my $imap = $this->{imap};

	my @items;
	my $type = $this->{site_config}->{type};
	if ($type eq "channel") {
	    @items = ($rss->{channel}); # assume that item == rss->channel
	} elsif ($type eq "items") {
	    @items = @{$rss->{items}};
	} else {
	    warn "WARNNING: unknown type [$type]\n";
	    return;
	}
	
	my ($folder) = $this->apply_template ($rss, undef, 1, $this->{site_config}->{folder});
	$folder = $this->get_real_folder_name ($folder);
	$this->select ($folder);
	
	my @append_items;
	my @delete_mail;
	
	for my $item (@items) {
	    
	    my $message_id  = $this->gen_message_id ($rss, $item);	    

	    if ($this->{site_config}->{expire} > 0) {
		my $rss_date = $this->get_date ($rss, $item);
		next if (time() - HTTP::Date::str2time ($rss_date) > $this->{site_config}->{expire} * 60 * 60 * 24);
	    }

	    my @search = $imap->search ("NOT DELETED HEADER message-id \"$message_id\" HEADER x-rss-aggregator \"rss2imap\"");
	    
	    if ($@) {
		warn "WARNNING: $@\n";
		next;
	    }

	    if (@search == 0) {
		print " appending $message_id\n";		
		push @append_items, $item;
	    } else {
		my $rss_date = $this->get_rss_date ($rss, $item);
		next unless ($rss_date); # date filed is not found, we ignore it.
		my $latest = $this->get_latest_date (\@search);
		if (HTTP::Date::str2time ($rss_date) > $latest) {
		    print " updateing $message_id\n";
		    push @delete_mail, @search;
		    push @append_items, $item;
		} else {
		    print " skip $message_id\n";
		}
	    }
	}

	# delete items
	if ($this->{site_config}->{'sync'}) {
	    my %found = ();
	    for my $item (@items) {
		$found{$item->{link}} = 1;
	    }
	    
	    my $link = $rss->{'rss2imap:rss-link'};
	    my @search = $imap->search ("HEADER x-rss-link \"$link\" HEADER x-rss-aggregator \"rss2imap\"");

	    for my $msg (@search) {
		my $link2 = $imap->get_header ($msg, "x-rss-item-link");
		$link2 =~ s/^\s*//g; $link2 =~ s/\s*$//g; # must trim spaces, bug of IMAP server?
		unless ($found{$link2}) {
		    print "  deleting $link2\n";
		    push @delete_mail, $msg;
		}
	    }
	}
	
	# update all message
	$imap->delete_message (@delete_mail);
	for my $item (@append_items) {
	    $this->send_item ($folder, $rss, $item);
	}
	
	$this->send_last_update ($rss);

	return;
    }


    sub expire {

	my $this   = shift;
	my $rss    = shift;
	my $expire = $this->{site_config}->{expire} || -1;
	my $imap = $this->{imap};	

	return if ($expire <= 0);

	my ($folder, $expire_folder) = $this->apply_template ($rss, undef, 1,
							      $this->{site_config}->{folder},
							      $this->{site_config}->{'expire-folder'});
	$folder        = $this->get_real_folder_name ($folder);
	$expire_folder = $this->get_real_folder_name ($expire_folder); 
	my $key  =  Mail::IMAPClient->Rfc2060_date (time() - $expire * 60 * 60 * 24);
	
	my $query = (defined $this->{site_config}->{'expire-unseen'}) ? "SENTBEFORE $key" : "SEEN SENTBEFORE $key";
	$query .= " HEADER x-rss-aggregator \"rss2imap\"";

	$this->select ($folder);
	my @search = $imap->search ($query);
	
	if ($@) {
	    warn "WARNNING: $@\n";	    
	    return;
	}
	
	return if (@search == 0);

	if ($expire_folder) {
	    $this->create_folder ($expire_folder);
	    for my $msg (@search) {
		print "  moving: $msg -> $expire_folder\n";
		$imap->move ($expire_folder, $msg);
	    }
	} else {
	    print "  deleting: [@search]\n";
	    $imap->delete_message (@search);
	}
    }


    sub get_latest_date {
	
	my $this = shift;
	my $list = shift;
	my $header = shift || 'date';
	my $imap = $this->{imap};

	my $latest = -1;
	for my $msg (@{$list}) {
	    my $date = $imap->get_header ($msg, $header);
	    next unless ($date);
	    $date = HTTP::Date::str2time ($date);
	    $latest = $date if ($date > $latest);
	}
	
	return ($latest == -1) ? undef : $latest;
    }


    sub parse_url_list {

	my $this = shift;
	my @filenames =  @{$this->{list}};
	my %config = %{$DEFAULT_SITE_CONFIG};
	my @config_list;
	
	for my $filename (@filenames) {
	    open (F, $filename) || die "$!: $filename\n";
	    while (<F>) {
		chomp;
		s/\s*$//;
		if (/^(ftp|http|https):\/\//) {
		    push @{$config{url}}, $_;
		} elsif (/^\#/) {
		    
		} elsif (/^([^:]+):\s*(.+)/) {
		    my $key   = lc($1);
		    my $value = $2;
		    unless (exists $config{$key}) {
			warn "WARNNING: key value [$1] is undefined\n";
			next;
		    }
		    next if ($key =~ /(expire-unseen|sync)/ && $2 =~ /^(no|0)$/i);
		    $config{$key} = Unicode::to_utf8 ($value);
		} elsif (/^\s*$/) {
		    push @config_list, { %config } if (keys %config);
		    %config = %{$DEFAULT_SITE_CONFIG};
		} else {
		    warn "WARNNING: parse error $_\n";
		}
	    }
	    close(F);
	    push @config_list, { %config } if (keys %config);
	}

	return \@config_list;
    }


    sub send_last_update {

	my $this = shift;
	my $rss = shift;

	my $message_id = $rss->{'rss2imap:message-id'};
	my $date       = $rss->{'rss2imap:last-modified'};
	my $link       = $rss->{'rss2imap:rss-link'};
	my $a_date     = scalar (localtime ());

	my $body =<<"BODY"
From: $this->{user}\@$this->{host}
Subject: $link
MIME-Version: 1.0
Content-Type: text/plain;
Content-Transfer-Encoding: 7bit
Content-Base: $link
Message-Id: $message_id
Date: $date
User-Agent: rss2imap version $VERSION
X-RSS-Link: $link
X-RSS-Aggregator: rss2imap-checker
X-RSS-Aggregate-Date: $a_date;
X-RSS-Last-Modified: $date

Link: $link
Last-Modified: $date
Aggregate-Date: $a_date
BODY
;
	my $folder = $this->get_real_folder_name ($this->{'last-modified-folder'});
	$this->{imap}->append_string ($folder, $body);	
    }
    
    sub send_item {

	my $this   = shift;
	my $folder = shift;
	my $rss    = shift;
	my $item   = shift;
	
	my $date       = $this->get_date ($rss, $item);
	my $rss_date   = $this->get_rss_date ($rss, $item) || "undef";

	my $subject    = $this->{site_config}->{subject}; 
	my $from       = $this->{site_config}->{from};    
	my $desc       = $item->{description} || "";	
	my $message_id = $this->gen_message_id ($rss, $item);
	($subject, $from) = $this->apply_template ($rss, $item, undef, $subject, $from);

	my $m_subject = Unicode::to_mime ($subject);
	my $m_from    = Unicode::to_mime ($from);
	my $a_date    = scalar (localtime ());
	my $l_date    = $rss->{'rss2imap:last-modified'} || $a_date;
	my $link      = $rss->{'rss2imap:rss-link'} || "undef";

    	my $body =<<"BODY"
From: $m_from
Subject: $m_subject
MIME-Version: 1.0
Content-Type: text/html; charset=UTF-8
Content-Transfer-Encoding: 8bit
Content-Base: $item->{link}
Message-Id: $message_id
Date: $date
User-Agent: rss2imap version $VERSION
X-RSS-Link: $link
X-RSS-Channel-Link: $rss->{channel}->{link}
X-RSS-Item-Link: $item->{link}
X-RSS-Aggregator: rss2imap	  
X-RSS-Aggregate-Date: $a_date
X-RSS-Last-Modified: $l_date;

<html>
<head>
<title>$subject</title>
<style type="text/css">
body {
      margin: 0;
      border: none;
      padding: 0;
    }
iframe {
  position: fixed;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  border: none;
}
</style>
</head>
<body>
<iframe  width="100%" height="100%" src="$item->{link}">
$desc
</iframe>
</body>
</html>
BODY
;
	utf8::encode ($body);
	$this->{imap}->append_string ($folder, $body);
    }    

    # wrappers
    sub select {
	my $this   = shift;
	my $folder = shift;
	$this->create_folder ($folder);
	$this->{imap}->select ($folder) || warn "@!\n";
    }

    sub create_folder {
	my $this   = shift;
	my $folder = shift;
	my $imap = $this->{imap};
	unless ($imap->exists($folder)) {
	    $imap->create ($folder) || warn "WARNNING: $@\n";
	}
    }
    
    # misc functions
    sub gen_message_id {
	my $this = shift;
	my $rss  = shift;
	my $item = shift;
	return sprintf ('%s@%s', $item->{link}, $this->{host});
    }
    
    sub get_rss_date {
	my $this = shift;
	my $rss  = shift;
	my $item = shift;
	return $item->{dc}->{date} || $rss->{channel}->{dc}->{date} || $rss->{'rss2imap:last-modified'};
    }

    sub get_date {
	my $this = shift;
	my $rss  = shift;
	my $item = shift;
	my $date = $this->get_rss_date ($rss, $item) || "";
	return HTTP::Date::time2str(HTTP::Date::str2time ($date));
    }

    sub get_real_folder_name {
	my $this = shift;
	my $str  = shift;
	if ($this->{prefix}) {
	    $str = sprintf ("%s.%s", Unicode::to_utf8 ($this->{prefix}), $str);
	}
	return Unicode::to_utf7 ($str);
    }

    sub apply_template {

	my $this = shift;
	my $rss  = shift;
	my $item = shift;
	my $folder_flg = shift;
	my @from = @_;

	my %cnf;
	if ($rss) {
	    $cnf{'channel:title'}       = $rss->{channel}->{title};
	    $cnf{'channel:link'}        = $rss->{channel}->{link};
	    $cnf{'channel:description'} = $rss->{channel}->{description};
	    $cnf{'channel:dc:date'}     = $rss->{channel}->{dc}->{date} || "";
	}

	if ($item) {
	    $cnf{'item:description'}  = $item->{description} || $rss->{channel}->{description};
	    $cnf{'item:link'}         = $item->{link} || $rss->{channel}->{link};
	    $cnf{'item:title'}        = $item->{title} || $rss->{channel}->{title};
	    $cnf{'item:dc:date'}      = $item->{dc}->{date} || $item->{dc}->{date} || "";
	    $cnf{'item:dc:subject'}   = $item->{dc}->{subject} || "";
	    $cnf{'item:dc:creator'}   = $item->{dc}->{creator} || "";
	}
	
	$cnf{host}            = $this->{host};
	$cnf{user}            = $this->{user};
	$cnf{'last-modified'} = $rss->{'rss2imap:last-modified'};
	$cnf{'rss-link'}      = $rss->{'rss2imap:rss-link'};

	my @result;
	for my $from (@from) {
	    if ($from) {
		for my $key (keys %cnf) {
		    next unless ($cnf{$key});
		    $cnf{$key} =~ s/\./:/g if ($folder_flg);
		    my $key2 = "%{" . $key . "}";
		    $from =~ s/$key2/$cnf{$key}/eg;
		}
	    }
	    push @result, $from;
	}

	return @result;
    }
}


package main;
use Getopt::Long;
use POSIX qw(setsid);

sub parse_options {
    
    my %config = %{$RSS2IMAP::DEFAULT_GLOABL_CONFIG};

    Getopt::Long::config('bundling');
    GetOptions(
               's|host=s'          => \$config{'host'},
               'u|user=s'          => \$config{'user'},
               'P|port=s'          => \$config{'port'},
               'm|last-modified-folder=s' => \$config{'last-modified-folder'},
               'password=s'        => \$config{'password'},
               'p|prefix=s'        => \$config{'prefix'},
               'd|debug'           => \$config{'debug'},
	       'proxy=s'           => \$config{'proxy'},
	       'o|once_p'          => \$config{'once_p'},
               'n|nodaemon'        => \$config{'nodaemon'},
	       'c|cram-md5'        => \$config{'cram-md5'},
               'i|interval=s'      => \$config{'interval'},
               'S|use-ssl'         => \$config{'use-ssl'},
               );
    
    return %config;
}


sub show_usage {
    print <<"EOS"
Usage: rss2imap [options] file1 file2 ...
-s,  --host=HOST         use HOST as an IMAP host [localhost]
-P,  --port=INT          use INT as a port number [143]
-u,  --user=USER         use USER as a user name [$ENV{USER}]
     --password=PASSWORD use PASSWORD as your password
-p,  --prefx=PREFIX      use PREFIX as a folder prefix name []
                         set "INBOX" when you use courier-imap
-m,  --last-modified-folder=FOLDER
                         user "FOLDER" as an last-modified-folder
                                      [RSS.last-modified]
-c,  --cram-md5          use cram-md5 authentication
-o,  --once              run once and exit
-n,  --nodaemon          run as a forground process 
-i,  --interval=MIN      run at MIN inervals [30]
     --proxy=HOST        use HOST as a http proxy
-S   --use-ssl           use Secure Socket Layer(SSL)

EOS
;
    exit();
}

sub myread_password {
    
    my $prompt = shift @_;
    my $password = "";

    eval 'use Term::ReadPassword';
    if ($@) {
        warn "you dont install Term::ReadPassword.\n";
	warn "your password will display on console.\n";
        print $prompt;
	$password = <STDIN>;
    } else {
        $password = read_password($prompt);
    }
    chomp $password;
    return $password;
}


sub main {
    
    $| = 1; select (STDERR); $| = 1; select (STDOUT);    
    my %config = parse_options ();
    $config{list} = \@ARGV;

    show_usage () if (@{$config{list}} == 0);

    unless ($config{'password'}) {
        $config{password} = myread_password('password: ');
    }

    if ($config{once_p} || $config{debug}) {

	my $rss2imap = new RSS2IMAP (%config);
	$rss2imap->run ();	

    } else {

        {
	    # invoke rss2imap once to check whether given parameters are OK
	    my $rss2imap = new RSS2IMAP (%config);
	    $rss2imap->connect_test ();
	}

	unless (defined $config{nodaemon}) {
	    exit() if fork();
	    setsid();
	    umask(022);
	    open (STDIN, '/dev/null') or die;
	    open (STDOUT, '>>/dev/null') or die;
	    open (STDERR, '>>/dev/null') or die;
	}

	while (1) {
	    eval {
		my $rss2imap = new RSS2IMAP (%config);
		$rss2imap->run ();
	    };
	    sleep $config{interval} * 60;
	}
    }
}

main ();
