#!/bin/sh

# Author: Aleksey Tulinov <aleksey.tulinov@gmail.com>
# Contributor: SumolX <https://github.com/SumolX>

DEFAULT_CONFIG_DIR="/etc/leech"

# Print a short help text to stdout: which environment variables select the
# configuration directory and the downloads directory, and what happens when
# they are left out.
usage() {
    printf 'Usage: CONFIG_DIR="<DIRECTORY>" DOWNLOADS_DIR="<DIRECTORY>" %s\n' "$0"
    printf '\n'
    printf '%s\n' "Or you could omit DOWNLOADS_DIR to download files to current directory."
    printf 'You could also omit CONFIG_DIR to use "%s" by default\n' "$DEFAULT_CONFIG_DIR"
}

# Fall back to the default configuration directory (and say so) when the
# caller left CONFIG_DIR unset or empty.
[ -n "$CONFIG_DIR" ] || {
    echo "WARNING: CONFIG_DIR is not set, assuming it's \"$DEFAULT_CONFIG_DIR\""
    CONFIG_DIR="$DEFAULT_CONFIG_DIR"
}

# Helper programs and the config loader are looked up in the directory this
# script was started from.
HERE=$(dirname "$0")
CONFIG="${HERE}/leech-config"
WILD_MAGIC="${HERE}/leech-wild-magic"
RFC822TOUNIX="${HERE}/rfc822tounix"

# The config loader must exist before it can be sourced.
# "$CONFIG" is quoted so that an installation path containing whitespace or
# glob characters is tested as one file name instead of being word-split.
if [ ! -f "$CONFIG" ]; then
    echo "ERROR: No config file found at $CONFIG"
    echo
    usage
    exit 1
fi

# Load the settings into the current shell.
# NOTE(review): presumably this is where XSL_TEMPLATE, FOODS, DOWNLOADS and
# the optional tunables checked below get defined - confirm against leech-config.
. "$CONFIG"

# sanity checks
#
# Each of the three files below has to exist as a regular file; if one is
# missing, report it and stop with exit status 1.

# XSL template used to transform downloaded feeds
[ -f "$XSL_TEMPLATE" ] || {
    echo "ERROR: No XSL template at \"$XSL_TEMPLATE\""
    echo "You probably need to reinstall leech."
    exit 1
}

# list of feeds
[ -f "$FOODS" ] || {
    echo "ERROR: No feeds file at \"$FOODS\""
    echo
    usage
    exit 1
}

# downloads file
[ -f "$DOWNLOADS" ] || {
    echo "ERROR: No downloads file at \"$DOWNLOADS\""
    echo
    usage
    exit 1
}

# defaults
#
# Every setting that is unset or empty at this point gets its default value.
[ -n "$DOWNLOADS_DIR" ] || {
    echo "WARNING: DOWNLOADS_DIR is not set, assuming it's current directory."
    DOWNLOADS_DIR=.
}

: "${DOWNLOAD_RECIPE:=leech-default}"
: "${PERSISTENCE:=$DOWNLOADS_DIR}"
: "${EXPIRATION:=3}"      # days
: "${QUIET_PERIOD:=0}"    # hours, ineffective by default
: "${HISTORY:=14}"        # days
: "${DOWNLOAD_DELAY:=5}"  # seconds
: "${TIMEOUT:=30}"        # seconds

# derived values
#
# recipe is looked up next to this script (fix recipe path if running w/o shell)
DOWNLOAD_RECIPE="$(dirname "$0")/${DOWNLOAD_RECIPE}"
# DB lives in the persistence directory
DB="${PERSISTENCE}/.leech.db"
# days -> seconds (86400 seconds in a day)
EXPIRATION=$(( $EXPIRATION * 86400 ))
HISTORY=$(( $HISTORY * 86400 ))
# hours -> seconds (60 * 60 seconds in an hour)
QUIET_PERIOD=$(( $QUIET_PERIOD * 3600 ))

# some more sanity checks
#
# QUIET_PERIOD and EXPIRATION are both in seconds here. A quiet period that
# is not shorter than the expiration time leaves no moment at which an item
# is eligible, so that configuration is rejected.
if [ "$QUIET_PERIOD" -ge "$EXPIRATION" ]; then
    printf '%s\n' \
        "ERROR: You configured QUIET_PERIOD=$(($QUIET_PERIOD / 3600)) hour(s) to be larger or equal to EXPIRATION=$(($EXPIRATION / 86400)) day(s), meaning no files will be ever downloaded" \
        "ERROR: I would disable QUIET_PERIOD for you, but you need to fix your configuration really"
    exit 1
fi

# A fetch window of an hour or less is allowed, but worth a warning.
if [ "$(($EXPIRATION - $QUIET_PERIOD))" -le 3600 ]; then
    printf '%s\n' \
        "WARNING: Difference between EXPIRATION and QUIET_PERIOD is $((($EXPIRATION - $QUIET_PERIOD) / 60)) minutes" \
        "WARNING: Leaving you less than an hour window to fetch files. I wish you good luck with that."
fi

# temporary file with RSS-feed
#
# Stop right away if the file can't be created: every feed is downloaded
# into it.
LUNCH=$(mktemp -t leech.lunch.XXXXXX) || {
    echo "ERROR: Can't create temporary file for RSS feed"
    exit 1
}

# Remove the file on every way out of the script (normal end, `exit 1` from
# a later check, or a signal). "$LUNCH" is expanded when the trap runs, so
# the path needs no extra quoting here.
trap 'rm -f "$LUNCH"' EXIT
# A signal has to terminate the script; exiting from the handler runs the
# EXIT trap above, which does the cleanup.
trap 'exit 1' HUP INT QUIT TERM

# cURL options for downloading lunch
#
#   -k  to ignore invalid certificates
#   -s  for silent, no output
#   -f  for not outputting failed download to file (lunch)
#   -L  to follow redirects
#   -g  to disable globbing in URLs
#   --connect-timeout, --max-time  both set to $TIMEOUT
CURL_LUNCH_OPTS="-k -s -f -L -g"
CURL_LUNCH_OPTS="$CURL_LUNCH_OPTS --connect-timeout $TIMEOUT --max-time $TIMEOUT"

# misc options
#

# regex that takes one item line apart:
#   \1 is feed's URL, \2 is title, \3 is torrent URL, \4 is datetime in RFC822
SED_REGEX='([^ ]+) (.*) (.+) "(.*)"$'

# skip loading DTDs during XSL transformation
XSLT_OPTS='--novalid'

# prepare environment
#

# make sure the downloads directory exists; give up if it can't be created
[ -d "$DOWNLOADS_DIR" ] || {
    echo "WARNING: Downloads directory \"$DOWNLOADS_DIR\" doesn't exist, creating it."
    mkdir -p "$DOWNLOADS_DIR" || exit 1
}

# same for the directory that holds the DB
[ -d "$PERSISTENCE" ] || {
    echo "WARNING: Persistence directory \"$PERSISTENCE\" doesn't exist, creating it"
    mkdir -p "$PERSISTENCE" || exit 1
}

# current time as seconds since the Unix epoch; taken once so that the whole
# run uses the same reference point
NOW="$(date -u '+%s')"

# downloading
#
# For every feed URL listed in $FOODS: download the feed into $LUNCH,
# transform it with the XSL template, let $WILD_MAGIC select item lines and
# hand every selected item that is new and inside its time window to
# $DOWNLOAD_RECIPE. Each successful download is recorded in $DB as
# "<md5 of URL> <time of this run>".

# prefix_lines PREFIX
#
# Copy stdin to stdout with "PREFIX " put in front of every line.
# The prefix is passed through the environment, not spliced into a sed
# program, so '&', '|' and '\' (all legal in feed URLs) are copied literally.
prefix_lines() {
    LEECH_LINE_PREFIX="$1" awk '{ print ENVIRON["LEECH_LINE_PREFIX"] " " $0 }'
}

# blank lines and lines whose first non-blank character is '#' are skipped;
# read -r keeps backslashes in URLs intact
grep -v -e '^[[:space:]]*#' -e '^[[:space:]]*$' "$FOODS" | while read -r FOOD; do
    # download lunch
    #
    printf 'Downloading feed: %s... ' "$FOOD"

    # don't parse lunch if download failed
    #
    # $CURL_LUNCH_OPTS is left unquoted on purpose: it is a list of options
    if curl $CURL_LUNCH_OPTS -b "$COOKIE" -o "$LUNCH" "$FOOD"; then
        echo "OK"
    else
        RET=$?
        echo "Failed: $RET"
        rm -f "$LUNCH"
        continue
    fi

    # search lunch for patterns
    # all matching is done by WILD_MAGIC with LUNCH at stdin
    #
    # p.s. this also prepends $FOOD to each line, so you can
    #      match <FEED> <TITLE> in your filter
    #
    # After matching, runs of blanks are squeezed to a single space. That is
    # the normalisation an unquoted expansion of the line would perform, done
    # here explicitly so the line can be used quoted below (no glob expansion
    # of titles containing '*', '?' or '[...]').
    xsltproc $XSLT_OPTS "$XSL_TEMPLATE" "$LUNCH" \
        | prefix_lines "$FOOD" \
        | DOWNLOADS="$DOWNLOADS" REVERSE_DOWNLOADS="$REVERSE_DOWNLOADS" WILD_DOWNLOADS="$WILD_DOWNLOADS" "$WILD_MAGIC" \
        | sed -e 's/[[:blank:]]\{1,\}/ /g' \
        | while read -r STR; do
            URL=$(printf '%s\n' "$STR" | sed -r -e "s/$SED_REGEX/\3/")

            # FIXME: filter out empty lines or not effective anymore (after grep -re $GREP_FILTER_COMMENTS)
            [ -z "$URL" ] && continue

            # printf '%s' yields the same bytes as `echo -n` does in bash, so
            # hashes already stored in the DB keep matching
            MD5=$(printf '%s' "$URL" | md5sum | cut -c 1-32)

            # check md5 of URL for duplicates
            if grep -F -- "$MD5" "$DB" >/dev/null 2>&1; then
                echo "Skipping $URL: already downloaded"
                continue
            fi

            # timestamp in RSS is in RFC822 format, it need to be converted to string understandable by `date`
            TIMESTAMP=$(printf '%s\n' "$STR" | sed -r -e "s/$SED_REGEX/\4/")
            if ! UNIXTIME=$("$RFC822TOUNIX" "$TIMESTAMP"); then
                echo "WARNING: RSS timestamp ($TIMESTAMP) can't be parsed correctly, expiration feature might not work properly"
                UNIXTIME=""
            fi

            # if pub date is available
            if [ -n "$UNIXTIME" ]; then
                AGE=$(($NOW - $UNIXTIME))

                if [ "$AGE" -lt 0 ]; then
                    echo "WARNING: $URL pub date ($UNIXTIME) is in the future, EXPIRATION and QUIET_PERIOD features obviously won't work correctly, disabling them for this file"
                elif [ "$AGE" -gt "$EXPIRATION" ]; then
                    # check entry for expiration
                    echo "Skipping $URL: expired"
                    continue
                elif [ "$AGE" -lt "$QUIET_PERIOD" ]; then
                    # check entry for quiet period
                    echo "Skipping $URL for now: too fresh"
                    continue
                fi
            fi

            printf 'Downloading: %s... ' "$URL"
            sleep "$DOWNLOAD_DELAY"

            TITLE=$(printf '%s\n' "$STR" | sed -r -e "s/$SED_REGEX/\2/")

            # The recipe gets everything it needs through LEECH_* variables;
            # its stdout and stderr are captured for the failure message.
            # stdin comes from /dev/null so that the recipe can't consume the
            # item lines this loop is still reading from the pipe.
            if DISH=$(LEECH_DOWNLOADS_DIR="$DOWNLOADS_DIR" \
                LEECH_CONFIG_DIR="$CONFIG_DIR" \
                LEECH_FEED_URL="$FOOD" \
                LEECH_URL="$URL" \
                LEECH_URL_MD5="$MD5" \
                LEECH_TITLE="$TITLE" \
                LEECH_PUBDATE="$TIMESTAMP" \
                LEECH_TIMEOUT="$TIMEOUT" \
                LEECH_TARGET_DIR="$TARGET_DIR" \
                "$DOWNLOAD_RECIPE" 2>&1 </dev/null); then
                echo "OK"

                # make a record in DB about downloaded file
                echo "$MD5 $NOW" >>"$DB"
            else
                RET=$?
                echo "Failed: $RET ($DISH)"
            fi
        done

    # cleanup
    rm -f "$LUNCH"
done

# delete old records from DB
#

# prune_db DB_FILE NOW HISTORY
#
# Rewrite DB_FILE so that only records younger than HISTORY seconds (relative
# to NOW) remain. A record is a line "<md5> <unix time>". Lines whose second
# field is not a plain run of digits (blank or damaged lines) are dropped;
# handing such a value to shell arithmetic would be a syntax error.
# The new content is written to DB_FILE.tmp and moved over DB_FILE only after
# it has been written completely, so a failed write leaves the DB untouched.
# Returns 0 on success, non-zero if the new database could not be put in place.
prune_db() {
    if awk -v now="$2" -v history="$3" \
        '$2 ~ /^[0-9]+$/ && (now - $2) < history' "$1" >"$1.tmp"; then
        mv "$1.tmp" "$1"
    else
        rm -f "$1.tmp"
        return 1
    fi
}

if [ -f "$DB" ]; then
    prune_db "$DB" "$NOW" "$HISTORY"
fi
