#!/bin/sh
#
#       Shared Disk File EXclusiveness (SF-EX) OCF RA.
#       Prevents destruction of data on a shared disk file system
#	due to split-brain.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  
# 02110-1301, USA.
#
# Copyright (c) 2007 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
#
#######################################################################
#
#	  OCF parameters are as below:
#		OCF_RESKEY_device
#		OCF_RESKEY_index
#		OCF_RESKEY_collision_timeout
#		OCF_RESKEY_lock_timeout
#		OCF_RESKEY_monitor_interval
#		OCF_RESKEY_fsck
#		OCF_RESKEY_fsck_mode
#		OCF_RESKEY_halt
#
#
#OCF_RESKEY_device
#	Block device path that stores exclusive control data.
#
#OCF_RESKEY_index
#	Position in block device where exclusive control data is stored.
# 	1 or more is specified. Default is 1.
#
#OCF_RESKEY_collision_timeout
#	Waiting time when a collision of lock acquisition is detected.
#	Default is 1 second.
#
#OCF_RESKEY_lock_timeout
#	Valid term of the lock, in seconds.
#	Default is 20 seconds.
#
#OCF_RESKEY_monitor_interval
#	Monitor interval, in seconds.
#	Default is 10 seconds.
#
#OCF_RESKEY_fsck
#	When lock is acquired, this command is executed.
#	Default is null. fsck command line is usually specified.
#
#OCF_RESKEY_fsck_mode
#	The condition for executing the command specified in
#	OCF_RESKEY_fsck. If this parameter is "yes", the command is
#	executed whenever the lock is acquired. If it is "check", the
#	command is executed only when the lock is acquired in an
#	unclean status. If it is "no", the command is never executed.
#	Default is "check".
#	Unclean status means that lock data still exists although the
#	lock has become invalid due to a timeout (lock_timeout).
#
#OCF_RESKEY_halt
#	When SF-EX fails to maintain the acquired lock (monitor
#	failed), this command is executed.
#	Default is null.
#	(e.g. "halt -nf", "echo b > /proc/sysrq-trigger").
#	If a double-mount state is entered, this function is used
#	to minimize the risk.
#
# NOTE:
#	As a prerequisite for running SF-EX, one device should be
#	initialized as below.
#
#		sfex_init [-b <blocksize>] [-n <numlocks>] <device>
#
#	Example:
#
#		/usr/lib/heartbeat/sfex_init -b 512 -n 10 /dev/sdb1
#
#	if further information is necessary, See README.
#
#######################################################################
# Initialization:

#. /usr/lib/heartbeat/ocf-shellfuncs

# Locate and source the OCF shell function library.
# The candidates are tried in order: the hidden copy under
# ${OCF_ROOT}/resource.d/heartbeat first, then the lib64 and lib heartbeat
# directories. The first regular file found wins. All paths are quoted so
# that an OCF_ROOT containing whitespace does not break the test or the
# source command.
FUNCTION_FILE=
for sfex_candidate in \
    "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs" \
    /usr/lib64/heartbeat/ocf-shellfuncs \
    /usr/lib/heartbeat/ocf-shellfuncs
do
    if [ -f "$sfex_candidate" ]; then
        FUNCTION_FILE=$sfex_candidate
        break
    fi
done
unset sfex_candidate

# Without the library none of the ocf_* helpers or OCF_* return codes
# exist, so report on stderr and give up with a plain exit status.
if [ -z "$FUNCTION_FILE" ]; then
    echo "${OCF_RESOURCE_INSTANCE} ocf-shellfuncs file doesn't exist." >&2
    exit 1
fi

. "${FUNCTION_FILE}"

# Clear LC_ALL and LANGUAGE for the rest of the script. The names stay
# marked for export but carry no value.
unset LC_ALL; export LC_ALL
unset LANGUAGE; export LANGUAGE

#######################################################################

# Paths of the SF-EX helper programs.
# /usr/lib64/heartbeat stays the default; /usr/lib/heartbeat is used only
# when sfex_lock is not executable in the default directory but is
# executable there (e.g. installations without a lib64 directory).
SFEX_BINDIR=/usr/lib64/heartbeat
if [ ! -x "$SFEX_BINDIR/sfex_lock" ] && [ -x /usr/lib/heartbeat/sfex_lock ]; then
    SFEX_BINDIR=/usr/lib/heartbeat
fi
SFEX_LOCK=$SFEX_BINDIR/sfex_lock
SFEX_UNLOCK=$SFEX_BINDIR/sfex_unlock
SFEX_UPDATE=$SFEX_BINDIR/sfex_update
SFEX_STAT=$SFEX_BINDIR/sfex_stat

# Print the one-line synopsis of the supported operations to stdout.
usage() {
    printf '    usage: %s {start|stop|status|monitor|meta-data}\n' "$0"
}

# Print the OCF resource-agent meta-data (XML) to stdout.
# The advertised defaults mirror the defaults applied when the instance
# parameters are read; fsck_mode therefore advertises "check".
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="sfex">
<version>1.3</version>

<longdesc lang="en">
Resource script for SF-EX. It manages a shared storage medium exclusively .
</longdesc>
<shortdesc lang="en">SF-EX resource agent</shortdesc>

<parameters>
<parameter name="device" unique="0" required="1">
<longdesc lang="en">
Block device path that stores exclusive control data.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="index" unique="0" required="0">
<longdesc lang="en">
Location in block device where exclusive control data is stored. 1 or more is specified. Default is 1.
</longdesc>
<shortdesc lang="en">index</shortdesc>
<content type="integer" default="1" />
</parameter>
<parameter name="collision_timeout" unique="0" required="0">
<longdesc lang="en">
Waiting time when a collision of lock acquisition is detected. Default is 1 second.
</longdesc>
<shortdesc lang="en">waiting time for lock acquisition</shortdesc>
<content type="integer" default="1" />
</parameter>
<parameter name="monitor_interval" unique="0" required="0">
<longdesc lang="en">
Monitor interval(sec). Default is 10 seconds
</longdesc>
<shortdesc lang="en">monitor interval</shortdesc>
<content type="integer" default="10" />
</parameter>
<parameter name="lock_timeout" unique="0" required="0">
<longdesc lang="en">
Valid term of lock(sec). Default is 20 seconds.
</longdesc>
<shortdesc lang="en">Valid term of lock</shortdesc>
<content type="integer" default="20" />
</parameter>
<parameter name="fsck_mode" unique="0" required="0">
<longdesc lang="en">
Condition for executing fsck command. It always execute if it's "yes", it execute only when it is not clean if it is "check", and if it is "no", it never executes it. Default is "check".
</longdesc>
<shortdesc lang="en">Condition for executing fsck</shortdesc>
<content type="string" default="check" />
</parameter>
<parameter name="fsck" unique="0" required="0">
<longdesc lang="en">
Command executed when lock is acquired. Default is null. The fsck command line is specified usually.
</longdesc>
<shortdesc lang="en">fsck command line</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="halt" unique="0" required="0">
<longdesc lang="en">
Command executed when failing in maintenance of lock. Default is null. The command line that urgent stops OS is specified usually. 
</longdesc>
<shortdesc lang="en">halt command line</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="600" />
<action name="stop" timeout="10" />
<action name="status" depth="0" timeout="10" interval="10" start-delay="10" />
<action name="monitor" depth="0" timeout="10" interval="10" start-delay="10" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}

# Run the sfex_lock helper for slot $INDEX on $DEVICE, passing the
# collision and lock timeouts. The helper's exit status is returned
# unchanged. Every argument is quoted so that a device path containing
# whitespace or glob characters reaches the helper as a single word.
acquire_lock() {
    "$SFEX_LOCK" -i "$INDEX" -c "$COLLISION_TIMEOUT" -t "$LOCK_TIMEOUT" "$DEVICE"
}

# Run the sfex_unlock helper for slot $INDEX on $DEVICE and return its
# exit status. Arguments are quoted so the device path stays one word.
release_lock() {
    "$SFEX_UNLOCK" -i "$INDEX" "$DEVICE"
}

# Run the sfex_stat helper for slot $INDEX on $DEVICE and return its
# exit status. Arguments are quoted so the device path stays one word.
stat_lock() {
    "$SFEX_STAT" -i "$INDEX" "$DEVICE"
}

# Run the sfex_update helper for slot $INDEX on $DEVICE and return its
# exit status. Arguments are quoted so the device path stays one word.
update_lock() {
    "$SFEX_UPDATE" -i "$INDEX" "$DEVICE"
}

# Function that starts periodic command execution.
#   $1: execution interval, in seconds
#   $2: command to run on every tick
# A background helper (interval) sends SIGUSR1 to this shell every $1
# seconds and the trap installed here runs $2. The helper's PID is kept
# in TIMER_PID so that stop_interval can terminate it.
# The signal is referred to by name (USR1) rather than by number, since
# the number assigned to SIGUSR1 differs between platforms.
start_interval() {
    # To execute the command on reception of SIGUSR1, the trap is set.
    trap "$2" USR1

    # The process that sends SIGUSR1 periodically is started in the background.
    interval "$1" "$$" &
    TIMER_PID=$!
}

# Function that stops periodic command execution.
# Terminates the background signal sender, if one was started, and
# restores the default SIGUSR1 disposition.
stop_interval() {
    # stop the signal sending process
    if [ -n "$TIMER_PID" ]; then
        kill "$TIMER_PID" >/dev/null 2>&1
        wait "$TIMER_PID" >/dev/null 2>&1
        unset TIMER_PID
    fi

    # release the SIGUSR1 trap
    trap - USR1
}

# Function that sends SIGUSR1 periodically.
#   $1: interval between signals, in seconds
#   $2: process id that receives the signal
# The loop ends as soon as sleep fails or the signal can no longer be
# delivered, so an orphaned sender does not keep signalling forever once
# the receiving process has gone away.
interval() {
    while sleep "$1"; do
        kill -s USR1 "$2" >/dev/null 2>&1 || break
    done
}

# Function that updates the lock while fsck is executing.
# Calls update_lock with its output captured in the lock temp files and
# stores the exit status in LOCK_RCODE. If the update fails and a fsck
# process id is recorded in FSCK_PID, that process is sent SIGTERM.
# File names and the PID are quoted so that unusual values cannot be
# split into several words.
update_lock_for_fsck() {
    update_lock >"$LOCK_STDOUT_FILE" 2>"$LOCK_STDERR_FILE"
    LOCK_RCODE=$?
    case $LOCK_RCODE in
        0)
            ocf_log info "Lock update success (fsck is executing)."
            ;;
        *)
            ocf_log warn "Lock update failure (fsck is executing)."

            # When the lock update fails, we send SIGTERM signal to
            # interrupt fsck.
            [ -n "$FSCK_PID" ] && kill "$FSCK_PID" >/dev/null 2>&1
            ;;
    esac
}

# Replay the captured fsck output: the saved standard output goes to
# stdout and the saved standard error goes to stderr.
# The file names are quoted so that paths containing whitespace work.
cat_all_fsckfile() {
    cat "$FSCK_STDOUT_FILE"
    cat "$FSCK_STDERR_FILE" >&2
}

# fsck execute function
# Runs the command in $FSCK in the background with its output captured
# in the fsck temp files, while the lock is refreshed every
# $MONITOR_INTERVAL seconds through start_interval/update_lock_for_fsck.
# Returns normally when the lock updates worked (or none was needed) and
# the command exited with status 0 or 1. In every other case the
# resource agent exits with $OCF_ERR_GENERIC.
do_fsck() {
    ocf_log info "fsck command ($FSCK) start..."
    LOCK_RCODE=
    FSCK_RCODE=

    # The periodic lock update is started.
    start_interval "$MONITOR_INTERVAL" update_lock_for_fsck

    # fsck is started in the background.
    eval "$FSCK" >"$FSCK_STDOUT_FILE" 2>"$FSCK_STDERR_FILE" &
    FSCK_PID=$!

    # Waiting for ending of fsck.
    # "wait" also returns (with a status above 128) when the trapped
    # timer signal arrives, so it is repeated for as long as the fsck
    # process exists. The first wait is unconditional: a command that has
    # already finished before its existence can be probed must still have
    # its exit status collected, otherwise FSCK_RCODE would stay empty
    # and a successful run would be reported as a failure.
    wait "$FSCK_PID" >/dev/null 2>&1
    FSCK_RCODE=$?
    while kill -0 "$FSCK_PID" >/dev/null 2>&1; do
        wait "$FSCK_PID" >/dev/null 2>&1
        FSCK_RCODE=$?
    done

    # stop periodic lock update
    stop_interval

    # A status above 128 may stem from an interrupted wait whose child
    # ended before the next existence probe (possible in shells that
    # collect finished children asynchronously). With the timer stopped,
    # ask once more for the real exit status; 127 means the shell no
    # longer knows the child, in which case the status already recorded
    # is kept.
    if [ "$FSCK_RCODE" -gt 128 ]; then
        wait "$FSCK_PID" >/dev/null 2>&1
        fsck_rewait=$?
        if [ "$fsck_rewait" -ne 127 ]; then
            FSCK_RCODE=$fsck_rewait
        fi
        unset fsck_rewait
    fi
    unset FSCK_PID

    # check the result.
    case "$LOCK_RCODE" in
        ""|0)
            case "$FSCK_RCODE" in
                0|1)
                    ocf_log info "fsck command success."
                    cat_all_fsckfile
                    ;;
                *)
                    ocf_log err "fsck command failure ($FSCK_RCODE)."
                    cat_all_fsckfile
                    exit $OCF_ERR_GENERIC
                    ;;
            esac
            ;;
        2)
            ocf_log warn "Fsck command interruption due to lock update failure."
            exit $OCF_ERR_GENERIC
            ;;
        *)
            ocf_log err "Fsck command interruption due to lock update error($LOCK_RCODE)."
            cat "$LOCK_STDERR_FILE" >&2
            exit $OCF_ERR_GENERIC
            ;;
    esac
}

#
# START: Exclusive control starts.
#
# Loops until the lock can be acquired while another node is holding it.
# In that case the only way out is the stop signal delivered when the
# operation timeout configured in the CIB expires.
#
# After the lock is acquired, the command in $FSCK is run through do_fsck
# when FSCK_MODE is "yes", or when FSCK_MODE is "check" and the lock was
# acquired in unclean status. Returns $OCF_SUCCESS; exits with
# $OCF_ERR_GENERIC on a lock acquisition error.
#
sfex_start() {
    ocf_log info "sfex_start: started..."

    # Retry loop when lock acquisition fails (the other node is locking).
    while :; do
        # Lock acquisition
        acquire_lock >"$LOCK_STDOUT_FILE" 2>"$LOCK_STDERR_FILE"
        LOCK_RCODE=$?
        case $LOCK_RCODE in
            0) # Lock acquisition success
                ocf_log info "Lock acquisition success (clean)."
                NEED_FSCK=no
                break
                ;;
            1) # Lock acquisition success, but the exclusive control data
               # found before the acquisition was unclean
                ocf_log info "Lock acquisition success (unclean)."
                NEED_FSCK=yes
                break
                ;;
            2) # Lock acquisition failure
                ocf_log warn "Lock acquisition failure (other node is locking)."
                ;;
            *) # Lock acquisition error
                ocf_log err "Lock acquisition error ($LOCK_RCODE)."
                cat "$LOCK_STDERR_FILE" >&2
                exit $OCF_ERR_GENERIC
                ;;
        esac
    done

    # Execute fsck: always in "yes" mode, only after an unclean
    # acquisition in "check" mode. Separate tests joined with || and &&
    # are used instead of the obsolescent -o/-a operators of "[".
    if [ -n "$FSCK" ]; then
        if [ "$FSCK_MODE" = yes ] ||
           { [ "$FSCK_MODE" = check ] && [ "$NEED_FSCK" = yes ]; }; then
            do_fsck
        fi
    fi

    ocf_log info "sfex_start: complete."
    return $OCF_SUCCESS
}

#
# STOP: stop exclusive control
#
# Releases the lock through release_lock. A release error is logged and
# the helper's captured error output is replayed on stderr (like every
# other operation does), but the error is otherwise ignored: the
# function always returns $OCF_SUCCESS.
#
sfex_stop() {
    ocf_log info "sfex_stop: started..."

    # lock release
    release_lock >"$LOCK_STDOUT_FILE" 2>"$LOCK_STDERR_FILE"
    LOCK_RCODE=$?
    case $LOCK_RCODE in
        0) # lock release success
            ocf_log info "Lock release success."
            ;;
        1) # lock release is unnecessary. (self node is not locked)
            ocf_log info "Lock release is unnecessary. (self node is not locked)."
            ;;
        *) # lock release error
            ocf_log warn "Lock release error ($LOCK_RCODE)."
            cat "$LOCK_STDERR_FILE" >&2
            ocf_log warn "The error is ignored at the stop."
            ;;
    esac

    ocf_log info "sfex_stop: complete."
    return $OCF_SUCCESS
}

#
# STATUS: exclusive control status check
#
# Queries the lock through stat_lock. Returns $OCF_SUCCESS when the
# helper reports 0 (own node holds the lock) and $OCF_NOT_RUNNING when it
# reports 2 (own node does not hold it). Any other status is an error:
# the helper's captured stderr is replayed and the agent exits with
# $OCF_ERR_GENERIC. Temp file names are quoted so that paths containing
# whitespace work.
#
sfex_status() {
    ocf_log info "sfex_status: started..."

    # lock status check
    stat_lock >"$LOCK_STDOUT_FILE" 2>"$LOCK_STDERR_FILE"
    LOCK_RCODE=$?
    case $LOCK_RCODE in
        0) # locking
            ocf_log info "lock status: own node is locking."
            RCODE=$OCF_SUCCESS
            ;;
        2) # unlocking
            ocf_log info "lock status: own node is not lock."
            RCODE=$OCF_NOT_RUNNING
            ;;
        *) # lock status check error
            ocf_log err "lock status check error ($LOCK_RCODE)."
            cat "$LOCK_STDERR_FILE" >&2
            exit $OCF_ERR_GENERIC
            ;;
    esac

    ocf_log info "sfex_status: complete."
    return $RCODE
}

#
# MONITOR: update exclusive control(update lock data)
#
# A probe (monitor interval of 0 or not set) always reports
# $OCF_NOT_RUNNING without touching the lock. Otherwise the lock is
# refreshed through update_lock:
#   0     -> $OCF_SUCCESS
#   2     -> $OCF_NOT_RUNNING, after running the command in $HALT if set
#   other -> helper stderr is replayed, exit with $OCF_ERR_GENERIC
# Temp file names are quoted so that paths containing whitespace work.
#
sfex_monitor() {
    ocf_log debug "sfex_monitor: started..."

    if [ "${OCF_RESKEY_CRM_meta_interval:-0}" -eq "0" ]; then
        # in case of probe, monitor operation is surely treated as
        # under suspension. This will call start operation.
        ocf_log info "probe..."
        return $OCF_NOT_RUNNING
    fi

    # lock update
    update_lock >"$LOCK_STDOUT_FILE" 2>"$LOCK_STDERR_FILE"
    LOCK_RCODE=$?
    case $LOCK_RCODE in
        0) # lock update success
            ocf_log debug "lock update success."
            RCODE=$OCF_SUCCESS
            ;;
        2) # lock update failure
            ocf_log warn "lock update failure(own node dose not lock)."
            RCODE=$OCF_NOT_RUNNING

            # halt command is executed
            if [ -n "$HALT" ]; then
                ocf_log info "halt command ($HALT) is executed."
                eval "$HALT"
            fi
            ;;
        *) # lock update error
            ocf_log err "lock update error ($LOCK_RCODE)."
            cat "$LOCK_STDERR_FILE" >&2
            exit $OCF_ERR_GENERIC
            ;;
    esac

    ocf_log debug "sfex_monitor: complete."
    return $RCODE
}

#
# main process 
#

# Exactly one argument, the operation name, is accepted.
[ $# -eq 1 ] || {
    usage
    exit $OCF_ERR_ARGS
}
OP=$1

# meta-data and usage are answered right away; they need no instance
# parameters.
if [ "$OP" = meta-data ]; then
    meta_data
    exit $OCF_SUCCESS
elif [ "$OP" = usage ]; then
    usage
    exit $OCF_SUCCESS
fi

# Read the instance parameters, falling back to the documented defaults
# for the optional ones.
DEVICE=$OCF_RESKEY_device
HALT="$OCF_RESKEY_halt"
FSCK="$OCF_RESKEY_fsck"
FSCK_MODE=${OCF_RESKEY_fsck_mode:-check}
INDEX=${OCF_RESKEY_index:-1}
LOCK_TIMEOUT=${OCF_RESKEY_lock_timeout:-20}
COLLISION_TIMEOUT=${OCF_RESKEY_collision_timeout:-1}
MONITOR_INTERVAL=${OCF_RESKEY_monitor_interval:-10}

blockdevice=no

# The device is mandatory: refuse to go on without it.
[ -n "$DEVICE" ] || {
    ocf_log err "Please set OCF_RESKEY_device to device for sfex meta-data"
    exit $OCF_ERR_ARGS
}

# A device that is not writable only produces a warning.
[ -w "$DEVICE" ] ||
    ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"

# temporary files
# All scratch files live in a private directory created by mktemp, so
# their names cannot be predicted (and pre-created or symlinked) by other
# local users, unlike fixed /tmp/<pid>-* names.
SFEX_TMPDIR=$(mktemp -d /tmp/sfex.XXXXXX)
if [ -z "$SFEX_TMPDIR" ] || [ ! -d "$SFEX_TMPDIR" ]; then
    ocf_log err "Couldn't create temporary directory."
    exit $OCF_ERR_GENERIC
fi
FSCK_STDOUT_FILE=$SFEX_TMPDIR/f1
FSCK_STDERR_FILE=$SFEX_TMPDIR/f2
LOCK_STDOUT_FILE=$SFEX_TMPDIR/l1
LOCK_STDERR_FILE=$SFEX_TMPDIR/l2

# cleanup function on exit
# Terminates a still-running fsck (FSCK_PID), stops the periodic lock
# update and removes the scratch directory with everything in it.
on_exit() {
    if [ -n "$FSCK_PID" ]; then
        kill "$FSCK_PID" >/dev/null 2>&1
        wait "$FSCK_PID" >/dev/null 2>&1
        unset FSCK_PID
    fi
    stop_interval
    if [ -n "$SFEX_TMPDIR" ]; then
        rm -rf -- "$SFEX_TMPDIR"
    fi
}

# registration of cleanup function on exit
trap on_exit 0

# Dispatch the requested operation. The exit status of the script is the
# status of the operation function that ran.
case $OP in
    start)
        # fsck_mode is only consulted by start, so it is validated here.
        if [ "$FSCK_MODE" != yes ] && [ "$FSCK_MODE" != check ] && [ "$FSCK_MODE" != no ]; then
            ocf_log err "Invalid OCF_RESKEY_fsck_mode. it must be 'yes', 'check' or 'no'"
            exit $OCF_ERR_ARGS
        fi
        sfex_start
        ;;
    stop)    sfex_stop ;;
    status)  sfex_status ;;
    monitor) sfex_monitor ;;
    *)
        # any other operation name is not supported by this agent
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
exit $?
