/*--------------------------------------------------------------------
 * FILE:
 *     admin_monitor.c
 *
 * NOTE:
 *     This file contains the status monitor of the admin process.
 *     The low-level I/O functions called by these functions are
 *     contained in 'replicate_com.c'.
 *
 *--------------------------------------------------------------------
 */

/*--------------------------------------
 * INTERFACE ROUTINES
 *
 * I/O call:
 *      
 *-------------------------------------
 */
#include "postgres.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <sys/wait.h>
#include <ctype.h>
#include <time.h>
#include <pwd.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#include <arpa/inet.h>
#include <sys/param.h>
#include <sys/select.h>
#include <sys/file.h>
#include <dirent.h>
#include <getopt.h>

#ifdef MULTIBYTE
#include "mb/pg_wchar.h"
#endif

#include "libpq/pqsignal.h"
#include "pgc_admin.h"

/* public entry point of this file (defined below) */
int PGC_Admin_Status_Monitor(int fork_wait_time);

/* monitor start-up and periodic cycle */
static int monitor_startup(void);
static void monitor_loop(void);
/* status checks, one per service table */
static void check_server_status(void);
static int check_cluster_status(void);
static int check_pgrp_status(void);
static int check_pglb_status(void);
/* restart of services that are not in DATA_USE state */
static void auto_recovery_server(void);
static int auto_recovery_cluster(void);
static int auto_recovery_pgrp(void);
static int auto_recovery_pglb(void);
/* the definitions of the following functions are disabled with #if 0 as well */
#if 0
static int ping_probe(SSL_Server_Info * probe);
static int server_status_req(SSL_Info * ssl_tbl);
static int pglb_status_req(SSL_Info * ssl_tbl);
static int cluster_status_req(SSL_Info * ssl_tbl);
static int pgrp_status_req(SSL_Info * ssl_tbl);
#endif
/* initial handshake with a probe server */
static int startup_probe(SSL_Info * ssl_tbl, SSL_Server_Info * probe);
static int init_send_2_probe(SSL_Info * ssl_tbl);
/* signal handler installed in the monitor child process */
static void exit_monitor(int signo);
/* send the table entries that share the probe's physicalServerId */
static int admin_send_pglb_info_in_physical_server(SSL_Info * ssl_tbl, SSL_Server_Info * probe);
static int admin_send_cluster_info_in_physical_server(SSL_Info * ssl_tbl, SSL_Server_Info * probe);
static int admin_send_pgrp_info_in_physical_server(SSL_Info * ssl_tbl, SSL_Server_Info * probe);

/*--------------------------------------------------------------------
 * PGC_Admin_Status_Monitor()
 *
 *     Run monitor_startup() in the calling process, then fork a child
 *     that repeats monitor_loop() every PGR_Lifecheck_Interval seconds.
 *
 * ARGS
 *     fork_wait_time: seconds the child sleeps before its first
 *                     monitor cycle; values <= 0 mean no delay.
 * RETURN
 *     STATUS_OK    : returned to the parent once the child was forked
 *     STATUS_ERROR : monitor_startup() or fork() failed
 *     The child process never returns; it leaves through exit_monitor()
 *     on SIGINT / SIGQUIT / SIGTERM.
 *--------------------------------------------------------------------
 */
int
PGC_Admin_Status_Monitor(int fork_wait_time)
{
	char * func = "PGC_Admin_Status_Monitor()";
	pid_t pgid = 0;
	pid_t pid = 0;

	if (monitor_startup() != STATUS_OK)
	{
		show_error("%s:monitor_startup failed. ",func);
		return STATUS_ERROR;
	}

	pgid = getpgid(0);
	pid = fork();
	if (pid < 0)
	{
		/* no child exists, so the caller must not be told it is running */
		show_error("%s:fork failed (%s)",func, strerror(errno));
		return STATUS_ERROR;
	}
	if (pid > 0)
	{
		/* parent: the monitor runs in the child */
		return STATUS_OK;
	}

	/* child: stay in the process group the parent had before fork() */
	setpgid(0,pgid);
	PGRsignal(SIGINT, exit_monitor);
	PGRsignal(SIGQUIT, exit_monitor);
	PGRsignal(SIGTERM, exit_monitor);
	PG_SETMASK(&UnBlockSig);

	if (fork_wait_time > 0)
	{
		sleep((unsigned int) fork_wait_time);
	}

	for (;;)
	{
		/* life check to all cluster dbs */
		monitor_loop();

		/* wait next lifecheck as interval */
		sleep(PGR_Lifecheck_Interval);
	}

	/* not reached */
	return STATUS_OK;
}


/*
 * monitor_loop()
 *
 *     One monitor cycle: refresh the status of every service, then try
 *     to restart those that are not running.  Each step is bracketed by
 *     debug trace messages.
 */
static void
monitor_loop(void)
{
	char * self = "monitor_loop()";

	/* step 1: status check of each server */
	show_debug("%s:check_server_status start",self);
	check_server_status();
	show_debug("%s:check_server_status end",self);

	/* step 2: automatic recovery */
	show_debug("%s:auto_recovery_server start",self);
	auto_recovery_server();
	show_debug("%s:auto_recovery_server end",self);
}

/*
 * monitor_startup()
 *
 *     Connect to every probe server listed in ProbeTbl (the list ends
 *     at the entry whose status is DATA_END), perform the start-up
 *     handshake with startup_probe(), and finally start the services
 *     through auto_recovery_server().
 *
 *     AdminTbl->status is DATA_INIT while the probes are contacted and
 *     DATA_USE afterwards.
 *
 *     A probe that cannot be reached after the retry limit is logged
 *     and skipped; it does not make the function fail.
 *
 * RETURN
 *     STATUS_OK    : the probe list was processed
 *     STATUS_ERROR : ProbeTbl or AdminTbl is NULL
 */
static int
monitor_startup(void)
{
	char * func = "monitor_startup()";
	SSL_Server_Info * probe = ProbeTbl;
	SSL_Info ssl_tbl;
	int count = 0;
	int connected = 0;

	if (probe == NULL)
	{
		show_error("%s:ProbeTbl is NULL",func);
		return STATUS_ERROR;
	}
	if (AdminTbl == NULL)
	{
		show_error("%s:AdminTbl is NULL",func);
		return STATUS_ERROR;
	}

	memset(&ssl_tbl, 0, sizeof(SSL_Info));
	AdminTbl->status = DATA_INIT;
	/* send service information to each probe server */
	for (; probe->status != DATA_END; probe++)
	{
		count = 0;
		connected = 1;
		while (PGC_Create_Admin_Send_SSL(&ssl_tbl, probe->hostName, probe->portNumber) == NULL )
		{
			if (count > MAX_RETRY_TIMES )
			{
				show_error("%s:host[%s] port[%d]PGR_Create_Send_Socket failed",func, probe->hostName, probe->portNumber);
				/*
				 * Give up on this probe.  A 'continue' here would only
				 * restart the retry loop and spin forever without sleeping.
				 */
				connected = 0;
				break;
			}
			count ++;
			sleep(1);
		}
		if (!connected)
		{
			/* no connection was established: nothing to talk to or close */
			continue;
		}
		startup_probe(&ssl_tbl, probe);
		PGC_Close_SSL(&ssl_tbl);
	}
	AdminTbl->status = DATA_USE;

	/* start up each service */
	auto_recovery_server();

	return STATUS_OK;
}

/*
 * check_server_status()
 *
 *     Refresh the status of all services in this order: pgrp entries,
 *     cluster entries, pglb entries.  The return values of the
 *     individual checks are not examined.
 */
static void
check_server_status(void)
{
	char * self = "check_server_status()";

	/* pgrp table */
	show_debug("%s:check_pgrp_status start",self);
	check_pgrp_status();
	show_debug("%s:check_pgrp_status  end",self);

	/* cluster table */
	show_debug("%s:check_cluster_status start",self);
	check_cluster_status();
	show_debug("%s:check_cluster_status end",self);

	/* pglb table */
	show_debug("%s:check_pglb_status  start",self);
	check_pglb_status();
	show_debug("%s:check_pglb_status  end",self);
}

/*
 * check_cluster_status()
 *
 *     Walk ClusterDbTbl (terminated by an entry with portNumber 0) and
 *     ask PGC_Confirm_Cluster_Alive() for the state of every entry.
 *     The answer is always stored in receiveStatus; when it differs from
 *     the recorded status, the status is updated and
 *     PGC_Sync_Status_Cluster() is called for that entry.
 *
 * RETURN
 *     STATUS_OK, or STATUS_ERROR when ClusterDbTbl is NULL.
 */
static int
check_cluster_status(void)
{
	char * self = "check_cluster_status()";
	Cluster_Info * entry;

	if (ClusterDbTbl == NULL)
	{
		show_error("%s: ClusterDbTbl is NULL",self);
		return STATUS_ERROR;
	}

	for (entry = ClusterDbTbl; entry->portNumber != 0; entry++)
	{
		uint16_t alive;

		show_debug("%s:cluster->receiveStatus[%d] cluster->status[%d]\n",self,entry->receiveStatus, entry->status);
		alive = PGC_Confirm_Cluster_Alive(entry);
		entry->receiveStatus = alive;
		if (entry->status == alive)
		{
			/* nothing changed, nothing to synchronize */
			continue;
		}
		entry->status = alive;
		PGC_Sync_Status_Cluster(entry);
	}
	return STATUS_OK;
}

/*
 * check_pgrp_status()
 *
 *     Walk PgrpTbl (terminated by an entry with replicationPortNumber 0)
 *     and ask PGC_Confirm_Pgrp_Alive() for the state of every entry.
 *     The answer is always stored in receiveStatus; when it differs from
 *     the recorded status, the status is updated and
 *     PGC_Sync_Status_Pgrp() is called for that entry.
 *
 * RETURN
 *     STATUS_OK, or STATUS_ERROR when PgrpTbl is NULL.
 */
static int
check_pgrp_status(void)
{
	char * self = "check_pgrp_status()";
	Pgrp_Info * entry;

	if (PgrpTbl == NULL)
	{
		show_error("%s: PgrpTbl is NULL",self);
		return STATUS_ERROR;
	}

	for (entry = PgrpTbl; entry->replicationPortNumber != 0; entry++)
	{
		uint16_t alive = PGC_Confirm_Pgrp_Alive(entry);

		show_debug("%s: pgrp->status[%d] <- status[%d]\n",self, entry->status, alive);
		entry->receiveStatus = alive;
		if (entry->status == alive)
		{
			/* nothing changed, nothing to synchronize */
			continue;
		}
		entry->status = alive;
		PGC_Sync_Status_Pgrp(entry);
	}
	return STATUS_OK;
}

/*
 * check_pglb_status()
 *
 *     Walk PglbTbl (terminated by an entry with receivePortNumber 0)
 *     and ask PGC_Confirm_Pglb_Alive() for the state of every entry.
 *     The answer is always stored in receiveStatus; when it differs from
 *     the recorded status, the status is updated and
 *     PGC_Sync_Status_Pglb() is called for that entry.
 *
 * RETURN
 *     STATUS_OK, or STATUS_ERROR when PglbTbl is NULL.
 */
static int
check_pglb_status(void)
{
	char * self = "check_pglb_status()";
	Pglb_Info * entry;
	uint16_t alive = 0;

	if (PglbTbl == NULL)
	{
		show_error("%s: PglbTbl is NULL",self);
		return STATUS_ERROR;
	}

	for (entry = PglbTbl; entry->receivePortNumber != 0; entry++)
	{
		alive = PGC_Confirm_Pglb_Alive(entry);
		show_debug("%s: pglb->status[%d] status[%d]\n",self, entry->status, alive);
		entry->receiveStatus = alive;
		if (entry->status == alive)
		{
			/* nothing changed, nothing to synchronize */
			continue;
		}
		entry->status = alive;
		PGC_Sync_Status_Pglb(entry);
	}
	return STATUS_OK;
}

/*
 * auto_recovery_server()
 *
 *     Run the automatic recovery of all service kinds in this order:
 *     cluster entries, pgrp entries, pglb entries.  The return values of
 *     the individual steps are not examined.
 */
static void
auto_recovery_server(void)
{
	char * self = "auto_recovery_server()";

	/* cluster table */
	show_debug("%s: ------------ auto_recovery_cluster start",self);
	auto_recovery_cluster();
	show_debug("%s: ------------ auto_recovery_cluster end",self);

	/* pgrp table */
	show_debug("%s: ------------ auto_recovery_pgrp start",self);
	auto_recovery_pgrp();
	show_debug("%s: ------------ auto_recovery_pgrp end",self);

	/* pglb table */
	show_debug("%s: ------------ auto_recovery_pglb start",self);
	auto_recovery_pglb();
	show_debug("%s: ------------ auto_recovery_pglb end",self);
}

/*
 * auto_recovery_cluster()
 *
 *     Try to bring up every ClusterDbTbl entry whose status is not
 *     DATA_USE (the table ends at the entry with portNumber 0).
 *
 *     The request sent is RECOVERY_REQ_PKT when at least one entry is
 *     already DATA_USE at the time this function starts, otherwise
 *     START_REQ_PKT.  The choice is made once, before any request is
 *     sent.
 *
 *     After the request, the entry is polled with
 *     PGC_Confirm_Cluster_Alive() every WATCHDOG_INTERVAL seconds until
 *     it reports DATA_USE or the retry counter exceeds MAX_EXEC_RETRY.
 *
 * RETURN
 *     STATUS_OK, or STATUS_ERROR when ClusterDbTbl is NULL.
 */
static int
auto_recovery_cluster(void)
{
	char * self = "auto_recovery_cluster()";
	Cluster_Info * table = NULL;
	Cluster_Info * db = NULL;
	uint16_t request = START_REQ_PKT;
	uint16_t alive;
	int idx;
	int tries;

	if (ClusterDbTbl == NULL)
	{
		show_error("%s: ClusterDbTbl is NULL",self);
		return STATUS_ERROR;
	}
	table = ClusterDbTbl;

	/* look for a cluster DB that is already running */
	for (idx = 0; table[idx].portNumber != 0; idx++)
	{
		if (table[idx].status == DATA_USE)
		{
			request = RECOVERY_REQ_PKT;
			break;
		}
	}

	for (db = table; db->portNumber != 0; db++)
	{
		show_debug("%s: host[%s] port[%d] status[%d]",self,db->hostName,db->portNumber, db->status);
		if (db->status == DATA_USE)
		{
			continue;
		}

		PGC_Admin_Exec_Cluster(request, db);

		/* poll until the cluster DB is in use or the retries run out */
		for (tries = 0; db->status != DATA_USE; tries++)
		{
			alive = PGC_Confirm_Cluster_Alive(db);
			show_debug("%s:PGC_Confirm_Cluster_Alive[%d]",self,alive);
			db->receiveStatus = alive;
			if (db->status != alive)
			{
				/* the status is recorded locally only, no sync is sent */
				db->status = alive;
			}
			if (tries > MAX_EXEC_RETRY)
			{
				show_error("%s: cluster db [%s] failed to startup", self, db->hostName);
				break;
			}
			sleep( WATCHDOG_INTERVAL );
		}
	}
	return STATUS_OK;
}

/*
 * auto_recovery_pgrp()
 *
 *     Send START_REQ_PKT through PGC_Admin_Exec_Pgrp() to every PgrpTbl
 *     entry whose status is not DATA_USE (the table ends at the entry
 *     with replicationPortNumber 0).
 *
 *     After the request, the entry is polled with
 *     PGC_Confirm_Pgrp_Alive() every WATCHDOG_INTERVAL seconds until it
 *     reports DATA_USE or the retry counter exceeds MAX_EXEC_RETRY.
 *
 * RETURN
 *     STATUS_OK, or STATUS_ERROR when PgrpTbl is NULL.
 */
static int
auto_recovery_pgrp(void)
{
	char * self = "auto_recovery_pgrp()";
	Pgrp_Info * entry = NULL;
	uint16_t alive;
	int tries;

	if (PgrpTbl == NULL)
	{
		show_error("%s: PgrpTbl is null",self);
		return STATUS_ERROR;
	}

	for (entry = PgrpTbl; entry->replicationPortNumber != 0; entry++)
	{
		if (entry->status == DATA_USE)
		{
			continue;
		}

		PGC_Admin_Exec_Pgrp(START_REQ_PKT, entry);

		/* poll until the entry is in use or the retries run out */
		for (tries = 0; entry->status != DATA_USE; tries++)
		{
			alive = PGC_Confirm_Pgrp_Alive(entry);
			show_debug("%s:PGC_Confirm_Pgrp_Alive[%d]",self,alive);
			entry->receiveStatus = alive;
			if (entry->status != alive)
			{
				/* the status is recorded locally only, no sync is sent */
				entry->status = alive;
			}
			if (tries > MAX_EXEC_RETRY)
			{
				show_error("%s: replicator [%s] failed to startup", self, entry->hostName);
				break;
			}
			sleep( WATCHDOG_INTERVAL );
		}
	}
	return STATUS_OK;
}

/*
 * auto_recovery_pglb()
 *
 *     Send START_REQ_PKT through PGC_Admin_Exec_Pglb() to every PglbTbl
 *     entry whose status is not DATA_USE (the table ends at the entry
 *     with receivePortNumber 0).
 *
 *     After the request, the entry is polled with
 *     PGC_Confirm_Pglb_Alive() every WATCHDOG_INTERVAL seconds until it
 *     reports DATA_USE or the retry counter exceeds MAX_EXEC_RETRY.
 *
 * RETURN
 *     STATUS_OK, or STATUS_ERROR when PglbTbl is NULL.
 */
static int
auto_recovery_pglb(void)
{
	char * func = "auto_recovery_pglb()";
	Pglb_Info * pglb = NULL;
	uint16_t status;
	int i = 0;

	if (PglbTbl == NULL)
	{
		show_error("%s: PglbTbl is NULL",func);
		return STATUS_ERROR;
	}
	pglb = PglbTbl;

	while (pglb->receivePortNumber != 0)
	{
		if (pglb->status != DATA_USE)
		{
			i = 0;
			PGC_Admin_Exec_Pglb(START_REQ_PKT, pglb);
			/* poll until the entry is in use or the retries run out */
			while (pglb->status != DATA_USE)
			{
				status = PGC_Confirm_Pglb_Alive(pglb);
				show_debug("%s:PGC_Confirm_Pglb_Alive[%d]",func,status);
				pglb->receiveStatus = status;
				if (pglb->status != status)
				{
					/* the status is recorded locally only, no sync is sent */
					pglb->status = status;
				}
				if (i > MAX_EXEC_RETRY)
				{
					/*
					 * This message used to say "replicator", which is the
					 * wording of the pgrp recovery and pointed at the wrong
					 * service.
					 */
					show_error("%s: load balancer [%s] failed to startup", func, pglb->hostName);
					break;
				}
				sleep( WATCHDOG_INTERVAL );
				i ++;
			}
		}
		pglb ++;
	}
	return STATUS_OK;
}
	
#if 0
/*
 * ping_probe()   (currently compiled out by the surrounding #if 0)
 *
 *     Connect to one probe server and either request the service status
 *     (probe already DATA_USE) or perform the start-up handshake.
 *
 * RETURN
 *     STATUS_ERROR when probe is NULL or the connection cannot be
 *     established within the retry limit; otherwise the result of
 *     server_status_req() or startup_probe().
 */
static int
ping_probe(SSL_Server_Info * probe)
{
	char * self = "ping_probe()";
	SSL_Info conn;
	int attempts;
	int result;

	if (probe == NULL)
	{
		show_error("%s: probe is not available",self);
		return STATUS_ERROR;
	}

	memset(&conn, 0, sizeof(SSL_Info));
	for (attempts = 0;
		 PGC_Create_Admin_Send_SSL(&conn, probe->hostName, probe->portNumber) == NULL;
		 attempts++)
	{
		/* release whatever the failed attempt left behind */
		PGC_Close_SSL(&conn);
		if (attempts > MAX_RETRY_TIMES )
		{
			show_error("%s:host[%s] port[%d]PGR_Create_Send_Socket failed",self, probe->hostName, probe->portNumber);
			return STATUS_ERROR;
		}
		sleep(1);
	}

	result = (probe->status == DATA_USE)
		? server_status_req(&conn)
		: startup_probe(&conn, probe);
	PGC_Close_SSL(&conn);
	return result;
}

/*
 * server_status_req()   (currently compiled out by the surrounding #if 0)
 *
 *     Request the status of the pglb, cluster and pgrp services, in that
 *     order, over the given connection.  The sequence stops at the first
 *     request that does not return STATUS_OK.
 *
 * RETURN
 *     The result of the last request that was issued.
 */
static int
server_status_req(SSL_Info * ssl_tbl)
{
	int result = pglb_status_req(ssl_tbl);

	if (result == STATUS_OK)
	{
		result = cluster_status_req(ssl_tbl);
	}
	if (result == STATUS_OK)
	{
		result = pgrp_status_req(ssl_tbl);
	}
	return result;
}

/*
 * pglb_status_req()   (currently compiled out by the surrounding #if 0)
 *
 *     Send a GET_STS_REQ_PKT for SERVER_TYPE_PGLB without a body and,
 *     when the send succeeded, read the response.  The result of the
 *     receive call is not examined.
 *
 * RETURN
 *     The result of PGC_Send_Status_Packet().
 */
static int
pglb_status_req(SSL_Info * ssl_tbl)
{
	Probe_Header fields;
	Probe_Header packet;
	int result;

	/* describe the request, then let PGC_Set_Packet_Header build the packet header */
	fields.packet_no = GET_STS_REQ_PKT;
	fields.serverType = SERVER_TYPE_PGLB;
	fields.body_length = 0;
	fields.rec_num = 0;
	PGC_Set_Packet_Header(&packet, &fields);

	/* pglb status request */
	result = PGC_Send_Status_Packet(ssl_tbl, &packet, NULL);
	if (result != STATUS_OK)
	{
		return result;
	}

	/* receive response packet (overwrites the header that was sent) */
	PGC_Response_Receive(ssl_tbl, &packet);
	return result;
}

/*
 * cluster_status_req()   (currently compiled out by the surrounding #if 0)
 *
 *     Send a GET_STS_REQ_PKT for SERVER_TYPE_CLUSTER without a body and,
 *     when the send succeeded, read the response.  The result of the
 *     receive call is not examined.
 *
 * RETURN
 *     The result of PGC_Send_Status_Packet().
 */
static int
cluster_status_req(SSL_Info * ssl_tbl)
{
	Probe_Header fields;
	Probe_Header packet;
	int result;

	/* describe the request, then let PGC_Set_Packet_Header build the packet header */
	fields.packet_no = GET_STS_REQ_PKT;
	fields.serverType = SERVER_TYPE_CLUSTER;
	fields.body_length = 0;
	fields.rec_num = 0;
	PGC_Set_Packet_Header(&packet, &fields);

	/* cluster db status request */
	result = PGC_Send_Status_Packet(ssl_tbl, &packet, NULL);
	if (result != STATUS_OK)
	{
		return result;
	}

	/* receive response packet (overwrites the header that was sent) */
	PGC_Response_Receive(ssl_tbl, &packet);
	return result;
}

/*
 * pgrp_status_req()   (currently compiled out by the surrounding #if 0)
 *
 *     Send a GET_STS_REQ_PKT for SERVER_TYPE_PGRP without a body and,
 *     when the send succeeded, read the response.  The result of the
 *     receive call is not examined.
 *
 * RETURN
 *     The result of PGC_Send_Status_Packet().
 */
static int
pgrp_status_req(SSL_Info * ssl_tbl)
{
	Probe_Header fields;
	Probe_Header packet;
	int result;

	/* describe the request, then let PGC_Set_Packet_Header build the packet header */
	fields.packet_no = GET_STS_REQ_PKT;
	fields.serverType = SERVER_TYPE_PGRP;
	fields.body_length = 0;
	fields.rec_num = 0;
	PGC_Set_Packet_Header(&packet, &fields);

	/* pgrp status request */
	result = PGC_Send_Status_Packet(ssl_tbl, &packet, NULL);
	if (result != STATUS_OK)
	{
		return result;
	}

	/* receive response packet (overwrites the header that was sent) */
	PGC_Response_Receive(ssl_tbl, &packet);
	return result;
}
#endif
/*
 * startup_probe()
 *
 *     Start-up handshake with one probe server over an established
 *     connection: send the initial notice, read the answer and mark the
 *     probe as DATA_USE.  When the probe answers INIT_INFO_REQ_PKT, the
 *     pgrp, cluster and pglb entries that belong to the probe's
 *     physical server are sent to it, in that order.  The results of
 *     those three transfers are not examined.
 *
 * RETURN
 *     STATUS_ERROR when probe is NULL, the initial send fails, or no
 *     response is received; STATUS_OK otherwise.
 */
static int 
startup_probe(SSL_Info * ssl_tbl, SSL_Server_Info * probe)
{
	char * self = "startup_probe()";
	Probe_Header reply;
	uint16_t reply_type;

	if (probe == NULL)
	{
		return STATUS_ERROR;
	}

	/* send initial ping packet */
	if (init_send_2_probe(ssl_tbl) != STATUS_OK)
	{
		show_error("%s:init_send_2_probe failed\n",self);
		return STATUS_ERROR;
	}

	/* receive response packet */
	if (PGC_Response_Receive(ssl_tbl, &reply) == NULL)
	{
		show_error("%s: PGC_Response_Receive failed",self);
		return STATUS_ERROR;
	}

	/* any response marks the probe as usable, whatever its packet type */
	probe->status = DATA_USE;

	reply_type = ntohs(reply.packet_no);
	if (reply_type == INIT_OK_PKT)
	{
		show_debug("%s:INIT_OK_PKT recv\n",self);
		return STATUS_OK;
	}
	if (reply_type != INIT_INFO_REQ_PKT)
	{
		/* other packet types need no further action */
		return STATUS_OK;
	}

	/* the probe asked for the service information of its physical server */
	show_debug("%s:admin_send_pgrp_info_in_physical_server start",self);
	admin_send_pgrp_info_in_physical_server(ssl_tbl, probe);
	show_debug("%s:admin_send_pgrp_info_in_physical_server end",self);

	show_debug("%s:admin_send_cluster_info_in_physical_server start",self);
	admin_send_cluster_info_in_physical_server(ssl_tbl, probe);
	show_debug("%s:admin_send_cluster_info_in_physical_server end",self);

	show_debug("%s:admin_send_pglb_info_in_physical_server start",self);
	admin_send_pglb_info_in_physical_server(ssl_tbl, probe);
	show_debug("%s:admin_send_pglb_info_in_physical_server end",self);

	return STATUS_OK;
}

/*
 * init_send_2_probe()
 *
 *     Send an INIT_NOTICE_PKT of type SERVER_TYPE_ADMIN whose body is
 *     one SSL_Server_Info record filled from AdminTbl.
 *
 * RETURN
 *     The result of PGC_Send_Status_Packet().
 */
static int
init_send_2_probe(SSL_Info * ssl_tbl)
{
	Probe_Header fields;
	Probe_Header packet;
	SSL_Server_Info admin_info;

	/* start from zeroed output buffers */
	memset(&packet, 0, sizeof(Probe_Header));
	memset(&admin_info, 0, sizeof(SSL_Server_Info));

	/* header: one record, body of sizeof(SSL_Server_Info) bytes */
	fields.packet_no = INIT_NOTICE_PKT;
	fields.serverType = SERVER_TYPE_ADMIN;
	fields.body_length = sizeof(SSL_Server_Info);
	fields.rec_num = 1;
	PGC_Set_Packet_Header(&packet, &fields);

	/* body: information of the admin server itself */
	PGC_Set_SSL_Server_Info_2_packet(&admin_info, AdminTbl);

	return PGC_Send_Status_Packet(ssl_tbl, &packet, (char *)&admin_info);
}	

/*
 * exit_monitor()
 *
 *     Signal handler of the monitor child process (installed for
 *     SIGINT, SIGQUIT and SIGTERM): terminate with exit status 0.
 *
 *     _exit() is used instead of exit() because exit() is not
 *     async-signal-safe: it runs atexit handlers and flushes stdio
 *     buffers, which can deadlock or corrupt state when the signal
 *     interrupted such a call, and which would run cleanup inherited
 *     from the parent process in this forked child.
 *
 * ARGS
 *     signo: number of the delivered signal (not used)
 */
static void
exit_monitor(int signo)
{
	(void) signo;
	_exit(0);
}

/*
 * admin_send_pglb_info_in_physical_server()
 *
 *     Send every PglbTbl entry whose physicalServerId equals the
 *     probe's as an INFO_NOTICE_PKT of type SERVER_TYPE_PGLB (one record
 *     per packet) and read a response after each send.  The table ends
 *     at the entry with receivePortNumber 0.
 *
 * RETURN
 *     The result of the last PGC_Send_Status_Packet() call.
 *     STATUS_ERROR when PglbTbl or probe is NULL, and also when no
 *     entry matched, because nothing was sent.
 */
static int
admin_send_pglb_info_in_physical_server(SSL_Info * ssl_tbl, SSL_Server_Info * probe)
{
	char * self = "admin_send_pglb_info_in_physical_server()";
	Probe_Header fields;
	Probe_Header packet;
	Pglb_Info payload;
	Pglb_Info * entry = NULL;
	int result = STATUS_ERROR;

	if (PglbTbl == NULL)
	{
		show_error("%s: PglbTbl is null",self);
		return STATUS_ERROR;
	}
	if (probe == NULL)
	{
		show_error("%s: probe is not available in this server",self );
		return STATUS_ERROR;
	}

	for (entry = PglbTbl; entry->receivePortNumber != 0; entry++)
	{
		if (entry->physicalServerId != probe->physicalServerId)
		{
			/* belongs to another physical server */
			continue;
		}

		memset(&packet, 0, sizeof(Probe_Header));
		memset(&payload, 0, sizeof(Pglb_Info));

		fields.packet_no = INFO_NOTICE_PKT;
		fields.serverType = SERVER_TYPE_PGLB;
		fields.body_length = sizeof(Pglb_Info);
		fields.rec_num = 1;
		PGC_Set_Packet_Header(&packet, &fields);
		PGC_Set_Pglb_Info_2_packet(&payload, entry);

		result = PGC_Send_Status_Packet(ssl_tbl, &packet, (char *)&payload);
		/* receive response packet; it is read even when the send failed */
		PGC_Response_Receive(ssl_tbl, &packet);
	}
	return result;
}

/*
 * admin_send_cluster_info_in_physical_server()
 *
 *     Send every ClusterDbTbl entry whose physicalServerId equals the
 *     probe's as an INFO_NOTICE_PKT of type SERVER_TYPE_CLUSTER (one
 *     record per packet) and read a response after each send.  The table
 *     ends at the entry with portNumber 0.
 *
 * RETURN
 *     The result of the last PGC_Send_Status_Packet() call.
 *     STATUS_ERROR when ClusterDbTbl or probe is NULL, and also when no
 *     entry matched, because nothing was sent.
 */
static int
admin_send_cluster_info_in_physical_server(SSL_Info * ssl_tbl, SSL_Server_Info * probe)
{
	char * func ="admin_send_cluster_info_in_physical_server()";
	int status = STATUS_ERROR;
	Probe_Header r_header;
	Probe_Header h_data;
	Cluster_Info * cluster = NULL;
	Cluster_Info body;
	
	if (ClusterDbTbl == NULL)
	{
		/* log the missing table like the pglb and pgrp variants do */
		show_error("%s: ClusterDbTbl is NULL",func);
		return STATUS_ERROR;
	}
	if (probe == NULL)
	{
		show_error("%s: probe is not available in this server",func);
		return STATUS_ERROR;
	}
	cluster = ClusterDbTbl;
	while (cluster->portNumber != 0)
	{
		/* only entries located on the probe's physical server are sent */
		if (cluster->physicalServerId == probe->physicalServerId)
		{
			h_data.packet_no = INFO_NOTICE_PKT;
			h_data.serverType = SERVER_TYPE_CLUSTER;
			h_data.body_length = sizeof(Cluster_Info);
			h_data.rec_num = 1;
			memset(&r_header, 0, sizeof(Probe_Header));
			memset(&body, 0, sizeof(Cluster_Info));
			PGC_Set_Packet_Header(&r_header, &h_data);
			PGC_Set_Cluster_Info_2_packet(&body, cluster);
			status = PGC_Send_Status_Packet(ssl_tbl, &r_header, (char *)&body);
			/* receive response packet; it is read even when the send failed */
			PGC_Response_Receive(ssl_tbl, &r_header);
		}
		cluster ++;
	}
	return status;
}

/*
 * admin_send_pgrp_info_in_physical_server()
 *
 *     Send every PgrpTbl entry whose physicalServerId equals the
 *     probe's as an INFO_NOTICE_PKT of type SERVER_TYPE_PGRP (one record
 *     per packet) and read a response after each send.  The table ends
 *     at the entry with replicationPortNumber 0.
 *
 * RETURN
 *     The result of the last PGC_Send_Status_Packet() call.
 *     STATUS_ERROR when PgrpTbl or probe is NULL, and also when no
 *     entry matched, because nothing was sent.
 */
static int
admin_send_pgrp_info_in_physical_server(SSL_Info * ssl_tbl, SSL_Server_Info * probe)
{
	char * self = "admin_send_pgrp_info_in_physical_server()";
	Probe_Header fields;
	Probe_Header packet;
	Pgrp_Info payload;
	Pgrp_Info * entry = NULL;
	int result = STATUS_ERROR;

	if (PgrpTbl == NULL)
	{
		show_error("%s:PgrpTbl is null",self);
		return STATUS_ERROR;
	}
	if (probe == NULL)
	{
		show_error("%s: probe is not available in this server",self);
		return STATUS_ERROR;
	}

	for (entry = PgrpTbl; entry->replicationPortNumber != 0; entry++)
	{
		if (entry->physicalServerId != probe->physicalServerId)
		{
			/* belongs to another physical server */
			continue;
		}

		memset(&packet, 0, sizeof(Probe_Header));
		memset(&payload, 0, sizeof(Pgrp_Info));

		fields.packet_no = INFO_NOTICE_PKT;
		fields.serverType = SERVER_TYPE_PGRP;
		fields.body_length = sizeof(Pgrp_Info);
		fields.rec_num = 1;
		PGC_Set_Packet_Header(&packet, &fields);
		PGC_Set_Pgrp_Info_2_packet(&payload, entry);

		result = PGC_Send_Status_Packet(ssl_tbl, &packet, (char*)&payload);
		/* receive response packet; it is read even when the send failed */
		PGC_Response_Receive(ssl_tbl, &packet);
	}
	return result;
}
