/*****************************************************************************/
/* The development of this program is partly supported by IPA                */
/* (Information-Technology Promotion Agency, Japan).                         */
/*****************************************************************************/

/*****************************************************************************/
/*  bt_relfs.c - branch trace module (relayfs interface)                     */
/*  Copyright: Copyright (c) Hitachi, Ltd. 2005-2006                         */
/*             Authors: Yumiko Sugita (sugita@sdl.hitachi.co.jp),            */
/*                      Satoshi Fujiwara (sa-fuji@sdl.hitachi.co.jp)         */
/*                                                                           */
/*  This program is free software; you can redistribute it and/or modify     */
/*  it under the terms of the GNU General Public License as published by     */
/*  the Free Software Foundation; either version 2 of the License, or        */
/*  (at your option) any later version.                                      */
/*                                                                           */
/*  This program is distributed in the hope that it will be useful,          */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/*  GNU General Public License for more details.                             */
/*                                                                           */
/*  You should have received a copy of the GNU General Public License        */
/*  along with this program; if not, write to the Free Software              */
/*  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111 USA      */
/*****************************************************************************/

#include <linux/version.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#ifdef USE_SYS_RELAYFS
#  include <linux/relayfs_fs.h>
#else
#  include "relayfs_fs.h"
#endif
#include "bt.h"

/* Name of the relayfs directory created by relfs_init(). */
static char *relfs_dirname = "btrax";
/* Base file name handed to relay_open() in relfs_init(). */
static char *relfs_basename = "cpu";
/* relayfs directory dentry; set by relfs_init(), removed by relfs_cleanup(). */
static struct dentry *dir = NULL;

/* relayfs channel opened by relfs_init(); every write helper here uses it. */
struct rchan *bt_channel = NULL;
/*
 * Incremented by subbuf_start_handler() when a sub-buffer is started while
 * the buffer is full (subject to the mode checks made there); reported
 * through the "dropped" proc entry.
 */
size_t bt_dropped;

/*
 * check_and_wait_relfs_write() sleeps on this queue; consumed_write()
 * wakes it.
 */
static DECLARE_WAIT_QUEUE_HEAD(relfs_write_wait);

/*
 * proc entries created under proc_btrax by relfs_init().  proc_btrax itself
 * is defined elsewhere.  The per-CPU "produced"/"consumed" entries are kept
 * in bt_info_per_cpu[], not here.
 */
extern struct proc_dir_entry *proc_btrax;
static struct proc_dir_entry *proc_subbuf_sz;
static struct proc_dir_entry *proc_n_subbufs;
static struct proc_dir_entry *proc_subbuf_threshold;
static struct proc_dir_entry *proc_dropped;

/*
 * Per-CPU bookkeeping defined elsewhere; this file uses its p_cpuN,
 * p_produced and p_consumed members.
 */
extern struct info_per_cpu bt_info_per_cpu[NR_CPUS];

/*
 * Defined elsewhere.  subbuf_size and subbuf_num are passed to relay_open();
 * subbuf_sleep_threshold is the limit checked by is_relfs_writeable(); mode
 * is tested with is_start() and is_kern_all_by_hook().
 */
extern size_t subbuf_size;
extern size_t subbuf_num;
extern size_t subbuf_sleep_threshold;
extern int mode;

#ifdef DEBUG
/* Per-CPU sequence counters stamped into the synthetic debug records. */
unsigned long wcnt[NR_CPUS];

/*
 * DEBUG build: the contents of @p are ignored.  For every whole bt_record
 * that fits into @size bytes, one synthetic record (initialised with
 * BT_FLAG_DEBUG) is written to @channel; its 'from' member carries the
 * running per-CPU write count taken from wcnt[].
 */
static void relfs_relay_write(struct rchan *channel, void *p, size_t size)
{
	struct bt_record dbg_rec = {0,0,BT_FLAG_DEBUG};
	unsigned long *seq = &wcnt[smp_processor_id()];
	size_t remaining;

	for (remaining = size / sizeof(dbg_rec); remaining > 0; remaining--) {
		dbg_rec.from = (*seq)++;
		relay_write(channel, &dbg_rec, sizeof(dbg_rec));
	}
}
#else
/*
 * Write the @size bytes starting at @p to @channel, one bt_record-sized
 * relay_write() at a time.
 */
static void relfs_relay_write(struct rchan *channel, void *p, size_t size)
{
	struct bt_record *cur = (struct bt_record*)p;
	struct bt_record *end = (struct bt_record*)((char*)p + size);

	while (cur < end) {
		relay_write(channel, cur, sizeof(struct bt_record));
		cur++;
	}
}
#endif

/* Announce the flush via serial_prints(), then flush bt_channel. */
void relfs_flush(void)
{
	struct rchan *chan;

	serial_prints("relay_flush called\n");
	chan = bt_channel;
	relay_flush(chan);
}

/* Write @size bytes of records starting at @p to the trace channel. */
void write_bt_records(void *p, size_t size)
{
	struct rchan *chan = bt_channel;

	relfs_relay_write(chan, p, size);
}

void write_pid_record(pid_t pid)
{
	struct pid_record rec;
	struct timeval tv;

	do_gettimeofday(&tv);
	rec.pid = pid;
	rec.tv_sec = tv.tv_sec;
	rec.tv_usec = tv.tv_usec | BT_FLAG_PID;

	relfs_relay_write(bt_channel, &rec, sizeof(rec));
}

void write_warn_record(unsigned long left)
{
	struct warn_record rec;

	rec.left = left;
	rec.reserve = 0;
	rec.flags = BT_FLAG_WARN;

	relfs_relay_write(bt_channel, &rec, sizeof(rec));
}

/*
 * Return how many sub-buffers of @buf have been produced but not yet
 * consumed.  Older channel versions keep the two counters as atomics.
 */
size_t get_ready_subbuf_num(struct rchan_buf *buf)
{
#if RELAYFS_CHANNEL_VERSION < 5
	int produced = atomic_read(&buf->subbufs_produced);
	int consumed = atomic_read(&buf->subbufs_consumed);

	return produced - consumed;
#else	/* btrax relayfs */
	return buf->subbufs_produced - buf->subbufs_consumed;
#endif
}

/*
 * Return non-zero when the current CPU's channel buffer has fewer ready
 * (produced but unconsumed) sub-buffers than subbuf_sleep_threshold.
 * Local interrupts are disabled while the CPU's buffer is looked up and
 * its counters are sampled.
 */
static int is_relfs_writeable(void)
{
	struct rchan_buf *cpu_buf;
	unsigned long irq_flags;
	int writeable;

	local_irq_save(irq_flags);
	cpu_buf = bt_channel->buf[smp_processor_id()];
	writeable = (get_ready_subbuf_num(cpu_buf) < subbuf_sleep_threshold);
	local_irq_restore(irq_flags);

	return writeable;
}

/*
 * Sleep interruptibly on relfs_write_wait until is_relfs_writeable()
 * reports true.  The result of the wait is not examined, so the caller
 * cannot tell whether the wait ended early.
 */
void check_and_wait_relfs_write(void)
{
	(void)wait_event_interruptible(relfs_write_wait,
				       is_relfs_writeable());
}

#if RELAYFS_CHANNEL_VERSION < 5
/*
 * Return 1 when the number of produced-but-unconsumed sub-buffers of @buf
 * exceeds n_subbufs - 1, otherwise 0.  The test is applied regardless of
 * buf->chan->overwrite, i.e. fullness is reported in overwrite mode too.
 */
static inline int relay_buf_full(struct rchan_buf *buf)
{
	int in_flight;

	in_flight = atomic_read(&buf->subbufs_produced);
	in_flight -= atomic_read(&buf->subbufs_consumed);

	return in_flight > buf->chan->n_subbufs - 1;
}

/*
 * subbuf_start callback for channel versions < 5.
 *
 * Counts a drop in bt_dropped when the buffer is full, unless the mode
 * satisfies is_kern_all_by_hook() or is_start().  Stores the padding of the
 * previous sub-buffer in that sub-buffer's first word (when there is a
 * previous sub-buffer) and returns the number of bytes to reserve at the
 * head of the new sub-buffer for its own padding word.
 */
static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
				unsigned prev_subbuf_idx, void *prev_subbuf)
{
	unsigned prev_padding = buf->padding[prev_subbuf_idx];
	int may_drop = !(is_kern_all_by_hook(mode) || is_start(mode));

	if (may_drop && relay_buf_full(buf))
		bt_dropped++;

	if (prev_subbuf != NULL)
		*((unsigned*)prev_subbuf) = prev_padding;

	/* reserve space for padding */
	return sizeof(prev_padding);
}

/*
 * buf_full callback for channel versions < 5: store the padding recorded
 * for sub-buffer @subbuf_idx in the first word of @subbuf.
 */
static void buf_full_handler(struct rchan_buf *buf, unsigned subbuf_idx,
			     void *subbuf)
{
	unsigned *header = (unsigned*)subbuf;

	*header = buf->padding[subbuf_idx];
}

/*
 * Find which slot of the owning channel's buf[] array holds @rbuf.
 * Returns that index, or -1 if none of the NR_CPUS slots matches.
 */
static int get_cpu_from_rchan_buf(struct rchan_buf *rbuf)
{
	struct rchan *owner = rbuf->chan;
	int cpu = 0;

	while (cpu < NR_CPUS) {
		if (owner->buf[cpu] == rbuf)
			return cpu;
		cpu++;
	}
	return -1;
}

#else
/*
 * subbuf_start callback for channel versions >= 5.
 *
 * Stores @prev_padding in the first word of the previous sub-buffer (if
 * any) and reserves room for the same word at the head of the new one.
 * When the buffer is full: returns 0 if is_start(mode) holds; otherwise
 * counts a drop in bt_dropped unless is_kern_all_by_hook(mode) holds.
 * Returns 1 in every other case.
 */
static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
				void *prev_subbuf, unsigned int prev_padding)
{
	if (prev_subbuf != NULL)
		*((unsigned*)prev_subbuf) = prev_padding;
	subbuf_start_reserve(buf, sizeof(prev_padding));

	if (!relay_buf_full(buf))
		return 1;

	if (is_start(mode))
		return 0;

	/* When no-overwrite mode, log data is never dropped. */
	if (!is_kern_all_by_hook(mode))
		bt_dropped++;

	return 1;
}
/* In this branch the CPU number is read directly from the buffer's cpu member. */
#define get_cpu_from_rchan_buf(rbuf)	((rbuf)->cpu)

#endif

/*
 * Callback table passed to relay_open() in relfs_init().  The buf_full
 * callback is only installed for channel versions < 5, the only
 * configuration in which buf_full_handler() is compiled.
 */
static struct rchan_callbacks relayfs_callbacks =
{
	.subbuf_start = subbuf_start_handler,
#if RELAYFS_CHANNEL_VERSION < 5
	.buf_full = buf_full_handler,
#endif
};

/*
 * read_proc handler for the "subbuf_size" entry.
 *
 * Formats subbuf_size as a decimal number followed by a newline into
 * @buffer and returns the number of characters written.  @start, @off,
 * @count, @eof and @data are not used.
 *
 * subbuf_size is a size_t, so it is printed as unsigned long; passing it
 * to "%d" mismatches the argument width on 64-bit builds.
 */
static int subbuf_sz_read(char *buffer, char **start, off_t off,
			  int count, int *eof, void *data)
{
	return sprintf(buffer, "%lu\n", (unsigned long)subbuf_size);
}

/*
 * read_proc handler for the "n_subbufs" entry.
 *
 * Formats subbuf_num as a decimal number followed by a newline into
 * @buffer and returns the number of characters written.  @start, @off,
 * @count, @eof and @data are not used.
 *
 * subbuf_num is a size_t, so it is printed as unsigned long; passing it
 * to "%d" mismatches the argument width on 64-bit builds.
 */
static int n_subbufs_read(char *buffer, char **start, off_t off,
			  int count, int *eof, void *data)
{
	return sprintf(buffer, "%lu\n", (unsigned long)subbuf_num);
}

/*
 * read_proc handler for the "subbuf_threshold" entry.
 *
 * Formats subbuf_sleep_threshold as a decimal number followed by a newline
 * into @buffer and returns the number of characters written.  @start,
 * @off, @count, @eof and @data are not used.
 *
 * subbuf_sleep_threshold is a size_t, so it is printed as unsigned long;
 * passing it to "%d" mismatches the argument width on 64-bit builds.
 */
static int subbuf_sleep_threshold_read(char *buffer, char **start, off_t off,
				       int count, int *eof, void *data)
{
	return sprintf(buffer, "%lu\n",
		       (unsigned long)subbuf_sleep_threshold);
}

/*
 * read_proc handler for the "dropped" entry.
 *
 * Formats bt_dropped as a decimal number followed by a newline into
 * @buffer and returns the number of characters written.  @start, @off,
 * @count, @eof and @data are not used.
 *
 * bt_dropped is a size_t, so it is printed as unsigned long; passing it
 * to "%d" mismatches the argument width on 64-bit builds.
 */
static int dropped_read(char *buffer, char **start, off_t off,
			int count, int *eof, void *data)
{
	return sprintf(buffer, "%lu\n", (unsigned long)bt_dropped);
}

/*
 * read_proc handler for a per-CPU "produced" entry.
 *
 * @data is the rchan_buf attached to the entry by relfs_init().  The
 * buffer's subbufs_produced counter is copied into @buffer as a raw int
 * (not text) and sizeof(int) is returned.
 */
static int produced_read(char *buffer, char **start, off_t off,
			 int count, int *eof, void *data)
{
	struct rchan_buf *cpu_buf = (struct rchan_buf*)data;
	int value;

#if RELAYFS_CHANNEL_VERSION < 5
	value = atomic_read(&cpu_buf->subbufs_produced);
#else
	value = cpu_buf->subbufs_produced;
#endif
	memcpy(buffer, &value, sizeof(value));

	return sizeof(value);
}

/*
 * read_proc handler for a per-CPU "consumed" entry.
 *
 * @data is the rchan_buf attached to the entry by relfs_init().  The
 * buffer's subbufs_consumed counter is copied into @buffer as a raw int
 * (not text) and sizeof(int) is returned.
 */
static int consumed_read(char *buffer, char **start, off_t off,
			 int count, int *eof, void *data)
{
	struct rchan_buf *cpu_buf = (struct rchan_buf*)data;
	int value;

#if RELAYFS_CHANNEL_VERSION < 5
	value = atomic_read(&cpu_buf->subbufs_consumed);
#else
	value = cpu_buf->subbufs_consumed;
#endif
	memcpy(buffer, &value, sizeof(value));

	return sizeof(value);
}

static int consumed_write(struct file *file, const char *user_buf,
			  unsigned long count, void *data)
{
	struct rchan_buf *rbuf = (struct rchan_buf*)data;
	size_t consumed;

	if (copy_from_user(&consumed, user_buf, sizeof(consumed))) {
		printk("%s: copy_from_user failed.\n", MOD_NAME);
		return -EFAULT;
	}
		
	relay_subbufs_consumed(rbuf->chan, get_cpu_from_rchan_buf(rbuf),
			       consumed);

	if (is_relfs_writeable())
		wake_up_interruptible(&relfs_write_wait);

	return count;
}

int relfs_init(void)
{
	int i;
	struct info_per_cpu *info;

	dir = relayfs_create_dir(relfs_dirname, NULL);
	if (dir == NULL) {
		printk("%s: cannot create relayfs directory\n", MOD_NAME);
		return -ENOMEM;
	}
#if RELAYFS_CHANNEL_VERSION < 5
	bt_channel = relay_open(relfs_basename, dir, subbuf_size, subbuf_num,
				!is_start(mode), &relayfs_callbacks);
#else
	bt_channel = relay_open(relfs_basename, dir, subbuf_size, subbuf_num,
				&relayfs_callbacks);
#endif
	if (bt_channel == NULL) {
		printk("%s: cannot open relayfs channel\n", MOD_NAME);
		return -ENOMEM;
	}
	proc_subbuf_sz = create_proc_entry("subbuf_size", 0400, proc_btrax);
	if (!proc_subbuf_sz)
		return -ENOMEM;
	proc_subbuf_sz->read_proc = subbuf_sz_read;
	proc_n_subbufs = create_proc_entry("n_subbufs", 0400, proc_btrax);
	if (!proc_n_subbufs)
		return -ENOMEM;
	proc_n_subbufs->read_proc = n_subbufs_read;
	proc_subbuf_threshold = create_proc_entry("subbuf_threshold", 0400,
						  proc_btrax);
	if (!proc_subbuf_threshold)
		return -ENOMEM;
	proc_subbuf_threshold->read_proc = subbuf_sleep_threshold_read;
	proc_dropped = create_proc_entry("dropped", 0400, proc_btrax);
	if (!proc_dropped)
		return -ENOMEM;
	proc_dropped->read_proc = dropped_read;

	for_each_online_cpu(i) {
		info = &bt_info_per_cpu[i];
		info->p_produced = create_proc_entry("produced", 0400,
						     info->p_cpuN);
		if (!info->p_produced)
			return -ENOMEM;
		info->p_produced->data = bt_channel->buf[i];
		info->p_produced->read_proc = produced_read;

		info->p_consumed = create_proc_entry("consumed", 0600,
						     info->p_cpuN);
		if (!info->p_consumed)
			return -ENOMEM;
		info->p_consumed->data = bt_channel->buf[i];
		info->p_consumed->read_proc = consumed_read;
		info->p_consumed->write_proc = consumed_write;
	}
	return 0;
}

/*
 * Collect the pids of tasks that hold an open file on dentry @d.
 *
 * @d:      dentry to look for in each task's open-file table.
 * @pp_pid: in/out cursor into the caller's pid array; each hit is stored at
 *          **pp_pid and the cursor is advanced.
 * @p_max:  one past the last usable slot of that array.
 *
 * The task list is walked backwards from init_task with prev_task() while
 * tasklist_lock is write-locked with interrupts disabled.  Each task
 * contributes at most one pid per call.  The walk stops early as soon as
 * the array is full.  The store happens before the bounds check, so the
 * caller must pass a cursor that is still below @p_max.
 */
static void chk_procs_using_relfs(struct dentry *d,
				  pid_t **pp_pid, pid_t *p_max)
{
	struct task_struct *p;
	struct files_struct *files;
	struct file *file = NULL;
	unsigned int i;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
	struct fdtable *fdt;
#endif

	write_lock_irq(&tasklist_lock);
	p = &init_task;
	do {
		p = prev_task(p);
		files = p->files;
		/* No file table: 'continue' goes to the loop condition. */
		if (!files)
			continue;
		spin_lock(&files->file_lock);
		//serial_prints("chk-pid(%d)\n", p->pid);
		/* From 2.6.14 on, the fd array is reached via files_fdtable(). */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
		fdt = files_fdtable(files);
		for (i = 0; i < fdt->max_fds; i++) {
			file = fdt->fd[i];
#else
		for (i = 0; i < files->max_fds; i++) {
			file = files->fd[i];
#endif
			if (file && file->f_dentry == d) {
				*(*pp_pid)++ = p->pid;
				/* Array full: drop the file lock and stop. */
				if (*pp_pid >= p_max) {
					spin_unlock(&files->file_lock);
					goto EXIT;
				}
				/* One hit per task is enough. */
				break;
			}
		}
		spin_unlock(&files->file_lock);
	} while (p != &init_task);
EXIT:
	write_unlock_irq(&tasklist_lock);
}

/*
 * Send SIGKILL to every task that still has a file under the relayfs
 * directory open.
 *
 * Each pass collects up to RELFS_KILL_BATCH pids (scanning the directory's
 * children under dcache_lock), signals them, and then repeats.  The loop
 * ends on the first pass that finds no such task.  Does nothing when the
 * directory has not been created.
 *
 * The pid array has a compile-time size rather than being a variable
 * length array, keeping the stack footprint fixed.
 */
static void kill_procs_using_relfs(void)
{
	enum { RELFS_KILL_BATCH = 16 };
	struct dentry *child;
	pid_t pid[RELFS_KILL_BATCH], *p, *p_max;

	if (!dir)
		return;
	do {
		p = pid;
		p_max = pid + RELFS_KILL_BATCH;
		spin_lock(&dcache_lock);
		list_for_each_entry(child, &dir->d_subdirs, d_child) {
			chk_procs_using_relfs(child, &p, p_max);
			/* Batch full: signal these first, rescan next pass. */
			if (p >= p_max)
				break;
		}
		spin_unlock(&dcache_lock);
		/* p now marks the end of the pids found in this pass. */
		p_max = p;
		for (p = pid; p < p_max; p++) {
			//serial_prints("  kill(%d)\n", *p);
			kill_proc(*p, SIGKILL, 1);
		}
		cpu_relax();
		/* p == pid here only when this pass found nothing. */
	} while (p != pid);
}

/*
 * Undo relfs_init(): remove the proc entries, kill tasks that still hold
 * relayfs files open, close the channel and remove the relayfs directory.
 *
 * Every step is guarded by a NULL check, so this works after a partially
 * completed relfs_init().  Each pointer is reset to NULL once its object
 * has been released, so the guards remain valid if this function is
 * called again and no stale pointer is left behind in static/global
 * storage.
 */
void relfs_cleanup(void)
{
	int i;
	struct info_per_cpu *info;

	if (proc_subbuf_sz) {
		remove_proc_entry(proc_subbuf_sz->name, proc_btrax);
		proc_subbuf_sz = NULL;
	}
	if (proc_n_subbufs) {
		remove_proc_entry(proc_n_subbufs->name, proc_btrax);
		proc_n_subbufs = NULL;
	}
	if (proc_subbuf_threshold) {
		remove_proc_entry(proc_subbuf_threshold->name, proc_btrax);
		proc_subbuf_threshold = NULL;
	}
	if (proc_dropped) {
		remove_proc_entry(proc_dropped->name, proc_btrax);
		proc_dropped = NULL;
	}

	/* Readers must be gone before the channel and directory go away. */
	kill_procs_using_relfs();

	for (i = 0; i < NR_CPUS; i++) {
		info = &bt_info_per_cpu[i];
		if (info->p_produced) {
			remove_proc_entry(info->p_produced->name, info->p_cpuN);
			info->p_produced = NULL;
		}
		if (info->p_consumed) {
			remove_proc_entry(info->p_consumed->name, info->p_cpuN);
			info->p_consumed = NULL;
		}
	}
	if (bt_channel) {
		relay_close(bt_channel);
		bt_channel = NULL;
	}
	if (dir) {
		relayfs_remove_dir(dir);
		dir = NULL;
	}
}
