/* $Id: virtual.c,v 1.13 2003/05/14 21:29:05 mikpe Exp $
 * Library interface to virtual per-process performance counters.
 *
 * Copyright (C) 1999-2003  Mikael Pettersson
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include "libperfctr.h"

/* Length in bytes of the state mapping: passed to mmap() in
   vperfctr_open_name() and to munmap() in vperfctr_close().
   NOTE(review): hard-coded; presumably equals the page size on the
   supported targets -- confirm before porting. */
#define PAGE_SIZE	4096

/*
 * Operations on the process' own virtual-mode perfctrs.
 */

/* User-space handle for one virtual perfctr. */
struct vperfctr {
    /* XXX: point to &vperfctr_state.cpu_state instead? */
    /* Read-only, PAGE_SIZE-long mapping of 'fd' obtained with mmap().
       The readers below re-read fields through this pointer and retry
       when start.tsc changes, hence the volatile qualifier. */
    volatile const struct vperfctr_state *kstate;
    /* Descriptor returned by open(); target of all ioctl()s. */
    int fd;
    /* Non-zero iff PERFCTR_FEATURE_RDPMC was set in the cpu_features
       reported by perfctr_info() at open time. */
    unsigned char have_rdpmc;
};

/*
 * Open the perfctr file 'filename' and fill in '*perfctr': the file
 * descriptor, the RDPMC capability flag, and a read-only shared mapping
 * of the state page.
 *
 * The file is first opened with O_CREAT; if that fails with EEXIST the
 * open is retried without O_CREAT and the perfctr is treated as
 * pre-existing (it is then not unlinked on a later failure).
 *
 * Returns 0 on success and -1 on failure. On failure the descriptor has
 * been closed again and errno is left as set by the step that failed
 * (the cleanup calls do not overwrite it).
 */
static int vperfctr_open_name(const char filename[], struct vperfctr *perfctr)
{
    struct perfctr_info info;
    int isnew;
    int saved_errno;

    isnew = 1;
    /* open() must be given a mode argument whenever O_CREAT is set;
       omitting it is undefined behaviour. The mode value is 0 because
       no permission bits are being requested here. */
    perfctr->fd = open(filename, O_RDONLY|O_CREAT, 0);
    if( perfctr->fd < 0 && errno == EEXIST ) {
	isnew = 0;
	perfctr->fd = open(filename, O_RDONLY);
    }
    if( perfctr->fd < 0 )
	goto out_perfctr;
    if( perfctr_abi_check_fd(perfctr->fd) < 0 )
	goto out_fd;
    if( perfctr_info(perfctr->fd, &info) < 0 )
	goto out_fd;
    perfctr->have_rdpmc = (info.cpu_features & PERFCTR_FEATURE_RDPMC) != 0;
    perfctr->kstate = mmap(NULL, PAGE_SIZE, PROT_READ,
			   MAP_SHARED, perfctr->fd, 0);
    if( perfctr->kstate != MAP_FAILED )
	return 0;
    /* mmap() failed, so no mapping exists: there is nothing to munmap()
       and calling it on MAP_FAILED would only clobber errno. */
 out_fd:
    /* Keep the errno of the original failure across the cleanup calls. */
    saved_errno = errno;
    if( isnew )
	vperfctr_unlink(perfctr);
    close(perfctr->fd);
    errno = saved_errno;
 out_perfctr:
    return -1;
}

/*
 * Allocate a handle and attach it to "/proc/self/perfctr".
 * Returns the handle, or NULL when either malloc() or
 * vperfctr_open_name() fails (the allocation is released in that case).
 */
struct vperfctr *vperfctr_open(void)
{
    struct vperfctr *handle = malloc(sizeof(*handle));

    if( handle == NULL )
	return NULL;
    if( vperfctr_open_name("/proc/self/perfctr", handle) != 0 ) {
	free(handle);
	return NULL;
    }
    return handle;
}

/* Fill '*info' by calling perfctr_info() on the handle's descriptor;
   returns whatever perfctr_info() returns. */
int vperfctr_info(const struct vperfctr *vperfctr, struct perfctr_info *info)
{
    const int fd = vperfctr->fd;

    return perfctr_info(fd, info);
}

/* Execute RDTSC and store the EAX result (the "=a" output) in 'low'.
   EDX is listed as clobbered, i.e. the other half of the result is
   discarded. */
#define rdtscl(low)	\
	__asm__ __volatile__("rdtsc" : "=a"(low) : : "edx")
/* Execute RDPMC with 'ctr' in ECX (the "c" input) and store the EAX
   result in 'low'; EDX is listed as clobbered. */
#define rdpmcl(ctr,low)	\
	__asm__ __volatile__("rdpmc" : "=a"(low) : "c"(ctr) : "edx")

/* For gcc older than 2.96, replace __builtin_expect with a macro that
   just yields its first argument. */
#if (__GNUC__ < 2) ||  (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
#define __builtin_expect(x, expected_value) (x)
#endif

/* Branch-prediction hints: the condition is expected to be true
   (likely) or false (unlikely). */
#define likely(x)	__builtin_expect((x),1)
#define unlikely(x)	__builtin_expect((x),0)

/* Whether RDPMC may be used: a compile-time constant 1 on x86-64,
   otherwise the have_rdpmc flag recorded in the handle at open time. */
#if defined(__x86_64__)
#define vperfctr_has_rdpmc(vperfctr)	(1)
#else
#define vperfctr_has_rdpmc(vperfctr)	((vperfctr)->have_rdpmc)
#endif

/*
 * Return the current TSC sum for this perfctr.
 *
 * If cstatus is non-zero the result is sum.tsc plus the 32-bit
 * difference between a fresh RDTSC reading and start.tsc. start.tsc is
 * read before and after sampling; if the two reads differ the sample is
 * discarded and taken again. If cstatus is zero, sum.tsc is returned
 * as is.
 */
unsigned long long vperfctr_read_tsc(const struct vperfctr *self)
{
    unsigned long long sum;
    unsigned int tsc0, tsc1, now;
    volatile const struct vperfctr_state *kstate;

    kstate = self->kstate;
    if( likely(kstate->cpu_state.cstatus != 0) ) {
	tsc0 = kstate->cpu_state.start.tsc;
    retry:
	rdtscl(now);
	sum = kstate->cpu_state.sum.tsc;
	/* Re-read start.tsc: a changed value means 'sum' and 'tsc0' may
	   not belong together, so the sample must be retaken. */
	tsc1 = kstate->cpu_state.start.tsc;
	if( likely(tsc1 == tsc0) )
	    /* (now - tsc0) is unsigned 32-bit arithmetic and so wraps
	       modulo 2^32 before being added to the 64-bit sum. */
	    return sum += (now - tsc0);
	tsc0 = tsc1;
	goto retry; /* better gcc code than with a do{}while() loop */
    }
    return kstate->cpu_state.sum.tsc;
}

/*
 * Return the current sum for performance counter number 'i'.
 *
 * Fast path (cstatus has the TSC flag and RDPMC is usable): read the
 * counter selected by control.pmc_map[i] with RDPMC and return
 * sum.pmc[i] plus the 32-bit difference to start.pmc[i]. start.tsc is
 * read before and after; if it changed, the sample is retaken.
 *
 * Otherwise: if cstatus is non-zero, issue a VPERFCTR_SAMPLE ioctl
 * (its return value is ignored), then return sum.pmc[i].
 *
 * 'i' is used as an array index without any range check here.
 */
unsigned long long vperfctr_read_pmc(const struct vperfctr *self, unsigned i)
{
    unsigned long long sum;
    unsigned int start, now;
    unsigned int tsc0, tsc1;
    volatile const struct vperfctr_state *kstate;
    unsigned int cstatus;

    kstate = self->kstate;
    cstatus = kstate->cpu_state.cstatus;
    /* gcc 3.0 generates crap code for likely(E1 && E2) :-( */
    if( perfctr_cstatus_has_tsc(cstatus) && vperfctr_has_rdpmc(self) ) {
	 tsc0 = kstate->cpu_state.start.tsc;
    retry:
	 rdpmcl(kstate->cpu_state.control.pmc_map[i], now);
	 start = kstate->cpu_state.start.pmc[i];
	 sum = kstate->cpu_state.sum.pmc[i];
	 /* Re-read start.tsc; a change invalidates the values just read. */
	 tsc1 = kstate->cpu_state.start.tsc;
	 if( likely(tsc1 == tsc0) ) {
	      /* (now - start) wraps modulo 2^32 (unsigned int). */
	      return sum += (now - start);
	 }
	 tsc0 = tsc1;
	 goto retry;
    }
    /* NOTE(review): presumably VPERFCTR_SAMPLE makes the driver bring
       the sums up to date before they are read below -- confirm. */
    if( cstatus != 0 )
	ioctl(self->fd, VPERFCTR_SAMPLE, NULL);
    return kstate->cpu_state.sum.pmc[i];
}

/*
 * Copy the TSC sum and the first nrctrs PMC sums into '*sum' without
 * using RDTSC/RDPMC.
 *
 * A VPERFCTR_SAMPLE ioctl is issued first (result ignored). The copy is
 * then repeated until start.tsc reads the same before and after it.
 */
static void vperfctr_read_ctrs_slow(const struct vperfctr *vperfctr,
				    struct perfctr_sum_ctrs *sum)
{
    volatile const struct vperfctr_state *state;
    unsigned int status, count;
    unsigned int before, after;
    unsigned int k;

    ioctl(vperfctr->fd, VPERFCTR_SAMPLE, NULL);
    state = vperfctr->kstate;
    status = state->cpu_state.cstatus;
    count = perfctr_cstatus_nrctrs(status);
    after = state->cpu_state.start.tsc;
    for(;;) {
	before = after;
	sum->tsc = state->cpu_state.sum.tsc;
	for(k = 0; k != count; ++k)
	    sum->pmc[k] = state->cpu_state.sum.pmc[k];
	after = state->cpu_state.start.tsc;
	/* Unchanged start.tsc: accept this copy. */
	if( after == before )
	    break;
    }
}

/*
 * Store the current TSC sum and all enabled PMC sums in '*sum'.
 *
 * When the fast path applies, each value is computed in user space as
 * the stored sum plus the 32-bit difference between a fresh
 * RDTSC/RDPMC reading and the stored start value; the whole set is
 * recomputed if start.tsc changed while it was being gathered.
 * Otherwise the work is delegated to vperfctr_read_ctrs_slow().
 */
void vperfctr_read_ctrs(const struct vperfctr *self,
			struct perfctr_sum_ctrs *sum)
{
    unsigned int tsc0, now;
    unsigned int cstatus, nrctrs;
    volatile const struct vperfctr_state *kstate;
    int i;

    /* Fast path is impossible if the TSC isn't being sampled (bad idea,
       but on WinChip you don't have a choice), or at least one PMC is
       enabled but the CPU doesn't have RDPMC. */
    kstate = self->kstate;
    cstatus = kstate->cpu_state.cstatus;
    nrctrs = perfctr_cstatus_nrctrs(cstatus);
    if( perfctr_cstatus_has_tsc(cstatus) && (!nrctrs || vperfctr_has_rdpmc(self)) ) {
    retry:
	tsc0 = kstate->cpu_state.start.tsc;
	rdtscl(now);
	sum->tsc = kstate->cpu_state.sum.tsc + (now - tsc0);
	/* Walk the counters from nrctrs-1 down to 0; 'i' is signed so
	   that the --i >= 0 test terminates. */
	for(i = nrctrs; --i >= 0;) {
	    rdpmcl(kstate->cpu_state.control.pmc_map[i], now);
	    sum->pmc[i] = kstate->cpu_state.sum.pmc[i] + (now - kstate->cpu_state.start.pmc[i]);
	}
	/* start.tsc unchanged since the top of this pass: accept it. */
	if( likely(tsc0 == kstate->cpu_state.start.tsc) )
	    return;
	goto retry;
    }
    vperfctr_read_ctrs_slow(self, sum);
}

/*
 * Copy the counter sums and/or the control settings out of the mapped
 * state. Either of 'sum' and 'control' may be NULL, in which case that
 * part is skipped.
 *
 * A VPERFCTR_SAMPLE ioctl is issued first (result ignored); the copy is
 * then repeated until start.tsc reads the same before and after it.
 * Always returns 0.
 */
int vperfctr_read_state(const struct vperfctr *self, struct perfctr_sum_ctrs *sum,
			struct vperfctr_control *control)
{
    volatile const struct vperfctr_state *state;
    unsigned int before, after;

    ioctl(self->fd, VPERFCTR_SAMPLE, NULL);
    state = self->kstate;
    after = state->cpu_state.start.tsc;
    for(;;) {
	before = after;
	/* XXX: this copies more than necessary */
	if( sum != NULL )
	    *sum = state->cpu_state.sum;
	if( control != NULL ) {
	    control->si_signo = state->si_signo;
	    control->cpu_control = state->cpu_state.control;
	}
	after = state->cpu_state.start.tsc;
	if( after == before )
	    break;
    }
    return 0;
}

/* Pass '*control' to the driver with a VPERFCTR_CONTROL ioctl on the
   handle's descriptor; returns the ioctl() result. */
int vperfctr_control(const struct vperfctr *perfctr,
		     struct vperfctr_control *control)
{
    const int fd = perfctr->fd;

    return ioctl(fd, VPERFCTR_CONTROL, control);
}

int vperfctr_stop(const struct vperfctr *perfctr)
{
    struct vperfctr_control control;
    memset(&control, 0, sizeof control);
    return ioctl(perfctr->fd, VPERFCTR_CONTROL, &control);
}

int vperfctr_is_running(const struct vperfctr *perfctr)
{
    return perfctr->kstate->cpu_state.cstatus != 0;
}

/* Issue a VPERFCTR_IRESUME ioctl (no argument) on the handle's
   descriptor; returns the ioctl() result. */
int vperfctr_iresume(const struct vperfctr *perfctr)
{
    const int fd = perfctr->fd;

    return ioctl(fd, VPERFCTR_IRESUME, NULL);
}

/* Issue a VPERFCTR_UNLINK ioctl (no argument) on the handle's
   descriptor; returns the ioctl() result. */
int vperfctr_unlink(const struct vperfctr *perfctr)
{
    const int fd = perfctr->fd;

    return ioctl(fd, VPERFCTR_UNLINK, NULL);
}

/* Release everything held by the handle: unmap the state page, close
   the descriptor, and free() the handle itself. The pointer must not
   be used afterwards. */
void vperfctr_close(struct vperfctr *perfctr)
{
    void *mapping = (void*)perfctr->kstate;
    const int fd = perfctr->fd;

    munmap(mapping, PAGE_SIZE);
    close(fd);
    free(perfctr);
}

/*
 * Operations on other processes' virtual-mode perfctrs.
 */

/* Handle for another process' virtual perfctr: an ordinary vperfctr
   handle plus the pid it was opened for. */
struct rvperfctr {
    /* rvperfctr_close() hands &vperfctr to vperfctr_close(), which
       free()s it; that is only valid while this member sits at
       offset 0. */
    struct vperfctr vperfctr;	/* must be first for the close() operation */
    /* The pid passed to rvperfctr_open(). */
    int pid;
};

/*
 * Allocate a handle and attach it to "/proc/<pid>/perfctr".
 * Returns the handle, or NULL when either malloc() or
 * vperfctr_open_name() fails (the allocation is released in that case).
 */
struct rvperfctr *rvperfctr_open(int pid)
{
    struct rvperfctr *remote;
    char path[64];

    snprintf(path, sizeof(path), "/proc/%d/perfctr", pid);
    remote = malloc(sizeof(*remote));
    if( remote == NULL )
	return NULL;
    if( vperfctr_open_name(path, &remote->vperfctr) != 0 ) {
	free(remote);
	return NULL;
    }
    remote->pid = pid;
    return remote;
}

/* Return the pid this handle was opened for. */
int rvperfctr_pid(const struct rvperfctr *rvperfctr)
{
    const int pid = rvperfctr->pid;

    return pid;
}

/* Same as vperfctr_info(), applied to the embedded vperfctr handle. */
int rvperfctr_info(const struct rvperfctr *rvperfctr, struct perfctr_info *info)
{
    const struct vperfctr *base = &rvperfctr->vperfctr;

    return vperfctr_info(base, info);
}

/*
 * Store the TSC and PMC sums of the remote perfctr in '*sum'.
 * This always goes through vperfctr_read_ctrs_slow(), i.e. the
 * ioctl-based path that uses neither RDTSC nor RDPMC.
 */
void rvperfctr_read_ctrs(const struct rvperfctr *rvperfctr,
			 struct perfctr_sum_ctrs *sum)
{
    /* Plain call: ISO C does not allow `return expr;` in a function
       returning void, even when expr itself has type void. */
    vperfctr_read_ctrs_slow(&rvperfctr->vperfctr, sum);
}

/* Same as vperfctr_read_state(), applied to the embedded vperfctr
   handle; 'sum' and 'control' are passed through unchanged. */
int rvperfctr_read_state(const struct rvperfctr *rvperfctr,
			 struct perfctr_sum_ctrs *sum,
			 struct vperfctr_control *control)
{
    const struct vperfctr *base = &rvperfctr->vperfctr;

    return vperfctr_read_state(base, sum, control);
}

/* Same as vperfctr_control(), applied to the embedded vperfctr handle. */
int rvperfctr_control(const struct rvperfctr *rvperfctr,
		      struct vperfctr_control *control)
{
    const struct vperfctr *base = &rvperfctr->vperfctr;

    return vperfctr_control(base, control);
}

/* Same as vperfctr_stop(), applied to the embedded vperfctr handle. */
int rvperfctr_stop(const struct rvperfctr *rvperfctr)
{
    const struct vperfctr *base = &rvperfctr->vperfctr;

    return vperfctr_stop(base);
}

/* Same as vperfctr_unlink(), applied to the embedded vperfctr handle. */
int rvperfctr_unlink(const struct rvperfctr *rvperfctr)
{
    const struct vperfctr *base = &rvperfctr->vperfctr;

    return vperfctr_unlink(base);
}

/* Release the remote handle through vperfctr_close(). The pointer must
   not be used afterwards. */
void rvperfctr_close(struct rvperfctr *rvperfctr)
{
    /* vperfctr_close() free()s the pointer it is given, so this is only
       correct while offsetof(struct rvperfctr, vperfctr) == 0. */
    struct vperfctr *base = &rvperfctr->vperfctr;

    vperfctr_close(base);
}
