/*
 * Copyright (c) 2007, 2008 University of Tsukuba
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the University of Tsukuba nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 2010-2012 Yuichi Watanabe
 */

/* MMU emulation, Shadow Page Tables (SPT) */

#include <common/list.h>
#include <core/initfunc.h>
#include <core/printf.h>
#include <core/spinlock.h>
#include <core/string.h>
#include "asm.h"
#include "constants.h"
#include "cpu_mmu.h"
#include "cpu_mmu_spt.h"
#include "current.h"
#include "mm.h"
#include "mmio.h"
#include "msr_pass.h"
#include "panic.h"
#include "pcpu.h"
#include "vmmcall_status.h"

/*
 * Properties of the guest access that raised a page fault.
 * Filled in by set_ad() from the page-fault error code and guest CR0.
 */
struct access_data {
	/* PAGEFAULT_ERR_WR_BIT of the error code */
	unsigned int write : 1;
	/* PAGEFAULT_ERR_US_BIT of the error code */
	unsigned int user : 1;
	/* CR0_WP_BIT of the guest CR0 at the time of the fault */
	unsigned int wp : 1;
};

/*
 * Summary of the guest page-table walk for one virtual address.
 * Filled in by set_gtd() and consumed by map_page() when it builds the
 * shadow PTE.
 */
struct guest_table_data {
	/* writable: RW set in every walked entry and D set in entries[0] */
	unsigned int rw : 1;
	/* user accessible: US set in every walked entry */
	unsigned int us : 1;
	/* no-execute: NX set in at least one walked entry */
	unsigned int nx : 1;
	/* entries[1] has PDE_PS_BIT set (guest large page) */
	unsigned int ps : 1;
	/* result of msr_pte_to_cache_flag() on entries[0] */
	u32 cache_flag;
	/* guest frame number taken from the address bits of entries[0] */
	u64 gfn;
};

/*
 * Report whether the guest has long mode active, i.e. whether the LMA
 * bit is set in the guest's IA32_EFER.
 */
static bool
guest64 (void)
{
	u64 guest_efer;

	current->vmctl.read_msr (MSR_IA32_EFER, &guest_efer);
	return (guest_efer & MSR_IA32_EFER_LMA_BIT) != 0;
}

/*
 * Drop the shadow mapping that covers guest virtual address v and
 * request a TLB flush.
 *
 * Nothing is changed in the shadow tables when the level-2 entry
 * (PDE) for v is not present; the flush request is made regardless.
 */
void
cpu_mmu_spt_invalidate(ulong v)
{
	pmap_t p;
	u64 tmp;

	pmap_open_vmm (&p, current->spt.cr3tbl_phys, current->spt.levels);
	/* Look at the level-2 entry (PDE) that covers v. */
	pmap_setvirt (&p, v, 2);
	tmp = pmap_read (&p);
	if (tmp & PDE_P_BIT) {
		if (tmp & PDE_AVAILABLE1_BIT) {
			/*
			 * map_page() sets PDE_AVAILABLE1_BIT on PDEs that
			 * back a guest large page.  Remove the mark and
			 * clear at level 1 (presumably wiping the whole
			 * level-1 table - confirm against pmap_clear()).
			 */
			pmap_write (&p, tmp & ~PDE_AVAILABLE1_BIT, 0xFFF);
			pmap_setlevel (&p, 1);
			pmap_clear (&p);
			if (current->spt.levels >= 3) {
				/*
				 * Give the neighbouring 2 MiB slot the same
				 * treatment whenever its PDE is present.
				 * NOTE(review): presumably needed because a
				 * 4 MiB guest page spans two 2 MiB shadow
				 * PDEs - confirm.
				 */
				pmap_setvirt (&p, v ^ PAGESIZE2M, 2);
				tmp = pmap_read (&p);
				if (tmp & PDE_P_BIT) {
					pmap_write (&p,
						    tmp & ~PDE_AVAILABLE1_BIT,
						    0xFFF);
					pmap_setlevel (&p, 1);
					pmap_clear (&p);
				}
			}
		} else {
			/* Ordinary mapping: zero only the level-1 entry. */
			pmap_setlevel (&p, 1);
			/*
			 * The value read is not used.
			 * NOTE(review): the read presumably advances the
			 * walk to level 1 so that the following write is
			 * valid - confirm before removing it.
			 */
			tmp = pmap_read (&p);
			pmap_write (&p, 0, 0xFFF);
		}
	}
	pmap_close (&p);

	/* Picked up (and reset) by cpu_mmu_spt_tlbflush(). */
	current->spt.tlbflash = true;
}

/*
 * Throw away the whole shadow page-table tree of the current vcpu:
 * clear the top-level table, move every page on shadow_l1 and
 * shadow_lu back to shadow_free with its back pointer reset, and
 * request a TLB flush.
 */
static void
invalidate_all_spt(void)
{
	pmap_t root;
	struct spt_page *pg;

	pmap_open_vmm (&root, current->spt.cr3tbl_phys, current->spt.levels);
	pmap_clear (&root);
	pmap_close (&root);

	/* Drain the shadow_l1 list. */
	for (;;) {
		pg = LIST2_POP(current->spt.shadow_l1, list);
		if (pg == NULL)
			break;
		pg->pte_virt = NULL;
		LIST2_ADD(current->spt.shadow_free, list, pg);
	}

	/* Drain the shadow_lu list. */
	for (;;) {
		pg = LIST2_POP(current->spt.shadow_lu, list);
		if (pg == NULL)
			break;
		pg->pte_virt = NULL;
		LIST2_ADD(current->spt.shadow_free, list, pg);
	}

	current->spt.tlbflash = true;
}

/*
 * Install a shadow mapping from guest virtual address v to host
 * physical address hphys.
 *
 *   v     - guest virtual address to map
 *   ad    - properties of the faulting access
 *   gtd   - permissions and attributes gathered from the guest page
 *           tables; us/rw are overridden here for a supervisor write
 *           while guest CR0.WP is clear
 *   glvl  - number of guest paging levels (currently unused)
 *   hphys - host physical address backing the page
 *
 * Missing intermediate shadow tables are taken from shadow_free.  When
 * that list is empty a level-1 table is recycled from shadow_l1, and
 * when there is none either the whole shadow tree is flushed.
 */
static void
map_page (u64 v, struct access_data ad, struct guest_table_data *gtd,
	  int glvl, phys_t hphys)
{
	pmap_t p;
	u64 tmp;
	int l;
	ulong cr0;
	struct spt_page *spt_page;

	current->vmctl.read_control_reg (CONTROL_REG_CR0, &cr0);
	if (!(cr0 & CR0_WP_BIT)) {
		/*
		 * With CR0.WP clear a supervisor write succeeds even on a
		 * read-only page.  Map the page writable but
		 * supervisor-only so that later user accesses still fault.
		 */
		if (!ad.user && ad.write) {
			gtd->us = 0;
			gtd->rw = 1;
		}
	}
	pmap_open_vmm (&p, current->spt.cr3tbl_phys, current->spt.levels);
	pmap_setvirt (&p, v, 1);
	tmp = pmap_read (&p);
	/*
	 * As long as the walk stops above level 1, the entry at level l is
	 * missing: install a fresh table there and try again.
	 */
	for (; (l = pmap_getreadlevel (&p)) > 1; tmp = pmap_read (&p)) {
		pmap_setlevel (&p, l);
		spt_page = LIST2_POP(current->spt.shadow_free, list);
		if (spt_page == NULL) {
			spt_page = LIST2_POP(current->spt.shadow_l1, list);
			if (spt_page == NULL) {
				/*
				 * NOTE(review): the walk state cached in p
				 * is not re-seeded after the full flush;
				 * confirm that continuing with it is safe.
				 */
				invalidate_all_spt();
				continue;
			}
			/* Unlink the recycled table from its parent. */
			if (spt_page->pte_virt) {
				*spt_page->pte_virt = 0;
				current->spt.tlbflash = true;
			}
		}
		spt_page->pte_virt = pmap_pointer(&p);
		/*
		 * The page installed in a level-l entry serves as the
		 * level-(l-1) table, and l is always greater than 1 inside
		 * this loop.  Level-1 tables are therefore the ones
		 * installed at l == 2; they have no child tables and can be
		 * recycled one by one.  (The former test "l == 1" could
		 * never be true, which left shadow_l1 permanently empty.)
		 */
		if (l == 2) {
			LIST2_ADD(current->spt.shadow_l1, list, spt_page);
		} else {
			LIST2_ADD(current->spt.shadow_lu, list, spt_page);
		}
		pmap_write (&p, spt_page->phys | PDE_P_BIT, PDE_P_BIT);
		/* Start from an empty table before descending again. */
		pmap_setlevel (&p, l - 1);
		pmap_clear (&p);
		pmap_setlevel (&p, 1);
	}
	if (gtd->ps) {
		/*
		 * Mark the shadow PDE as backing a guest large page;
		 * cpu_mmu_spt_invalidate() checks this mark.
		 */
		pmap_setlevel (&p, 2);
		tmp = pmap_read (&p);
		if (!(tmp & PDE_AVAILABLE1_BIT))
			pmap_write (&p, tmp | PDE_AVAILABLE1_BIT, 0xFFF);
		pmap_setlevel (&p, 1);
	}
	pmap_write (&p, (hphys & (~PAGESIZE_MASK)) | PTE_P_BIT |
		    (gtd->rw ? PTE_RW_BIT : 0) |
		    (gtd->us ? PTE_US_BIT : 0) |
		    (gtd->nx ? PTE_NX_BIT : 0) |
		    mm_cache_flag_to_pte_attr (gtd->cache_flag),
		    PTE_P_BIT | PTE_RW_BIT | PTE_US_BIT |
		    PTE_PWT_BIT | PTE_PCD_BIT | PTE_PAT_BIT);
	pmap_close (&p);
}

/*
 * Rebuild the shadow state after the guest's paging setup changed:
 * select 4 shadow levels for a long-mode guest and 3 otherwise, discard
 * all existing shadow tables, and hand the shadow root to the
 * virtualization backend.
 */
void
cpu_mmu_spt_updatecr3(void)
{
	if (guest64 ())
		current->spt.levels = 4;
	else
		current->spt.levels = 3;

	invalidate_all_spt();
	current->vmctl.spt_setcr3 (current->spt.cr3tbl_phys);
}

void
cpu_mmu_spt_init(void)
{
	int i;
	vmmerr_t err;

	err = alloc_page(&current->spt.cr3tbl, &current->spt.cr3tbl_phys);
	if (err) {
		panic("Failed to allocate memory for a cr3-table.");
	}
	memset(current->spt.cr3tbl, 0, PAGESIZE);
	LIST2_HEAD_INIT(current->spt.shadow_free);
	LIST2_HEAD_INIT(current->spt.shadow_l1);
	LIST2_HEAD_INIT(current->spt.shadow_lu);
	for (i = 0; i < NUM_OF_SPTTBL; i++) {
		err = alloc_page(&current->spt.spt_page[i].virt, &current->spt.spt_page[i].phys);
		if (err) {
			panic("Failed to allocate memory for a shadow page table.");
		}
		current->spt.spt_page[i].pte_virt = NULL;
		LIST2_ADD(current->spt.shadow_free, list, &current->spt.spt_page[i]);
	}
}

/*
 * Test and clear the pending TLB flush request of the current vcpu.
 * Returns true when a flush had been requested since the last call.
 */
bool
cpu_mmu_spt_tlbflush(void)
{
	bool pending = false;

	if (current->spt.tlbflash) {
		pending = true;
		current->spt.tlbflash = false;
	}
	return pending;
}

/*
 * Fill *ad with the properties of a faulting access: whether it was a
 * write, whether it came from user mode, and the guest's CR0.WP
 * setting.
 */
static void
set_ad (bool write, bool user, bool wp, struct access_data *ad)
{
	*ad = (struct access_data) {
		.write = write,
		.user = user,
		.wp = wp,
	};
}

/*
 * Condense the result of a guest page-table walk into *gtd.
 *
 *   entries - walked entries; indices 0..levels are examined, with
 *             entries[0] being the final one
 *   levels  - highest index of entries[] to examine
 *   gtd     - output
 *
 * The mapping is writable / user accessible only when every examined
 * entry allows it, and no-execute as soon as one entry requests it.
 * A final entry without the dirty bit is additionally treated as
 * read-only.
 */
static void
set_gtd (u64 entries[5], int levels, struct guest_table_data *gtd)
{
	unsigned int writable = 1, user_ok = 1, no_exec = 0;
	u64 leaf = entries[0];
	int idx;

	for (idx = 0; idx <= levels; idx++) {
		u64 e = entries[idx];

		if ((e & PDE_RW_BIT) == 0)
			writable = 0;
		if ((e & PDE_US_BIT) == 0)
			user_ok = 0;
		if ((e & PDE_NX_BIT) != 0)
			no_exec = 1;
	}
	/* Keep a clean page read-only in the shadow mapping. */
	if ((leaf & PTE_D_BIT) == 0)
		writable = 0;

	gtd->cache_flag = msr_pte_to_cache_flag(entries[0]);
	gtd->rw = writable;
	gtd->us = user_ok;
	gtd->nx = no_exec;
	gtd->gfn = (leaf & PTE_ADDR_MASK64) >> PAGESIZE_SHIFT;
	gtd->ps = (levels >= 2 && (entries[1] & PDE_PS_BIT)) ? 1 : 0;
}

/* handling a page fault of a guest */
void
cpu_mmu_spt_pagefault (ulong err, ulong cr2)
{
	int levels;
	vmmerr_t ret;
	bool wr, us, ex, wp;
	ulong cr0, cr3, cr4;
	struct access_data ad;
	struct guest_table_data gtd;
	u64 efer, entries[5];
	phys_t hphys;

	current->vmctl.read_control_reg(CONTROL_REG_CR0, &cr0);
	current->vmctl.read_control_reg(CONTROL_REG_CR3, &cr3);
	current->vmctl.read_control_reg(CONTROL_REG_CR4, &cr4);
	current->vmctl.read_msr(MSR_IA32_EFER, &efer);
	wr = !!(err & PAGEFAULT_ERR_WR_BIT);
	us = !!(err & PAGEFAULT_ERR_US_BIT);
	ex = !!(err & PAGEFAULT_ERR_ID_BIT);
	wp = !!(cr0 & CR0_WP_BIT);
	ret = mmu_get_pte(cr2, cr0, cr3, cr4, efer, wr, us, ex, entries,
			  &levels);
	if (ret != VMMERR_SUCCESS) {
		mmu_generate_pagefault(ret, err & PAGEFAULT_ERR_WR_BIT,
				       err & PAGEFAULT_ERR_US_BIT, cr2);
		return;
	}
	set_ad(wr, us, wp, &ad);
	set_gtd(entries, levels, &gtd);

	mmio_lock();
	ret = mmio_pagefault(gtd.gfn << PAGESIZE_SHIFT);
	mmio_unlock();
	if (ret != VMMERR_NODEV) {
		if (ret != VMMERR_SUCCESS &&
		    (ret < VMMERR_PAGE_NOT_PRESENT ||
		     ret > VMMERR_PAGE_BAD_RESERVED_BIT)) {
			panic("Failed to emulate accessing to MMIO area. ret 0x%x",
			      ret);
		}
		return;
	}

	hphys = current->vm->gmm.gp2hp((gtd.gfn << PAGESIZE_SHIFT) +
				       (cr2 & PAGESIZE_MASK));
	if (hphys == GMM_NO_MAPPING) {
		ret = cpu_interpreter();
		if (ret != VMMERR_SUCCESS &&
		    (ret < VMMERR_PAGE_NOT_PRESENT ||
		     ret > VMMERR_PAGE_BAD_RESERVED_BIT)) {
			panic("Failed to emulate accessing to no mapping area. ret 0x%x",
			      ret);
		}
		return;
	}
	map_page(cr2, ad, &gtd, levels, hphys);
}
