/*
 * Copyright (c) 2007, 2008 University of Tsukuba
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the University of Tsukuba nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 2010-2012 Yuichi Watanabe
 */

#include <core/assert.h>
#include <core/cpu.h>
#include <core/initfunc.h>
#include <core/mm.h>
#include <core/printf.h>
#include <core/rm.h>
#include <core/spinlock.h>
#include <core/string.h>
#include "asm.h"
#include "callrealmode.h"
#include "current.h"
#include "entry.h"
#include "mm.h"
#include "panic.h"
#include "vramwrite.h"

/* Capacity of the sysmemmap[] array that holds the system memory map. */
#define MAXNUM_OF_SYSMEMMAP		256

/*
 * Cache-control bit combinations for host page table entries.  They are
 * selected from MAPMEM_* cache flags by mm_cache_flag_to_pte_attr().
 * NOTE(review): which memory type each PAT/PCD/PWT combination actually
 * yields depends on how the PAT MSR is programmed, which is not visible
 * in this file -- confirm against the PAT setup code.
 */
#define HOST_PTE_ATTR_WB		0
#define HOST_PTE_ATTR_WT		(PTE_PWT_BIT)
#define HOST_PTE_ATTR_WUC		(PTE_PCD_BIT)
#define HOST_PTE_ATTR_UC		(PTE_PCD_BIT | PTE_PWT_BIT)
#define HOST_PTE_ATTR_WP		(PTE_PAT_BIT | PTE_PWT_BIT)
#define HOST_PTE_ATTR_WC		(PTE_PAT_BIT | PTE_PCD_BIT)

/* Symbol defined outside this file; mm_init_global() uses its address as
   the upper bound of the range whose pages are not released to the
   allocator. */
extern u8 end[];

/* Sum of the lengths of all MEM_TYPE_AVAILABLE memory map entries. */
static phys_t memorysize = 0;
/* Highest inclusive end address <= 0xffffffff among available entries. */
static phys32_t top_of_low_avail_mem;
/* Physical base address of the VMM area, chosen by find_vmm_phys(). */
static phys32_t vmm_start_phys;
/* One descriptor per page of the VMM area, indexed by page number. */
static struct page pagestruct[NUM_OF_PAGES];
/* Taken by mapmem() and unmapmem() around page table updates. */
static spinlock_t mapmem_lock;
/* Last virtual page handed out by mapmem_alloc(); the next search
   resumes after it. */
static virt_t mapmem_lastvirt = MAPMEM_ADDR_END - PAGESIZE;
/* Root resource under which memory map entries are inserted. */
static struct resource mem_address_space;
/* Memory map entries, filled by getallsysmemmap(). */
static struct resource sysmemmap[MAXNUM_OF_SYSMEMMAP];
/* Resource describing the VMM area itself. */
static struct resource vmm_mem_resource;
/* Number of valid entries in sysmemmap[]. */
static int sysmemmaplen;

/* NOTE(review): not referenced anywhere in the visible part of this file. */
struct memsizetmp {
	void *addr;
	int ok;
};

/* Return the number of entries recorded in the system memory map. */
int
get_phys_mem_map_count(void)
{
	return sysmemmaplen;
}

/*
 * Look up one entry of the system memory map.
 *
 * index: zero-based entry number.
 * base, len, type: receive the start address, the length in bytes and the
 *                  stored type value of the entry.
 *
 * Returns 1 when the entry exists and the outputs were written, 0 when
 * index is out of range (the outputs are then left untouched).
 */
int
get_phys_mem_map(int index, phys_t *base, phys_t *len, u32 *type)
{
	const struct resource *r;

	if (index >= 0 && index < sysmemmaplen) {
		r = &sysmemmap[index];
		*base = r->start;
		/* end is inclusive, hence the +1. */
		*len = r->end - r->start + 1;
		*type = r->data;
		return 1;
	}
	return 0;
}

/*
 * Return the highest inclusive end address that fits in 32 bits among the
 * MEM_TYPE_AVAILABLE entries recorded by getallsysmemmap(), or 0 if no
 * such entry has been recorded.
 */
phys32_t
mm_top_of_low_avail_mem(void)
{
	return top_of_low_avail_mem;
}

/*
 * Return the resource that follows `resource` under mem_address_space, as
 * computed by RM_RESOURCE_NEXT().
 * NOTE(review): the iteration start/end convention (e.g. passing NULL to
 * get the first entry) is defined by the RM_RESOURCE_NEXT macro in
 * core/rm.h -- confirm there.
 */
struct resource *
mm_next_mem_map(struct resource *resource)
{
	return RM_RESOURCE_NEXT(&mem_address_space, resource);
}

static void
getallsysmemmap (void)
{
	int i;
	u32 n = 0, nn = 1;
	struct e820map e820map;
	phys_t memsize = 0;
	struct resource *entry;

	for (i = 0; i < MAXNUM_OF_SYSMEMMAP && nn; i++, n = nn) {
		if (callrealmode_getsysmemmap (n, &e820map, &nn))
			panic ("getsysmemmap failed");
		entry = sysmemmap + i;
		entry->start = e820map.base;
		entry->end = e820map.base + e820map.len - 1;
		entry->type = RESOURCE_TYPE_MEM;
		entry->data = e820map.type;
		entry->name = "mem";
		rm_insert_resource(&mem_address_space, entry);

		if (entry->data == MEM_TYPE_AVAILABLE) {
			memsize += e820map.len;
			if (entry->end <= 0xffffffff &&
			    entry->end > top_of_low_avail_mem) {
				top_of_low_avail_mem = entry->end;
			}
		}
	}
	memorysize = memsize;
	sysmemmaplen = i;
}

/* Dump the resources registered under mem_address_space via
   rm_dump_resources(). */
void
mm_dump_mem_address_space(void)
{
	rm_dump_resources(&mem_address_space);
}

/*
 * Print every system memory map entry (start address, length, type), one
 * per line, followed by a "Done." line.
 */
static void
debug_sysmemmap_print (void)
{
	phys_t base;
	phys_t length;
	u32 type;
	int index;

	/* get_phys_mem_map() returns 0 once index runs past the last entry. */
	for (index = 0; get_phys_mem_map(index, &base, &length, &type);
	     index++)
		printf("%llx len %llx type %x\n",
		       base, length, type);
	printf ("Done.\n");
}

/*
 * Find a physical address for VMM. 0 is returned on error.
 *
 * Scans the system memory map and considers only MEM_TYPE_AVAILABLE
 * ranges that start below 4GB (the part above 4GB is clipped off).  Each
 * candidate is shrunk to 4MB-aligned bounds; if VMMSIZE_ALL bytes still
 * fit, the VMM would be placed at the very top of that range.  The highest
 * such placement over all ranges is returned.
 */
static phys32_t
find_vmm_phys (void)
{
	phys32_t base32, limit32, phys;
	phys_t limit64;
	phys_t base;
	phys_t len;
	u32 type;
	int index = 0;

	phys = 0;
	while(get_phys_mem_map(index++, &base, &len, &type)) {
		if (type != MEM_TYPE_AVAILABLE)
			continue; /* only available area can be used */
		if (base >= 0x100000000ULL)
			continue; /* we can't use over 4GB */
		/* inclusive last byte of the range */
		limit64 = base + len - 1;
		if (limit64 >= 0x100000000ULL)
			limit64 = 0xFFFFFFFFULL; /* ignore over 4GB */
		base32 = base;
		limit32 = limit64;
		if (base32 > limit32)
			continue; /* avoid strange value */
		if (base32 > (0xFFFFFFFF - VMMSIZE_ALL + 1))
			continue; /* we need more than VMMSIZE_ALL */
		/*
		 * This early rejection also keeps limit32 + 1 large enough
		 * that the round-down below cannot produce 0 and wrap to
		 * 0xFFFFFFFF, provided VMMSIZE_ALL is at least 4MB.
		 * NOTE(review): VMMSIZE_ALL is defined elsewhere -- confirm.
		 */
		if (limit32 < VMMSIZE_ALL)
			continue; /* skip shorter than VMMSIZE_ALL */
		/* round the start up and the end down to 4MB boundaries */
		base32 = (base32 + 0x003FFFFF) & 0xFFC00000; /* align 4MB */
		limit32 = ((limit32 + 1) & 0xFFC00000) - 1; /* align 4MB */
		if (base32 > limit32)
			continue; /* lack space after alignment */
		if (limit32 - base32 >= (VMMSIZE_ALL - 1) && /* enough */
		    phys < limit32 - (VMMSIZE_ALL - 1)) { /* use top of it */
			phys = limit32 - (VMMSIZE_ALL - 1);
		}
	}
	return phys;
}

/*
 * Return the page descriptor for a virtual address inside the VMM area.
 *
 * virt: virtual address; must lie in the NUM_OF_PAGES pages starting at
 *       VMM_START_VIRT, otherwise the ASSERT fires.
 *
 * The page index is kept in virt_t so that an address far outside the VMM
 * area cannot be truncated into a seemingly valid index before the bounds
 * check.
 */
struct page *
virt_to_page (virt_t virt)
{
	virt_t i;

	i = (virt - VMM_START_VIRT) >> PAGESIZE_SHIFT;
	ASSERT (i < NUM_OF_PAGES);
	return &pagestruct[i];
}

/*
 * Translate a physical address to the corresponding VMM virtual address
 * by applying the fixed offset between vmm_start_phys and VMM_START_VIRT.
 * No range check is done here; the result is only meaningful for
 * addresses inside the VMM area.
 */
virt_t
phys_to_virt (phys_t phys)
{
	return (virt_t)(phys - vmm_start_phys + VMM_START_VIRT);
}

/*
 * Return the page descriptor for a physical address inside the VMM area.
 * The range check is the ASSERT in virt_to_page().
 */
struct page *
phys_to_page (phys_t phys)
{
	return virt_to_page (phys_to_virt (phys));
}

/* Return the virtual address recorded in page descriptor p. */
virt_t
page_to_virt (struct page *p)
{
	return p->virt;
}

/* Return the physical address recorded in page descriptor p. */
phys_t
page_to_phys (struct page *p)
{
	return p->phys;
}

/* Return the physical start address of the VMM area. */
u32
vmm_start_inf (void)
{
	return vmm_start_phys;
}

/*
 * Return the physical address just past the end of the VMM area
 * (start + VMMSIZE_ALL, i.e. an exclusive bound).
 */
u32
vmm_term_inf (void)
{
	return vmm_start_phys + VMMSIZE_ALL;
}

#if 0
/*
 * Disabled debug helper: print the name and address of a page table, then
 * its 64-bit entries four per line (PTE_NUM_IN_TABLE_64 entries in total).
 */
void
dump_pt(char *name, void *page_table)
{
	u64 *pt = page_table;
	int i;

	printf("%s %p\n", name, pt);
	for (i = 0; i < PTE_NUM_IN_TABLE_64 / 4; i++) {
		printf("    %016llx %016llx %016llx %016llx\n",
		       pt[0], pt[1], pt[2], pt[3]);
		pt += 4;
	}
}
#endif

/*
 * Set up the page tables for the VMM area selected in vmm_start_phys,
 * hand over to move_vmm_area32()/move_vmm_area64(), and finally load
 * vmm_main_cr3.
 *
 * Two table sets are prepared: vmm_base_cr3 (identity mapping plus the
 * VMM area, used for switching from/to real mode) and vmm_main_cr3 (VMM
 * area plus the mapmem area, for normal use).
 * NOTE(review): move_vmm_area32/64 are defined elsewhere; presumably they
 * copy the running image into the new area -- confirm.
 */
static void
move_vmm (void)
{
	int i;
	ulong cr3;

	/* Create a page directory to map vmm area */
	/* One 2MB large-page entry per 2MB of the VMM area. */
	for (i = 0; i < VMMSIZE_ALL >> PAGESIZE2M_SHIFT; i++)
		vmm_pd1[i] =
			(vmm_start_phys + (i << PAGESIZE2M_SHIFT)) |
			PDE_P_BIT | PDE_RW_BIT | PDE_PS_BIT | PDE_A_BIT |
			PDE_D_BIT;
	/* map vmm area at 0xC0000000 temporarily */
	/*
	 * Unlike sym_to_phys(), vmm_start_phys is not added here.
	 * NOTE(review): presumably because the code still runs from its
	 * original load location at this point -- confirm.
	 */
	entry_pdp[3] = (((u64)(ulong)vmm_pd1) - 0x40000000) | PDPE_ATTR;
	/* Rewrite CR3 with its own value (presumably to flush the TLB so the
	   new entry takes effect -- confirm). */
	asm_rdcr3 (&cr3);
	asm_wrcr3 (cr3);

	/* make a new page directory for switching from/to real mode */

	/* virt 0x00000000 identity mapping */
	vmm_pdp[0] = sym_to_phys (entry_pd0) | PDPE_ATTR;
	/* virt 0x40000000 vmm area */
	vmm_pdp[1] = sym_to_phys (vmm_pd1)   | PDPE_ATTR;
#ifndef __x86_64__
	vmm_base_cr3 = sym_to_phys (vmm_pdp);
#else
	vmm_pml4[0] = sym_to_phys (vmm_pdp) |
		PDE_P_BIT | PDE_RW_BIT | PDE_US_BIT;
	vmm_base_cr3 = sym_to_phys (vmm_pml4);
#endif

	/* make a new page directory for normal-use */

	/* virt 0x40000000 vmm area */
	vmm_main_pdp[1] = sym_to_phys (vmm_pd1) | PDPE_ATTR;
	/* virt 0x80000000 mapmem area */
	vmm_main_pdp[2] = sym_to_phys (vmm_pd2) | PDPE_ATTR;
#ifndef __x86_64__
	vmm_main_cr3 = sym_to_phys (vmm_main_pdp);
#else
	vmm_main_pml4[0] = sym_to_phys (vmm_main_pdp) |
		PDE_P_BIT | PDE_RW_BIT | PDE_US_BIT;
	vmm_main_cr3 = sym_to_phys (vmm_main_pml4);
#endif

#ifdef __x86_64__
	move_vmm_area64 ();
#else
	move_vmm_area32 ();
#endif
	/* From here on the normal-use page tables are active. */
	asm_wrcr3(vmm_main_cr3);
}

/*
 * On x86_64, build a linear mapping of the first NUM_OF_HPHYS_PAGES host
 * physical pages using 2MB large pages, rooted at vmm_main_pml4[1].  On
 * other builds this function does nothing.
 *
 * Panics if a page table page cannot be allocated or if more than 512
 * page directories would be needed.
 * NOTE(review): the virtual base of this mapping is expected to match
 * HPHYS_ADDR used by mapped_hphys_addr() -- confirm against its definition.
 */
static void
map_hphys (void)
{
#ifdef __x86_64__
	void *virt;
	phys_t phys;
	u64 *pdp, *pd;
	u64 i;
	int pdpi, pdi;
	vmmerr_t err;

	/* Page directory pointer table referenced by PML4 slot 1. */
	err = alloc_page (&virt, &phys);
	if (err) {
		panic("Failed to allocate memory for a l3 page table.");
	}

	memset (virt, 0, PAGESIZE);
	vmm_main_pml4[1] = phys | PDE_P_BIT | PDE_RW_BIT | PDE_A_BIT;
	pdp = virt;
	pdpi = 0;
	/* Start "full" so the first iteration allocates a page directory
	   before pd is used. */
	pdi = 512;
	/* i counts 4KB pages; each step of 512 pages is one 2MB entry. */
	for (i = 0; i < NUM_OF_HPHYS_PAGES; i += 512) {
		if (pdi >= 512) {
			if (pdpi >= 512)
				panic ("NUM_OF_HPHYS_PAGES is too large.");
			err = alloc_page (&virt, &phys);
			if (err) {
				panic("Failed to allocate memory for a l2 page table.");
			}
			memset (virt, 0, PAGESIZE);
			pdp[pdpi++] = phys | PDE_P_BIT | PDE_RW_BIT |
				PDE_A_BIT;
			pd = virt;
			pdi = 0;
		}
		/* i << PAGESIZE_SHIFT is the physical address of the page. */
		pd[pdi++] = (i << PAGESIZE_SHIFT) | PDE_P_BIT | PDE_RW_BIT |
			PDE_PS_BIT | PDE_A_BIT | PDE_D_BIT;
	}
#endif
}

/*
 * One-time initialisation of the memory manager.
 *
 * Steps, in order: initialise the mapmem lock and the root memory
 * resource, read the system memory map, choose the physical area for the
 * VMM (panic if none fits), register that area as a resource, relocate
 * and switch page tables via move_vmm(), initialise the allocator and the
 * page descriptors, release every page outside [head, end) to the
 * allocator, map host physical memory, and finally notify vramwrite.
 */
static void
mm_init_global (void)
{
	int i;
	vmmerr_t err;

	spinlock_init (&mapmem_lock);
	rm_init_resource(&mem_address_space, 0, 0xffffffffffffffffLL,
			 RESOURCE_TYPE_MEM, 0, "mem-space");
	getallsysmemmap ();

	vmm_start_phys = find_vmm_phys ();
	/* find_vmm_phys() returns 0 when no suitable range exists. */
	if (vmm_start_phys == 0) {
		printf("Failed to allocate %d MiB for VMM.\n",
			VMMSIZE_ALL >> 20);
		debug_sysmemmap_print ();
		panic("Failed to allocate %d MiB for VMM.\n",
			VMMSIZE_ALL >> 20);
	}
	printf ("%lld bytes (%lld MiB) RAM available.\n",
		memorysize, memorysize >> 20);
	/* The second address printed is the exclusive end of the area. */
	printf ("VMM will use 0x%08X-0x%08X (%d MiB).\n", vmm_start_phys,
		vmm_start_phys + VMMSIZE_ALL, VMMSIZE_ALL >> 20);

	rm_init_resource(&vmm_mem_resource, vmm_start_phys,
			 vmm_start_phys + VMMSIZE_ALL - 1,
			 RESOURCE_TYPE_MEM,
			 MEM_TYPE_AVAILABLE, "vmm");
	/* Insert the VMM area below the memory map entry that contains it. */
	err = rm_insert_resource(rm_find_resource(&mem_address_space, vmm_start_phys),
			      &vmm_mem_resource);
	if (err) {
		panic("Failed to insert the vmm memory to sysmemmap. err 0x%x",
		      err);
	}

	move_vmm ();
	mm_alloc_init ();
	/* Every page of the VMM area starts out reserved. */
	for (i = 0; i < NUM_OF_PAGES; i++) {
		pagestruct[i].type = PAGE_TYPE_RESERVED;
		pagestruct[i].allocsize = 0;
		pagestruct[i].phys = vmm_start_phys + PAGESIZE * i;
		pagestruct[i].virt = VMM_START_VIRT + PAGESIZE * i;
	}
	/* Free all pages except those whose address lies in [head, end). */
	for (i = 0; i < NUM_OF_PAGES; i++) {
		if ((u64)(long)head <= pagestruct[i].virt &&
		    pagestruct[i].virt < (u64)(long)end)
			continue;
		mm_page_free (&pagestruct[i]);
	}
	map_hphys ();
	vramwrite_mm_initialized();
}

/*
 * get a physical address of a symbol sym
 *
 * The symbol's address is taken relative to the base 0x40000000 and
 * rebased onto vmm_start_phys, so the result is only valid once
 * vmm_start_phys has been set.
 */
phys_t
sym_to_phys (void *sym)
{
	return ((virt_t)sym) - 0x40000000 + vmm_start_phys;
}

/*
 * Tell whether a physical address lies inside the VMM area
 * [vmm_start_phys, vmm_start_phys + VMMSIZE_ALL).
 *
 * The upper bound is computed in phys_t: vmm_start_phys is a 32-bit
 * value, and a VMM area ending exactly at 4GB would otherwise wrap the
 * bound to 0 and make every address test false.
 */
bool
phys_in_vmm (phys_t phys)
{
	phys_t start = vmm_start_phys;
	phys_t limit = start + VMMSIZE_ALL;

	return (phys >= start) && (phys < limit);
}

/**********************************************************************/
/*** accessing page tables ***/

/*
 * Prepare a page table walker for the VMM's own page tables.
 *
 * m:      walker state to initialise.
 * cr3:    CR3 value whose page-aligned part is the top-level table.
 * levels: number of paging levels.
 *
 * The top slot entry[levels] is seeded with a pseudo entry pointing at
 * the top-level table; nothing has been read from memory yet, so
 * readlevel starts at the top and curlevel one below it.
 */
void
pmap_open_vmm (pmap_t *m, ulong cr3, int levels)
{
	const int top = levels;

	m->type = PMAP_TYPE_VMM;
	m->levels = top;
	m->readlevel = top;
	m->curlevel = top - 1;
	m->entry[top] = (cr3 & ~PAGESIZE_MASK) | PDE_P_BIT;
}

/*
 * Prepare a page table walker for guest page tables.
 *
 * m:      walker state to initialise.
 * cr3:    guest CR3 value.
 * levels: number of paging levels; with 3 levels the table address keeps
 *         all bits above the low five, otherwise it is page aligned.
 * atomic: select PMAP_TYPE_GUEST_ATOMIC instead of PMAP_TYPE_GUEST.
 *
 * The PWT/PCD bits of cr3 are kept in the seeded top entry.
 */
void
pmap_open_guest (pmap_t *m, ulong cr3, int levels, bool atomic)
{
	ulong keep;

	/* Bits of cr3 to keep: table address plus cache-control bits. */
	if (levels == 3)
		keep = ~0x1F | CR3_PWT_BIT | CR3_PCD_BIT;
	else
		keep = ~PAGESIZE_MASK | CR3_PWT_BIT | CR3_PCD_BIT;

	m->levels = levels;
	m->readlevel = levels;
	m->curlevel = levels - 1;
	m->entry[levels] = (cr3 & keep) | PDE_P_BIT;

	if (atomic) {
		m->type = PMAP_TYPE_GUEST_ATOMIC;
		return;
	}
	m->type = PMAP_TYPE_GUEST;
}

/* Counterpart of pmap_open_*(); currently there is nothing to release. */
void
pmap_close (pmap_t *m)
{
}

/*
 * Return the walker's read level in the one-based numbering used by
 * pmap_setlevel() (the internal readlevel is zero-based).
 */
int
pmap_getreadlevel (pmap_t *m)
{
	return m->readlevel + 1;
}

/*
 * Select the level that subsequent reads and writes operate on.
 * level is one-based; it is stored zero-based in m->curlevel.
 */
void
pmap_setlevel (pmap_t *m, int level)
{
	m->curlevel = level - 1;
}

/*
 * Point the walker at a new virtual address and level.
 *
 * m:        walker state.
 * virtaddr: virtual address to operate on next.
 * level:    one-based level, as for pmap_setlevel().
 *
 * Entries already read for the previous address are kept as long as the
 * new address shares the address bits that select them; readlevel is
 * raised past every cached level whose selecting bits differ.
 */
void
pmap_setvirt (pmap_t *m, virt_t virtaddr, int level)
{
	/*
	 * masks[levels - 2][readlevel]: the address bits that determine
	 * which entry is cached at that level.  One row per supported
	 * number of levels (2, 3 and 4).  Kept static so the table is not
	 * rebuilt on the stack on every call.
	 */
	static const u64 masks[3][4] = {
		{ 0xFFFFF000, 0xFFC00000, 0x00000000, 0x00000000, },
		{ 0xFFFFF000, 0xFFE00000, 0xC0000000, 0x00000000, },
		{ 0x0000FFFFFFFFF000ULL, 0x0000FFFFFFE00000ULL,
		  0x0000FFFFC0000000ULL, 0x0000FF8000000000ULL, }
	};
	u64 mask;

	pmap_setlevel (m, level);
	while (m->readlevel < m->levels) {
		mask = masks[m->levels - 2][m->readlevel];
		if ((m->curaddr & mask) == (virtaddr & mask))
			break;
		m->readlevel++;
	}
	m->curaddr = virtaddr;
}

/*
 * Read one 32-bit page table entry at physical address phys.
 *
 * The access method depends on the walker type: VMM tables are read
 * through phys_to_virt(), guest tables through read_gphys_l(), and
 * atomic guest walkers through cmpxchg_gphys_l() with 0 as both the
 * expected and the new value.  attr is passed on to the guest accessors.
 * Returns the entry, or 0 for an unknown walker type.
 */
static u64
pmap_rd32 (pmap_t *m, u64 phys, u32 attr)
{
	u32 val = 0;

	if (m->type == PMAP_TYPE_VMM)
		val = *(u32 *)phys_to_virt (phys);
	else if (m->type == PMAP_TYPE_GUEST)
		read_gphys_l (phys, &val, attr);
	else if (m->type == PMAP_TYPE_GUEST_ATOMIC)
		cmpxchg_gphys_l (phys, &val, val, attr);
	return val;
}

/*
 * Read one 64-bit page table entry at physical address phys.
 *
 * The access method depends on the walker type: VMM tables are read
 * through phys_to_virt(), guest tables through read_gphys_q(), and
 * atomic guest walkers through cmpxchg_gphys_q() with 0 as both the
 * expected and the new value.  attr is passed on to the guest accessors.
 * Returns the entry, or 0 for an unknown walker type.
 */
static u64
pmap_rd64 (pmap_t *m, u64 phys, u32 attr)
{
	u64 val = 0;

	if (m->type == PMAP_TYPE_VMM)
		val = *(u64 *)phys_to_virt (phys);
	else if (m->type == PMAP_TYPE_GUEST)
		read_gphys_q (phys, &val, attr);
	else if (m->type == PMAP_TYPE_GUEST_ATOMIC)
		cmpxchg_gphys_q (phys, &val, val, attr);
	return val;
}

/*
 * Write one 32-bit page table entry at physical address phys.
 *
 * oldentry: value the caller last read; only used for atomic walkers.
 * entry:    in: value to write; out: on failure, the value handed back
 *           by cmpxchg_gphys_l().
 *
 * Returns false for the plain VMM and guest writes.  For atomic guest
 * walkers the result of cmpxchg_gphys_l() is returned; the caller
 * (pmap_write) treats true as failure.
 * NOTE(review): presumably cmpxchg_gphys_l() returns true when memory did
 * not contain oldentry and stores the current value in tmp -- confirm.
 */
static bool
pmap_wr32 (pmap_t *m, u64 phys, u32 attr, u64 oldentry, u64 *entry)
{
	u32 tmp;
	bool r = false;

	switch (m->type) {
	case PMAP_TYPE_VMM:
		*(u32 *)phys_to_virt (phys) = *entry;
		break;
	case PMAP_TYPE_GUEST:
		write_gphys_l (phys, *entry, attr);
		break;
	case PMAP_TYPE_GUEST_ATOMIC:
		/* Narrow the expected value to the 32-bit entry size. */
		tmp = oldentry;
		r = cmpxchg_gphys_l (phys, &tmp, *entry, attr);
		if (r)
			*entry = tmp;
		break;
	}
	return r;
}

/*
 * Write one 64-bit page table entry at physical address phys.
 *
 * oldentry: value the caller last read; only used for atomic walkers.
 * entry:    in: value to write; out: on failure, the value handed back
 *           by cmpxchg_gphys_q().
 *
 * Returns false for the plain VMM and guest writes.  For atomic guest
 * walkers the result of cmpxchg_gphys_q() is returned; the caller
 * (pmap_write) treats true as failure.
 * NOTE(review): presumably cmpxchg_gphys_q() returns true when memory did
 * not contain oldentry and updates oldentry with the current value --
 * confirm.
 */
static bool
pmap_wr64 (pmap_t *m, u64 phys, u32 attr, u64 oldentry, u64 *entry)
{
	bool r = false;

	switch (m->type) {
	case PMAP_TYPE_VMM:
		*(u64 *)phys_to_virt (phys) = *entry;
		break;
	case PMAP_TYPE_GUEST:
		write_gphys_q (phys, *entry, attr);
		break;
	case PMAP_TYPE_GUEST_ATOMIC:
		r = cmpxchg_gphys_q (phys, &oldentry, *entry, attr);
		if (r)
			*entry = oldentry;
		break;
	}
	return r;
}

/*
 * Return the page table entry for m->curaddr at the selected level
 * (m->curlevel), reading lower-level tables as needed.
 *
 * The walk continues from the lowest level already cached (m->readlevel)
 * and caches every entry and its physical address on the way down.
 * Returns 0 as soon as a non-present entry is encountered.  If a large
 * page entry is found at level index 1, the walk stops there: that entry
 * is returned as is, or, when level index 0 was requested, converted to
 * an equivalent 4KB-page style entry for m->curaddr.
 */
u64
pmap_read (pmap_t *m)
{
	u64 tmp;
	u32 tblattr;

	while (m->readlevel > m->curlevel) {
		tmp = m->entry[m->readlevel];
		if (!(tmp & PDE_P_BIT))
			return 0;
		if (m->readlevel == 1 && (tmp & PDE_PS_BIT)) {
			if (m->curlevel == 0) {
				/*
				 * If large page, and requested level
				 * is 1 (m->curlevel == 0), return
				 * pseudo PTE.
				 */
				tmp &= ~PDE_PS_BIT;
				/* Move the PAT bit from its large-page
				   position to its PTE position. */
				if (tmp & PDE_4M_PAT_BIT) {
					tmp |= PTE_PAT_BIT;
					tmp &= ~PDE_4M_PAT_BIT;
				}
				/* Fill in the address bits that select the
				   4KB page inside the large page. */
				if (m->levels == 2) {
					tmp &= ~0x3FF000ULL;
					tmp |= (m->curaddr & 0x3FF000);
				} else {
					tmp &= ~0x1FF000ULL;
					tmp |= (m->curaddr & 0x1FF000);
				}
			}
			return tmp;
		}
		/* Cache-control bits of the parent entry are passed to the
		   accessor that reads the next table. */
		tblattr = tmp & (PDE_PWT_BIT | PDE_PCD_BIT);
		/* Reduce the entry to the base address of the next table;
		   the 3-level top entry keeps all bits above the low five. */
		if (m->levels == 3 && m->readlevel == 3)
			tmp &= 0xFFFFFFE0;
		else
			tmp &= 0x0000FFFFFFFFF000ULL;
		m->readlevel--;
		/* Add the byte offset of the entry selected by curaddr:
		   4-byte entries for 2 levels, 8-byte entries otherwise. */
		if (m->levels == 2) {
			tmp |= (m->curaddr >> (10 + 10 * m->readlevel)) &
				0xFFC;
			m->entryaddr[m->readlevel] = tmp;
			m->entry[m->readlevel] = pmap_rd32 (m, tmp, tblattr);
		} else {
			tmp |= (m->curaddr >> (9 + 9 * m->readlevel)) &
				0xFF8;
			m->entryaddr[m->readlevel] = tmp;
			m->entry[m->readlevel] = pmap_rd64 (m, tmp, tblattr);
		}
	}
	return m->entry[m->curlevel];
}	

/*
 * Write entry e at the selected level for the current address.
 *
 * e:        new entry value.
 * attrmask: which of the low 12 attribute bits are taken from e; the
 *           remaining ones are replaced by defaults (RW, US, A, plus D at
 *           the lowest level; none for the top level of 3-level paging).
 *
 * The entry at the selected level must already have been read (ASSERT).
 * Returns the failure flag of pmap_wr32()/pmap_wr64(): false when the
 * write was done, true when an atomic guest write did not succeed.  In
 * the failure case the cached entry is replaced by the value handed back
 * by the writer.
 */
bool
pmap_write (pmap_t *m, u64 e, uint attrmask)
{
	uint attrdef = PTE_RW_BIT | PTE_US_BIT | PTE_A_BIT;
	u32 tblattr;
	bool fail;

	ASSERT (m->readlevel <= m->curlevel);
	if (m->levels == 3 && m->curlevel == 2)
		attrdef = 0;
	else if (m->curlevel == 0)
		attrdef |= PTE_D_BIT;
	/* Keep the address bits and the caller-controlled attributes... */
	e &= (~0xFFFULL) | attrmask;
	/* ...and force the defaults for all other attributes. */
	e |= attrdef & ~attrmask;
	/* Cache-control bits of the parent entry, passed to the writer. */
	tblattr = m->entry[m->curlevel + 1] & (PDE_PWT_BIT | PDE_PCD_BIT);
	if (m->levels == 2)
		fail = pmap_wr32 (m, m->entryaddr[m->curlevel], tblattr,
				  m->entry[m->curlevel], &e);
	else
		fail = pmap_wr64 (m, m->entryaddr[m->curlevel], tblattr,
				  m->entry[m->curlevel], &e);
	m->entry[m->curlevel] = e;
	if (fail)
		m->readlevel = m->curlevel;
	return fail;
}

/*
 * Return a pointer to the in-memory entry at the selected level.
 * Only valid for VMM walkers and only after that entry has been read
 * (both conditions are ASSERTed).
 */
void *
pmap_pointer (pmap_t *m)
{
	ASSERT (m->readlevel <= m->curlevel);
	ASSERT (m->type == PMAP_TYPE_VMM);
	return (void *)phys_to_virt (m->entryaddr[m->curlevel]);
}

/*
 * Zero the whole table that contains the entries of the selected level.
 *
 * The table is located through the cached parent entry (level index
 * curlevel + 1), which must be present and must not be a large page when
 * the lowest level is selected.  Only VMM walkers are supported.  For the
 * top level of 3-level paging only four 8-byte entries are cleared;
 * otherwise a full page is cleared.  Afterwards the cached entry at the
 * selected level is invalidated so that it is read again.
 */
void
pmap_clear (pmap_t *m)
{
	ASSERT (m->readlevel <= m->curlevel + 1);
	ASSERT (m->entry[m->curlevel + 1] & PDE_P_BIT);
	ASSERT (!(m->curlevel == 0 && (m->entry[1] & PDE_PS_BIT)));
	ASSERT (m->type == PMAP_TYPE_VMM);
	memset ((void *)phys_to_virt (m->entry[m->curlevel + 1] & ~0xFFF), 0,
		(m->levels == 3 && m->curlevel == 2) ? 8 * 4 : PAGESIZE);
	m->readlevel = m->curlevel + 1;
}

/*
 * Make sure all page tables down to the selected level exist for the
 * current address, allocating and linking zeroed tables where an entry is
 * not present.  Only VMM walkers are supported.
 *
 * Returns VMMERR_SUCCESS once the entry at the selected level can be
 * read, or VMMERR_NOMEM if a table page cannot be allocated.
 */
vmmerr_t
pmap_autoalloc (pmap_t *m)
{
	int level;
	void *tmp;
	phys_t phys;
	vmmerr_t err;

	ASSERT (m->type == PMAP_TYPE_VMM);
	level = m->curlevel;
	/* Already read down to the requested level: nothing to do. */
	if (m->readlevel <= level)
		return VMMERR_SUCCESS;
	/* The lowest cached entry is already known to be missing, so skip
	   the read and go straight to allocation. */
	if (!(m->entry[m->readlevel] & PDE_P_BIT))
		goto readskip;
	for (;;) {
		/* Walk down as far as present entries allow. */
		pmap_read (m);
		if (m->readlevel <= level)
			return VMMERR_SUCCESS;
		ASSERT (!(m->entry[m->readlevel] & PDE_P_BIT));
	readskip:
		err = alloc_page (&tmp, &phys);
		if (err) {
			printf("pmap_autoalloc: Failed to allocate memory for a page table.");
			return VMMERR_NOMEM;
		}
		memset (tmp, 0, PAGESIZE);
		/* Temporarily select the missing level to install the new
		   table, then restore the caller's level. */
		m->curlevel = m->readlevel;
		pmap_write (m, phys | PDE_P_BIT, PDE_P_BIT);
		m->curlevel = level;
	}
}

/*
 * Print the walker state for debugging: cached entries 0-4, entry
 * addresses 0-3, the current address, and the level/type fields.
 */
void
pmap_dump (pmap_t *m)
{
	printf ("entry[0]=0x%08llX ", m->entry[0]);
	printf ("entry[1]=0x%08llX ", m->entry[1]);
	printf ("entry[2]=0x%08llX\n", m->entry[2]);
	printf ("entry[3]=0x%08llX ", m->entry[3]);
	printf ("entry[4]=0x%08llX\n", m->entry[4]);
	printf ("entryaddr[0]=0x%08llX ", m->entryaddr[0]);
	printf ("entryaddr[1]=0x%08llX\n", m->entryaddr[1]);
	printf ("entryaddr[2]=0x%08llX ", m->entryaddr[2]);
	printf ("entryaddr[3]=0x%08llX\n", m->entryaddr[3]);
	printf ("curaddr=0x%08lX ", m->curaddr);
	printf ("curlevel=%d ", m->curlevel);
	printf ("readlevel=%d ", m->readlevel);
	printf ("levels=%d ", m->levels);
	printf ("type=%d\n", m->type);
}

/**********************************************************************/
/*** accessing memory ***/

/*
 * Return the address of a host physical range inside the permanent
 * HPHYS_ADDR mapping, or NULL when the range is not covered by it.
 *
 * hphys: host physical start address.
 * len:   length of the range in bytes.
 *
 * Always returns NULL when NUM_OF_HPHYS_PAGES is 0.
 */
static void *
mapped_hphys_addr (phys_t hphys, uint len)
{
#if NUM_OF_HPHYS_PAGES == 0
	return NULL;
#else
	phys_t first_page = hphys >> PAGESIZE_SHIFT;
	phys_t last_page = (hphys + len - 1) >> PAGESIZE_SHIFT;

	/* Both the first and the last page must be inside the mapping. */
	if (first_page >= NUM_OF_HPHYS_PAGES ||
	    last_page >= NUM_OF_HPHYS_PAGES)
		return NULL;
	return (void *)(virt_t)(HPHYS_ADDR + hphys);
#endif
}

/*
 * Return the address of a guest physical range inside the permanent
 * HPHYS_ADDR mapping, or NULL when that is not possible.
 *
 * gphys: guest physical start address.
 * len:   length of the range in bytes.
 * flags: not used here.
 *
 * The range qualifies only if every guest page in it translates (through
 * the current VM's gp2hp) to consecutive host pages, so that the whole
 * range is contiguous in host physical memory, and if the resulting host
 * range is covered by mapped_hphys_addr().  Always returns NULL when
 * NUM_OF_HPHYS_PAGES is 0.
 */
static void *
mapped_gphys_addr(phys_t gphys, uint len, u32 flags)
{
#if NUM_OF_HPHYS_PAGES == 0
	return NULL;
#else
	phys_t gpage, last_gpage;
	phys_t hphys, expected, actual;

	hphys = current->vm->gmm.gp2hp(gphys);
	if (hphys == GMM_NO_MAPPING)
		return NULL;

	last_gpage = (gphys + len - 1) & ~PAGESIZE_MASK;
	expected = hphys & ~PAGESIZE_MASK;
	for (gpage = gphys & ~PAGESIZE_MASK; gpage != last_gpage; ) {
		gpage += PAGESIZE;
		expected += PAGESIZE;
		actual = current->vm->gmm.gp2hp(gpage);
		/* Not contiguous with the previous page, or not mapped. */
		if (actual != expected || actual == GMM_NO_MAPPING)
			return NULL;
	}
	return mapped_hphys_addr(hphys, len);
#endif
}

/*
 * Find a run of unused virtual pages in the mapmem area.
 *
 * m:      walker opened on the host page tables.
 * offset: byte offset of the mapping inside its first page.
 * len:    length of the mapping in bytes.
 *
 * The search starts after the last page handed out (mapmem_lastvirt),
 * wraps to MAPMEM_ADDR_START when it reaches MAPMEM_ADDR_END, and gives up
 * after the second wrap.  A page counts as unused when its PTE is not
 * present.
 *
 * Returns the address of the FIRST page of the run plus offset, or NULL
 * when no run of the required size exists.  All pages of the returned
 * run have been verified as unused and lie below MAPMEM_ADDR_END, so the
 * caller can map them in ascending order starting at the returned page.
 * On success mapmem_lastvirt is set to the last page of the run.
 */
static void *
mapmem_alloc (pmap_t *m, uint offset, uint len)
{
	u64 pte;
	virt_t start, virt;
	uint n, i;
	int loopcount = 0;

	n = (offset + len + PAGESIZE_MASK) >> PAGESIZE_SHIFT;
	if (n == 0) {
		/* Nothing to reserve; keep the previous return value for
		   an empty request. */
		return (void *)(mapmem_lastvirt + offset);
	}
	start = mapmem_lastvirt + PAGESIZE;
retry:
	for (i = 0; i < n; i++) {
		virt = start + (virt_t)i * PAGESIZE;
		if (virt >= MAPMEM_ADDR_END) {
			loopcount++;
			if (loopcount > 1) {
				/* Not enough virtual address space. */
				return NULL;
			}
			/* Restart the run at the beginning of the area. */
			start = MAPMEM_ADDR_START;
			goto retry;
		}
		pmap_setvirt (m, virt, 1);
		pte = pmap_read (m);
		if (pte & PTE_P_BIT) {
			/* The virtual address is already used; restart the
			   run right after it. */
			start = virt + PAGESIZE;
			goto retry;
		}
	}
	mapmem_lastvirt = start + (virt_t)(n - 1) * PAGESIZE;
	return (void *)(start + offset);
}

/*
 * Translate the cache type selected in a MAPMEM_* flag word into the
 * matching HOST_PTE_ATTR_* page table attribute bits.
 *
 * flags: mapmem flags; only the MAPMEM_CACHE_MASK part is examined.
 *
 * Panics on a cache type that is not one of the known MAPMEM_* values.
 */
u64
mm_cache_flag_to_pte_attr (int flags)
{
	switch (flags & MAPMEM_CACHE_MASK) {
	case MAPMEM_WB:
		return HOST_PTE_ATTR_WB;
	case MAPMEM_UC:
		return HOST_PTE_ATTR_UC;
	case MAPMEM_WUC:
		return HOST_PTE_ATTR_WUC;
	case MAPMEM_WC:
		return HOST_PTE_ATTR_WC;
	case MAPMEM_WT:
		return HOST_PTE_ATTR_WT;
	case MAPMEM_WP:
		return HOST_PTE_ATTR_WP;
	default:
		panic ("Unknown cache flag %d",
		       flags & MAPMEM_CACHE_MASK);
	}
}

/*
 * Install page table entries that map a physical range at virt_addr.
 *
 * m:         walker opened on the host page tables.
 * virt_addr: virtual address returned by mapmem_alloc().
 * flags:     MAPMEM_* flags: MAPMEM_GPHYS selects guest physical
 *            addresses (translated per page through gp2hp), MAPMEM_WRITE
 *            makes the mapping writable, and the cache type selects the
 *            cache attribute bits.
 * phys_addr: physical (host or guest) start address.
 * len:       length in bytes.
 *
 * Returns 0 on success.  On failure (page table allocation failed or a
 * guest page has no host mapping) every page mapped so far is unmapped
 * again and -1 is returned.
 */
static int
mapmem_domap(pmap_t *m, void *virt_addr, int flags, phys_t phys_addr, uint len)
{
	virt_t virt;
	u64 pte;
	phys_t phys, hphys;
	uint i, num;
	vmmerr_t err;

	virt = (virt_t)virt_addr & ~PAGESIZE_MASK;
	phys = phys_addr & ~PAGESIZE_MASK;
	num = ((phys_addr & PAGESIZE_MASK) + len + PAGESIZE_MASK) >> PAGESIZE_SHIFT;
	for (i = 0; i < num; i++) {
		pmap_setvirt (m, virt, 1);
		err = pmap_autoalloc(m);
		if (err) {
			printf("Failed to create page table. cpu %d virt 0x%lx\n",
			       get_cpu_id(), virt);
			goto unmap;
		}
		if (flags & MAPMEM_GPHYS) {
			hphys = current->vm->gmm.gp2hp(phys);
			if (hphys == GMM_NO_MAPPING) {
				/* Report the guest address that failed, not
				   the sentinel returned by gp2hp. */
				printf("Failed to map no gp2hp mapping area. cpu %d gphys 0x%llx\n",
				       get_cpu_id(), phys);
				goto unmap;
			}
		} else {
			hphys = phys;
		}
		/* The slot is expected to be free; print details before the
		   ASSERT fires if it is not. */
		if ((pmap_read (m) & PTE_P_BIT) != 0) {
			printf("mapmem_domap: virt_addr %p, phys_addr 0x%llx, pte %llx, i %d\n",
			       virt_addr, phys_addr, pmap_read(m), i);
		}
		ASSERT ((pmap_read (m) & PTE_P_BIT) == 0);
		pte = (hphys & ~PAGESIZE_MASK) | PTE_P_BIT |
			mm_cache_flag_to_pte_attr (flags);
		if (flags & MAPMEM_WRITE) {
			pte |= PTE_RW_BIT;
		}
		/* Present, RW and the cache bits come from pte; the other
		   attribute bits get pmap_write()'s defaults. */
		pmap_write (m, pte, PTE_P_BIT | PTE_RW_BIT | PTE_PWT_BIT |
			PTE_PCD_BIT | PTE_PAT_BIT);
		asm_invlpg ((void *)(virt));
		virt += PAGESIZE;
		phys += PAGESIZE;
	}
	return 0;
unmap:
	/* Roll back the i pages that were mapped before the failure. */
	virt = (virt_t)virt_addr & ~PAGESIZE_MASK;
	num = i;
	for (i = 0; i < num; i++) {
		pmap_setvirt(m, virt, 1);
		if (pmap_read(m) & PTE_P_BIT)
			pmap_write(m, 0, 0);
		asm_invlpg((void *)(virt));
		virt += PAGESIZE;
	}
	return -1;
}

/*
 * Remove a mapping created by mapmem().
 *
 * virt_addr: address returned by mapmem().
 * len:       length that was mapped, in bytes.
 *
 * Addresses outside the mapmem area (such as those served from the
 * permanent host physical mapping) are ignored.  For each page of the
 * range the PTE is cleared if present and the TLB entry is invalidated.
 */
void
unmapmem(void *virt_addr, uint len)
{
	pmap_t m;
	ulong hostcr3;
	virt_t page;
	uint remaining;

	if ((virt_t)virt_addr < MAPMEM_ADDR_START ||
	    (virt_t)virt_addr >= MAPMEM_ADDR_END)
		return;

	/* Number of pages touched by [virt_addr, virt_addr + len). */
	remaining = (((virt_t)virt_addr & PAGESIZE_MASK) + len + PAGESIZE_MASK) >> PAGESIZE_SHIFT;
	page = (virt_t)virt_addr & ~PAGESIZE_MASK;

	spinlock_lock(&mapmem_lock);
	asm_rdcr3(&hostcr3);
	pmap_open_vmm(&m, hostcr3, PMAP_LEVELS);
	for (; remaining > 0; remaining--, page += PAGESIZE) {
		pmap_setvirt(&m, page, 1);
		if (pmap_read(&m) & PTE_P_BIT)
			pmap_write(&m, 0, 0);
		asm_invlpg((void *)(page));
	}
	pmap_close(&m);
	spinlock_unlock(&mapmem_lock);
}

/*
 * Map a physical range into the VMM's address space.
 *
 * flags:    MAPMEM_* flags (address kind, write access, cache type); only
 *           bits in MAPMEM_VALID_FLAG are allowed (ASSERT).
 * physaddr: host or guest physical start address.
 * len:      length in bytes.
 *
 * Write-back requests are first tried against the permanent host
 * physical mapping, which needs no page table changes.  Otherwise a free
 * virtual range is reserved in the mapmem area and mapped under
 * mapmem_lock.
 *
 * Returns the virtual address corresponding to physaddr, or NULL when no
 * virtual range is available or the mapping could not be created.
 */
void *
mapmem(u32 flags, phys_t physaddr, uint len)
{
	void *virt;
	pmap_t m;
	ulong hostcr3;

	ASSERT (!(flags & ~MAPMEM_VALID_FLAG));

	if ((flags & MAPMEM_CACHE_MASK) == MAPMEM_WB) {
		if (flags & MAPMEM_GPHYS)
			virt = mapped_gphys_addr(physaddr, len, flags);
		else
			virt = mapped_hphys_addr(physaddr, len);
		if (virt != NULL)
			return virt;
	}

	spinlock_lock(&mapmem_lock);
	asm_rdcr3(&hostcr3);
	pmap_open_vmm(&m, hostcr3, PMAP_LEVELS);
	virt = mapmem_alloc(&m, physaddr & PAGESIZE_MASK, len);
	if (virt != NULL &&
	    mapmem_domap(&m, virt, flags, physaddr, len) != 0)
		virt = NULL;
	pmap_close(&m);
	spinlock_unlock(&mapmem_lock);
	return virt;
}

/*
 * Map a host physical range: mapmem() with MAPMEM_HPHYS added to flags.
 * Returns the virtual address, or NULL on failure.
 */
void *
mapmem_hphys (phys_t physaddr, uint len, u32 flags)
{
	return mapmem (MAPMEM_HPHYS | flags, physaddr, len);
}

/*
 * Map a guest physical range: mapmem() with MAPMEM_GPHYS added to flags.
 * Returns the virtual address, or NULL on failure.
 */
void *
mapmem_gphys (phys_t physaddr, uint len, u32 flags)
{
	return mapmem (MAPMEM_GPHYS | flags, physaddr, len);
}

/* Register mm_init_global() as an init function under the "global2" tag.
   NOTE(review): the ordering implied by the tag is defined by
   core/initfunc.h -- confirm there. */
INITFUNC ("global2", mm_init_global);
