Diffstat (limited to 'ntkrutils.h')
-rw-r--r-- | ntkrutils.h | 1269
1 file changed, 1269 insertions, 0 deletions
diff --git a/ntkrutils.h b/ntkrutils.h new file mode 100644 index 0000000..bea0a30 --- /dev/null +++ b/ntkrutils.h @@ -0,0 +1,1269 @@ +/* + * Copyright 2019 Google LLC + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#pragma once +#include <ntddk.h> +#include <intrin.h> +#include <gvm_types.h> +#include <string.h> +#include <dos.h> +#include <linux/list.h> + +// APC definitions (undocumented) +typedef enum _KAPC_ENVIRONMENT +{ + OriginalApcEnvironment, + AttachedApcEnvironment, + CurrentApcEnvironment, + InsertApcEnvironment +} KAPC_ENVIRONMENT; + +typedef +VOID +(NTAPI *PKNORMAL_ROUTINE)( + _In_ PVOID NormalContext, + _In_ PVOID SystemArgument1, + _In_ PVOID SystemArgument2 + ); + +typedef +VOID +(NTAPI *PKKERNEL_ROUTINE)( + _In_ PKAPC Apc, + _Inout_ PKNORMAL_ROUTINE* NormalRoutine, + _Inout_ PVOID* NormalContext, + _Inout_ PVOID* SystemArgument1, + _Inout_ PVOID* SystemArgument2 + ); + +typedef +VOID +(NTAPI *PKRUNDOWN_ROUTINE) ( + _In_ PKAPC Apc + ); + +NTKERNELAPI +VOID +NTAPI +KeInitializeApc( + _Out_ PRKAPC Apc, + _In_ PETHREAD Thread, + _In_ KAPC_ENVIRONMENT Environment, + _In_ PKKERNEL_ROUTINE KernelRoutine, + _In_opt_ PKRUNDOWN_ROUTINE RundownRoutine, + _In_opt_ PKNORMAL_ROUTINE NormalRoutine, + _In_opt_ KPROCESSOR_MODE ApcMode, + _In_opt_ PVOID NormalContext + ); + +NTKERNELAPI +BOOLEAN +NTAPI +KeInsertQueueApc( + _Inout_ PRKAPC Apc, + _In_opt_ PVOID SystemArgument1, + _In_opt_ PVOID SystemArgument2, + _In_ KPRIORITY Increment + ); + +// MSDN recommends the string in reverse order +#define GVM_POOL_TAG '_MVG' + +// cpuid +static __forceinline void cpuid(unsigned int op, + unsigned int *eax, + unsigned int *ebx, + unsigned int *ecx, + unsigned int *edx) +{ + int cpuInfo[4]; + __cpuid(cpuInfo, op); + *eax = cpuInfo[0]; + *ebx = cpuInfo[1]; + *ecx = cpuInfo[2]; + *edx = cpuInfo[3]; +} + +static __forceinline void cpuid_count(unsigned int op, + unsigned int count, + unsigned int *eax, + unsigned int *ebx, + unsigned int *ecx, + unsigned int *edx) +{ + int cpuInfo[4]; + __cpuidex(cpuInfo, op, count); + *eax = cpuInfo[0]; + *ebx = cpuInfo[1]; + *ecx = cpuInfo[2]; + *edx = cpuInfo[3]; +} + +static __inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return eax; +} + +static __inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ebx; +} + +static __inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ecx; +} + +static __inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return edx; +} + +static __forceinline unsigned int x86_family(unsigned int sig) +{ + unsigned int x86; + + x86 = (sig >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (sig >> 20) & 0xff; + + return x86; +} + +static __forceinline unsigned int x86_cpuid_family(void) +{ + return x86_family(cpuid_eax(1)); +} + +static __forceinline unsigned int x86_model(unsigned int sig) +{ + unsigned int fam, model; + + fam = 
x86_family(sig); + + model = (sig >> 4) & 0xf; + + if (fam >= 0x6) + model += ((sig >> 16) & 0xf) << 4; + + return model; +} + +static __forceinline unsigned int x86_cpuid_model(void) +{ + return x86_model(cpuid_eax(1)); +} + +static __forceinline unsigned int x86_stepping(unsigned int sig) +{ + return sig & 0xf; +} + +/* + * cpu_has_vmx + */ +static __inline int cpu_has_vmx(void) +{ + size_t ecx = cpuid_ecx(1); + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ +} + +/* + * Memory Barriers + */ +#define smp_mb() _mm_mfence() +#define smp_rmb() _mm_lfence() +#define smp_wmb() _mm_sfence() +#define mb() _mm_mfence() +#define rmb() _mm_lfence() +#define wmb() _mm_sfence() +#define smp_mb__after_atomic() _mm_mfence() + +// smp_processor_id +static __inline unsigned int raw_smp_processor_id(void) +{ + return KeGetCurrentProcessorNumberEx(NULL); +} + +static __inline unsigned int smp_processor_id(void) +{ + return raw_smp_processor_id(); +} + +/* + * cpu_get/put to ensure VMX safety + */ + +struct cpu_getput_cxt { + long count; + KIRQL irql; +}; + +DECLARE_PER_CPU(struct cpu_getput_cxt, cpu_getput_cxt); + +static __inline unsigned int get_cpu() +{ + KIRQL oldIrql = KeRaiseIrqlToDpcLevel(); + unsigned int cpu = smp_processor_id(); + long newcount = InterlockedIncrement(&per_cpu(cpu_getput_cxt, cpu).count); + + if (newcount == 1) + per_cpu(cpu_getput_cxt, cpu).irql = oldIrql; + + return cpu; +} + +static __inline void put_cpu() +{ + unsigned int cpu = smp_processor_id(); + long newcount = InterlockedDecrement(&per_cpu(cpu_getput_cxt, cpu).count); + BUG_ON(newcount < 0); + if (newcount == 0) { + KIRQL oldIrql = per_cpu(cpu_getput_cxt, cpu).irql; + per_cpu(cpu_getput_cxt, cpu).irql = 0; + KeLowerIrql(oldIrql); + } +} + +#define preempt_disable() KeRaiseIrqlToDpcLevel() +#define preempt_enable() KeLowerIrql(PASSIVE_LEVEL) + +// msr access +static __forceinline void wrmsrl(unsigned int msr, u64 val) +{ + __writemsr(msr, val); +} + +extern struct cpumask *cpu_online_mask; +extern unsigned int cpu_online_count; + +/* + * SpinLock Implementation + * Compared with Windows Native Support, this implementation does not raise IRQL to DPC level. + * KVM has nasty lock nesting that might work on Linux but not directly on Windows.
+ */ +struct spin_lock { + volatile LONG lock; +}; + +typedef struct spin_lock spinlock_t; +typedef struct spin_lock raw_spinlock_t; + +#define DEFINE_SPINLOCK(x) spinlock_t x +#define DECLARE_SPINLOCK(x) extern spinlock_t x +#define DEFINE_RAW_SPINLOCK(x) spinlock_t x +#define DECLARE_RAW_SPINLOCK(x) extern spinlock_t x + +static __forceinline void spin_lock_init(spinlock_t *lock) +{ + lock->lock = 0; +} + +extern __forceinline void __spin_lock(spinlock_t *lock); +static __forceinline void spin_lock(spinlock_t *lock) +{ + __spin_lock(lock); +} + +static __forceinline void spin_unlock(spinlock_t *lock) +{ + lock->lock = 0; +} + +static __forceinline void raw_spin_lock_init(spinlock_t *lock) +{ + spin_lock_init(lock); +} + +static __forceinline void raw_spin_lock(spinlock_t *lock) +{ + spin_lock(lock); +} + +static __forceinline void raw_spin_unlock(spinlock_t *lock) +{ + spin_unlock(lock); +} + +/* + Mutex Windows Implementation + */ +struct mutex +{ + FAST_MUTEX mutex; +}; +typedef struct mutex mutex; + +static __forceinline void mutex_init(struct mutex *lock) +{ + ExInitializeFastMutex(&lock->mutex); +} + +static __forceinline void mutex_lock(struct mutex *lock) +{ + ExAcquireFastMutex(&lock->mutex); +} + +static __forceinline void mutex_unlock(struct mutex *lock) +{ + ExReleaseFastMutex(&lock->mutex); +} + +#define __KERNEL_CS 0x10 +#define __KERNEL_DS 0x28 +#define __KERNEL_SS 0x18 +#define __KERNEL_FS 0x53 + +/* + MSR access + */ +static __inline void __rdmsr(u32 index, u32 *low, u32 *high) +{ + u64 val = __readmsr(index); + *low = (u32)val; + *high = (u32)(val >> 32); +} + +static __inline int __rdmsr_safe(u32 index, u32 *low, u32 *high) +{ + u64 val = 0; + __try { + val = __readmsr(index); + *low = (u32)val; + *high = (u32)(val >> 32); + } __except(EXCEPTION_EXECUTE_HANDLER) { + return -1; + } + return 0; +} + +static __inline int __rdmsrl_safe(u32 index, u64 *val) +{ + __try { + *val = __readmsr(index); + } __except(EXCEPTION_EXECUTE_HANDLER) { + return -1; + } + return 0; +} + +static __inline u64 native_read_msr_safe(u32 index, int *err) +{ + u64 value = 0; + *err = __rdmsrl_safe(index, &value); + return value; +} + +static __inline int __wrmsr_safe(u32 index, u32 low, u32 high) +{ + u64 val = (((u64)high) << 32) | low; + __try { + __writemsr(index, val); + } __except(EXCEPTION_EXECUTE_HANDLER) { + return -1; + } + return 0; +} + +static __inline int __wrmsrl_safe(u32 index, u64 val) +{ + __try { + __writemsr(index, val); + } __except(EXCEPTION_EXECUTE_HANDLER) { + return -1; + } + return 0; +} + +static __inline int native_write_msr_safe(u32 index, u32 low, u32 high) +{ + return __wrmsr_safe(index, low, high); +} + +#define rdmsr(a, b, c) __rdmsr(a, &b, &c) +#define rdmsr_safe(a, b, c) __rdmsr_safe(a, b, c) +#define rdmsrl(a, b) b=__readmsr(a) +#define rdmsrl_safe(a, b) __rdmsrl_safe(a, b) + +#define wrmsr(a,b) __writemsr(a,b) +#define wrmsrl(a,b) __writemsr(a,b) +#define wrmsr_safe(a, b, c) __wrmsr_safe(a, b, c) +#define wrmsrl_safe(a,b) __wrmsrl_safe(a,b) + +/* + Local Irq Disable + */ +static __forceinline void local_irq_disable(void) +{ + _disable(); +} + +static __forceinline void local_irq_enable(void) +{ + _enable(); +} + +/* + Timer Stuffs + */ + +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + +union ktime +{ + s64 tv64; + struct { + s32 nsec, sec; + } tv; +}; + +typedef union ktime ktime_t; + 
+#define KTIME_MAX ((s64)~((u64)1 << 63)) +#define KTIME_SEC_MAX LONG_MAX + +#pragma warning(disable : 4204) +static __forceinline ktime_t ktime_set(const long secs, const size_t nsecs) +{ +#if 0 + if (unlikely(secs >= KTIME_SEC_MAX)) + return (ktime_t){ .tv64 = KTIME_MAX }; +#endif + return (ktime_t) { .tv64 = (s64)secs * NSEC_PER_SEC + (s64)nsecs }; +} + +/* Subtract two ktime_t variables. rem = lhs -rhs: */ +#define ktime_sub(lhs, rhs) \ + (ktime_t){ .tv64 = (lhs).tv64 - (rhs).tv64 } + +/* Add two ktime_t variables. res = lhs + rhs: */ +#define ktime_add(lhs, rhs) \ + (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 } + +/* + * Add a ktime_t variable and a scalar nanosecond value. + * res = kt + nsval: + */ +#define ktime_add_ns(kt, nsval) \ + (ktime_t){ .tv64 = (kt).tv64 + (nsval) } + +/* + * Subtract a scalar nanosecod from a ktime_t variable + * res = kt - nsval: + */ +#define ktime_sub_ns(kt, nsval) \ + (ktime_t){ .tv64 = (kt).tv64 - (nsval) } + + +/* Map the ktime_t to timespec conversion to ns_to_timespec function */ +#define ktime_to_timespec(kt) ns_to_timespec((kt).tv64) + +/* Map the ktime_t to timeval conversion to ns_to_timeval function */ +#define ktime_to_timeval(kt) ns_to_timeval((kt).tv64) + +/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */ +#define ktime_to_ns(kt) ((kt).tv64) + +static __forceinline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2) +{ + return cmp1.tv64 == cmp2.tv64; +} + +/** + * ktime_compare - Compares two ktime_t variables for less, greater or equal + * @cmp1: comparable1 + * @cmp2: comparable2 + * + * Returns ... + * cmp1 < cmp2: return <0 + * cmp1 == cmp2: return 0 + * cmp1 > cmp2: return >0 + */ +static __forceinline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2) +{ + if (cmp1.tv64 < cmp2.tv64) + return -1; + if (cmp1.tv64 > cmp2.tv64) + return 1; + return 0; +} + +static __forceinline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) +{ + return ktime_add_ns(kt, usec * 1000); +} + +static __forceinline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) +{ + return ktime_sub_ns(kt, usec * 1000); +} + +static __forceinline ktime_t ns_to_ktime(u64 ns) +{ + static const ktime_t ktime_zero = { .tv64 = 0 }; + return ktime_add_ns(ktime_zero, ns); +} + +static __forceinline ktime_t ktime_get(void) +{ + s64 nsecs = 0; + LARGE_INTEGER time; + KeQuerySystemTime(&time); + nsecs = time.QuadPart; + nsecs *= 100; + + return (ktime_t){.tv64 = nsecs}; +} +typedef size_t clockid_t; +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 + +enum hrtimer_mode +{ + HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */ + HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */ + HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */ + HRTIMER_MODE_ABS_PINNED = 0x02, + HRTIMER_MODE_REL_PINNED = 0x03, +}; + +enum hrtimer_restart +{ + HRTIMER_NORESTART, /* Timer is not restarted */ + HRTIMER_RESTART, /* Timer must be restarted */ +}; + +struct timerqueue_node +{ + ktime_t expires; +}; + +struct hrtimer_clock_base +{ + int index; + ktime_t resolution; + ktime_t (*get_time)(void); + ktime_t softirq_time; + ktime_t offset; +}; + +struct hrtimer +{ + struct timerqueue_node node; + ktime_t _softexpires; + enum hrtimer_restart (*function)(struct hrtimer *); + struct 
hrtimer_clock_base *base; + size_t state; + KTIMER ktimer; + KDPC kdpc; + LARGE_INTEGER due_time; + struct hrtimer_clock_base base_hack; +}; + +void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode); +int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode); +int hrtimer_cancel(struct hrtimer *timer); +int hrtimer_restart(struct hrtimer* timer); + +static __forceinline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 delta) +{ + timer->node.expires = ktime_add_ns(timer->node.expires, delta); +} + +static __forceinline ktime_t hrtimer_get_expires(struct hrtimer *timer) +{ + return timer->node.expires; +} + +static __forceinline u64 hrtimer_get_expires_ns(struct hrtimer *timer) +{ + return ktime_to_ns(timer->node.expires); +} + +static __forceinline void hrtimer_start_expires(struct hrtimer *timer, int mode) +{ + hrtimer_start(timer, timer->node.expires, mode); +} + +static __forceinline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) +{ + return ktime_sub(timer->node.expires, timer->base->get_time()); +} + +static __forceinline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) +{ + ktime_t rem; + rem = hrtimer_expires_remaining(timer); + return rem; +} + +/* + Memory Management Stuffs + */ + +#define BIT(nr) ((size_t)(1) << (nr)) +#define GFP_KERNEL BIT(0) +#define GFP_ATOMIC BIT(1) +#define __GFP_ZERO BIT(3) +#define GFP_UNALLOC BIT(5) + + /* + * Address types: + * + * gva - guest virtual address + * gpa - guest physical address + * gfn - guest frame number + * hva - host virtual address + * hpa - host physical address + * hfn - host frame number + */ + +typedef size_t gva_t; +typedef u64 gpa_t; +typedef u64 gfn_t; +typedef u64 phys_addr_t; + +typedef size_t hva_t; +typedef u64 hpa_t; +typedef u64 hfn_t; + +typedef hfn_t pfn_t; + +typedef struct page +{ + void* hva; + void* kmap_hva; + size_t __private; + hpa_t hpa; + pfn_t pfn; + size_t gfp_mask; + PEPROCESS proc; +}page; + +extern u64 max_pagen; +extern struct page** pglist; +DECLARE_RAW_SPINLOCK(global_page_lock); + +#define page_private(page) ((page)->__private) +#define set_page_private(page, v) ((page)->__private = (v)) + +#define __free_page(page) __free_pages((page), 0) +#define free_page(addr) free_pages((addr), 0) + +#define clear_page(page) memset((page), 0, PAGE_SIZE) + +#define virt_to_page(kaddr) pfn_to_page((__pa(kaddr) >> PAGE_SHIFT)) + + +static __inline void *kmalloc(size_t size, size_t flags) +{ + void* ret = NULL; + int zero = 0; + + if (flags & __GFP_ZERO) + zero = 1; + + ret = ExAllocatePoolWithTag(NonPagedPool, size, GVM_POOL_TAG); + + if(ret && zero) + { + memset(ret, 0, size); + } + return ret; +} + +static __inline void *kzalloc(size_t size, size_t flags) +{ + return kmalloc(size, flags | __GFP_ZERO); +} + +static __inline void kfree(void* hva) +{ + if (!hva) + return; + ExFreePoolWithTag(hva, GVM_POOL_TAG); +} + +static __inline void *vmalloc(size_t size) +{ + return ExAllocatePoolWithTag(NonPagedPool, size, GVM_POOL_TAG); +} + +static __inline void vfree(void* hva) +{ + if (!hva) + return; + ExFreePoolWithTag(hva, GVM_POOL_TAG); +} + +static __inline void *vzalloc(size_t size) +{ + void *addr = vmalloc(size); + if (addr) + { + memset(addr, 0, size); + } + return addr; +} + +static __inline void *kmalloc_fast(size_t size, size_t flags) +{ + return kmalloc(size, flags); +} + +static __inline void *kzalloc_fast(size_t size, size_t flags) +{ + return kmalloc_fast(size, flags | __GFP_ZERO); +} + +static __inline void 
kfree_fast(void* hva) +{ + if (!hva) + return; + ExFreePoolWithTag(hva, GVM_POOL_TAG); +} + +#define kvfree kfree_fast + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +static __inline pfn_t page_to_pfn(struct page* page) +{ + return page->pfn; +} + +static __inline void* page_to_hva(struct page* page) +{ + return page->hva; +} + +static __inline hpa_t page_to_phys(struct page* page) +{ + return page->hpa; +} + +static __inline hpa_t mdl_to_phys(PMDL mdl) +{ + return (hpa_t)MmGetPhysicalAddress(mdl->StartVa).QuadPart; +} + +static __inline struct page* pfn_to_page(pfn_t pfn) +{ + return pglist[pfn]; +} + +static __inline hpa_t __pa(void* va) +{ + PHYSICAL_ADDRESS addr_phys; + addr_phys = MmGetPhysicalAddress(va); + return (hpa_t)(addr_phys.QuadPart); +} + +static __inline void* __va(hpa_t pa) +{ + void* ret = 0; + ret = page_to_hva(pfn_to_page(pa >> PAGE_SHIFT)); + if(!ret) + { + printk("vmmr0: __va: invalid hpa %p\n", pa); + } + return ret; +} + +static __inline struct page *alloc_page(unsigned int gfp_mask) +{ + void* page_hva = NULL; + PHYSICAL_ADDRESS pageaddr_phys; + int zero = 0; + struct page* page = ExAllocatePoolWithTag(NonPagedPool, + sizeof(*page), + GVM_POOL_TAG); + if(!page) + goto out_error; + + page_hva = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, GVM_POOL_TAG); + if(!page_hva) + goto out_error_free; + + if (gfp_mask & __GFP_ZERO) + zero = 1; + + ASSERT(!((size_t)page_hva & 0xfffull)); + + if(zero) + memset(page_hva, 0, PAGE_SIZE); + + pageaddr_phys = MmGetPhysicalAddress(page_hva); + page->hpa = pageaddr_phys.QuadPart; + page->pfn = page->hpa >> PAGE_SHIFT; + page->hva = page_hva; + page->gfp_mask = gfp_mask; + page->proc = IoGetCurrentProcess(); + raw_spin_lock(&global_page_lock); + pglist[page->pfn] = page; + raw_spin_unlock(&global_page_lock); + return page; + + out_error_free: + ExFreePoolWithTag(page, GVM_POOL_TAG); + out_error: + return 0; +} + +static __inline void __free_pages(struct page* page, unsigned int order) +{ + ExFreePoolWithTag(page->hva, GVM_POOL_TAG); + + raw_spin_lock(&global_page_lock); + pglist[page->pfn] = 0; + raw_spin_unlock(&global_page_lock); + + ExFreePoolWithTag(page, GVM_POOL_TAG); +} + +static __inline void free_pages(size_t addr, unsigned int order) +{ + if (addr != 0) + { + __free_pages(virt_to_page((void *)addr), order); + } +} + +static __inline void* kmap(PMDL mdl) +{ + + if (!mdl) + return NULL; + + return MmGetSystemAddressForMdlSafe(mdl, NormalPagePriority); +} + +static __inline void kunmap(PMDL mdl) +{ +} + +static __inline void* page_address(struct page* page) +{ + BUG_ON(!page->hva); + return page->hva; +} + +static __inline void* get_zeroed_page(unsigned int gfp_mask) +{ + struct page* page = alloc_page(gfp_mask); + if (!page) + return NULL; + memset(page->hva, 0, PAGE_SIZE); + return page->hva; +} + +static __inline size_t __get_free_page(unsigned int gfp_mask) +{ + struct page *page; + page = alloc_page(gfp_mask); + if (!page) + return 0; + return (size_t) page_address(page); +} + +static __inline int get_user_pages_fast(size_t start, int nr_pages, int write, + PMDL *mdl) +{ + PMDL _mdl; + + start &= PAGE_MASK; + _mdl = IoAllocateMdl((void *)start, nr_pages * PAGE_SIZE, + FALSE, FALSE, NULL); + if (!_mdl) + return 0; + + MmProbeAndLockPages(_mdl, KernelMode, IoWriteAccess); + *mdl = _mdl; + + return nr_pages; +} + +static __inline void kvm_release_page(PMDL mdl) +{ + if (!mdl) + return; + + MmUnlockPages(mdl); + IoFreeMdl(mdl); +} + +/* We actually did not copy from *user* here. This function in kvm is used to + * copy ioctl parameters.
On Windows, we always use buffered io for device control. + * Thus the address supplied to copy_from_user is address in kernel space. + * Simple keep the function name here. + * __copy_from/to_user is really copying from user space. + */ +static __inline size_t copy_from_user(void *dst, const void *src, size_t size) +{ + memcpy(dst, src, size); + return 0; +} + +static __inline size_t __copy_user(void *dst, const void *src, size_t size, + int from) +{ + PMDL lock_mdl; + HANDLE handle; + + lock_mdl = IoAllocateMdl(from? src : dst, size, FALSE, FALSE, NULL); + if (!lock_mdl) + return size; + MmProbeAndLockPages(lock_mdl, UserMode, IoWriteAccess); + handle = MmSecureVirtualMemory(from? src : dst, size, PAGE_READWRITE); + if (!handle) + return size; + memcpy(dst, src, size); + MmUnsecureVirtualMemory(handle); + MmUnlockPages(lock_mdl); + IoFreeMdl(lock_mdl); + return 0; +} + +static __inline size_t __copy_to_user(void *dst, const void *src, size_t size) +{ + return __copy_user(dst, src, size, 0); +} + +static __inline size_t __copy_from_user(void *dst, const void *src, size_t size) +{ + return __copy_user(dst, src, size, 1); +} + +static __inline void *kmap_atomic(PMDL mdl) +{ + return kmap(mdl); +} + +static __inline void kunmap_atomic(PMDL mdl) +{ + kunmap(mdl); +} + +static __inline void *memdup_user(const void *user, size_t size) +{ + void *buf = kzalloc(size, GFP_KERNEL); + + if (!buf) + return ERR_PTR(-ENOMEM); + if (copy_from_user(buf, user, size)) + return ERR_PTR(-EFAULT); + return buf; +} + +/* + TSC + */ +static __forceinline u64 rdtsc(void) +{ + return __rdtsc(); +} + +static __forceinline int check_tsc_unstable(void) +{ + return 0; +} + +static __forceinline int mark_tsc_unstable(void) +{ + return 0; +} + + +/* + File + */ +struct file { + void *private_data; +}; + +/* +Atomic Operations +*/ +typedef long atomic_t; +#define ATOMIC_INIT(n) (n) +static __forceinline void atomic_inc(atomic_t *v) +{ + InterlockedIncrement(v); +} + +static __forceinline void atomic_dec(atomic_t *v) +{ + InterlockedDecrement(v); +} + +static __forceinline int atomic_dec_and_test(atomic_t *v) +{ + return !InterlockedDecrement(v); +} + +static __forceinline int atomic_xchg(atomic_t *v, int val) +{ + return InterlockedExchange(v, val); +} + +extern u8 xchg8(u8 *a, u8 b); +extern u16 xchg16(u16 *a, u16 b); +#define xchg32(a, b) InterlockedExchange((LONG *)a, b) +#define xchg64(a, b) InterlockedExchange64((LONG64 *)a, b) +extern u8 cmpxchg8(u8 *a, u8 b, u8 c); +extern u16 cmpxchg16(u16 *a, u16 b, u16 c); +#define cmpxchg32(a, b, c) InterlockedCompareExchange((LONG *)a, c, b) +#define cmpxchg64(a, b, c) InterlockedCompareExchange64((LONG64 *)a, c, b) + +#define xchg(a, b) ((sizeof(*a) == 8)? xchg64((u64 *)a, b) : \ + ((sizeof(*a) == 4)? xchg32((u32 *)a, b) : \ + ((sizeof(*a) == 2)? xchg16((u16 *)a, b) : \ + ((sizeof(*a) == 1)? xchg8((u8 *)a, b) : 0)))) +#define cmpxchg(a, b, c) ((sizeof(*a) == 8)? cmpxchg64((u64 *)a, b, c) : \ + ((sizeof(*a) == 4)? cmpxchg32((u32 *)a, b, c) : \ + ((sizeof(*a) == 2)? cmpxchg16((u16 *)a, b, c) : \ + ((sizeof(*a) == 1)? 
cmpxchg8((u8 *)a, b, c) : 0)))) + +#define atomic_cmpxchg(a, b, c) cmpxchg(a, b, c) + +static __forceinline int atomic_dec_if_positive(atomic_t *v) +{ + int c, old, dec; + c = atomic_read(v); + + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + +#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) +#define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + *p = v; \ +} while (0) + + +/* + cpumask + */ +static __inline bool zalloc_cpumask_var(cpumask_var_t *mask, int flags) +{ + *mask = NULL; + *mask = kmalloc(sizeof(cpumask_t), flags | __GFP_ZERO); + return !!(*mask); +} +static __inline void free_cpumask_var(cpumask_var_t mask) +{ + kfree(mask); +} + +/* + vm_mmap/unmap + */ +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ + +typedef struct gvm_mmap_node +{ + PMDL pMDL; + PVOID pMem; + PVOID UserVA; + struct list_head list; +}gvm_mmap_node; + +extern struct list_head gvm_mmap_list; + +extern size_t vm_mmap(struct file *file, size_t addr, + size_t len, size_t prot, size_t flag, size_t offset); +extern size_t __vm_mmap(struct file *file, size_t addr, + size_t len, size_t prot, size_t flag, size_t offset, size_t keva); +extern int vm_munmap(size_t start, size_t len); +extern int __vm_munmap(size_t start, size_t len, bool freepage); + +/* + smp_call_function + */ +extern int smp_call_function_single(int cpu, void(*func)(void *info), void *info, int wait); +extern int smp_call_function_many(cpumask_var_t mask, void(*func) (void *info), void *info, int wait); +extern void smp_send_reschedule(int cpu); + +/* + * srcu tranlation to windows ERESOURCE + */ +struct srcu_struct { + ERESOURCE eres; +}; + +static __inline int srcu_read_lock(struct srcu_struct *sp) +{ + ExAcquireResourceSharedLite(&sp->eres, true); + return 0; +} + +static __inline void __srcu_read_unlock(struct srcu_struct *sp) +{ + ExReleaseResourceLite(&sp->eres); +} +#define srcu_read_unlock(sp, idx) __srcu_read_unlock(sp) + +static __inline void *srcu_dereference(void *p, struct srcu_struct *sp) +{ + return p; +} + +static __inline void synchronize_srcu_expedited(struct srcu_struct *sp) +{ + ExAcquireResourceExclusiveLite(&sp->eres, true); + ExReleaseResourceLite(&sp->eres); +} + +#define synchronize_srcu(srcu) synchronize_srcu_expedited(srcu) + +static __inline int init_srcu_struct(struct srcu_struct *sp) +{ + NTSTATUS rc = ExInitializeResourceLite(&sp->eres); + return !NT_SUCCESS(rc); +} + +static __inline int cleanup_srcu_struct(struct srcu_struct *sp) +{ + NTSTATUS rc = ExDeleteResourceLite(&sp->eres); + return !NT_SUCCESS(rc); +} + +/* + * RCU + */ +static __inline __rcu_assign_pointer(void **p, void *v) +{ + *p = v; + smp_mb(); +} + +#define __rcu +#define rcu_assign_pointer(p, v) 
__rcu_assign_pointer((void **)&(p), (void *)v) +#define rcu_read_lock() +#define rcu_read_unlock() + +static __inline void *rcu_dereference_raw(void *p) +{ + return p; +} + +#define rcu_dereference(a) rcu_dereference_raw(a) +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) + +static __inline void hlist_add_head_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *first = h->first; + + n->next = first; + n->pprev = &h->first; + rcu_assign_pointer(hlist_first_rcu(h), n);
+ if (first) + first->pprev = &n->next; +} + +static __inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = LIST_POISON2; +} + +#define hlist_for_each_entry_rcu(pos, head, member) \ + for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) + +/* + * There is no CPU online/offline handling for Windows here, + * so always return true. + */ +static bool cpu_online(int cpu) +{ + return true; +} + +/* + * xsave related functions + */ +#define XSTATE_CPUID 0x0000000d +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 + +static inline u64 xgetbv(u32 index) +{ + return _xgetbv(index); +} + +static inline void xsetbv(u32 index, u64 value) +{ + _xsetbv(index, value); +} + +extern NTSTATUS NtKrUtilsInit(void); +extern void NtKrUtilsExit(void);
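As a usage illustration (not part of the patch above), here is a minimal sketch of how a GVM translation unit might consume a few of the Linux-compatibility shims declared in this header. The file name example.c and the identifiers example_lock, example_timer, example_timer_fn and example_usage are hypothetical; only the shim APIs (get_cpu/put_cpu, spin_lock, ktime_set, hrtimer_*) come from the header itself.

/* example.c - hypothetical consumer of ntkrutils.h, illustrative only */
#include "ntkrutils.h"

static DEFINE_SPINLOCK(example_lock);
static struct hrtimer example_timer;

/* Timer callback: fires once and does not rearm. */
static enum hrtimer_restart example_timer_fn(struct hrtimer *timer)
{
    return HRTIMER_NORESTART;
}

static void example_usage(void)
{
    unsigned int cpu;
    ktime_t deadline;

    spin_lock_init(&example_lock);

    /* get_cpu() raises IRQL to DISPATCH_LEVEL and returns the current CPU
     * index; put_cpu() restores the saved IRQL once the per-CPU nesting
     * count drops back to zero. */
    cpu = get_cpu();
    (void)cpu; /* would normally index per-CPU state */
    spin_lock(&example_lock);
    /* ... per-CPU critical section ... */
    spin_unlock(&example_lock);
    put_cpu();

    /* Arm a one-shot timer 10 ms from now, relative to the current time. */
    hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    example_timer.function = example_timer_fn;
    deadline = ktime_set(0, 10 * NSEC_PER_MSEC);
    hrtimer_start(&example_timer, deadline, HRTIMER_MODE_REL);
}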