Diffstat (limited to 'ntkrutils.h')
-rw-r--r--	ntkrutils.h	1269
1 file changed, 1269 insertions, 0 deletions
diff --git a/ntkrutils.h b/ntkrutils.h
new file mode 100644
index 0000000..bea0a30
--- /dev/null
+++ b/ntkrutils.h
@@ -0,0 +1,1269 @@
+/*
+ * Copyright 2019 Google LLC
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#pragma once
+#include <ntddk.h>
+#include <intrin.h>
+#include <gvm_types.h>
+#include <string.h>
+#include <dos.h>
+#include <linux/list.h>
+
+// APC definitions (undocumented)
+typedef enum _KAPC_ENVIRONMENT
+{
+ OriginalApcEnvironment,
+ AttachedApcEnvironment,
+ CurrentApcEnvironment,
+ InsertApcEnvironment
+} KAPC_ENVIRONMENT;
+
+typedef
+VOID
+(NTAPI *PKNORMAL_ROUTINE)(
+ _In_ PVOID NormalContext,
+ _In_ PVOID SystemArgument1,
+ _In_ PVOID SystemArgument2
+ );
+
+typedef
+VOID
+(NTAPI *PKKERNEL_ROUTINE)(
+ _In_ PKAPC Apc,
+ _Inout_ PKNORMAL_ROUTINE* NormalRoutine,
+ _Inout_ PVOID* NormalContext,
+ _Inout_ PVOID* SystemArgument1,
+ _Inout_ PVOID* SystemArgument2
+ );
+
+typedef
+VOID
+(NTAPI *PKRUNDOWN_ROUTINE) (
+ _In_ PKAPC Apc
+ );
+
+NTKERNELAPI
+VOID
+NTAPI
+KeInitializeApc(
+ _Out_ PRKAPC Apc,
+ _In_ PETHREAD Thread,
+ _In_ KAPC_ENVIRONMENT Environment,
+ _In_ PKKERNEL_ROUTINE KernelRoutine,
+ _In_opt_ PKRUNDOWN_ROUTINE RundownRoutine,
+ _In_opt_ PKNORMAL_ROUTINE NormalRoutine,
+ _In_opt_ KPROCESSOR_MODE ApcMode,
+ _In_opt_ PVOID NormalContext
+ );
+
+NTKERNELAPI
+BOOLEAN
+NTAPI
+KeInsertQueueApc(
+ _Inout_ PRKAPC Apc,
+ _In_opt_ PVOID SystemArgument1,
+ _In_opt_ PVOID SystemArgument2,
+ _In_ KPRIORITY Increment
+ );
+
+// Pool tags are stored byte-reversed; MSDN recommends writing the characters
+// in reverse order so tools such as poolmon display the tag as "GVM_".
+#define GVM_POOL_TAG '_MVG'
+
+// cpuid
+static __forceinline void cpuid(unsigned int op,
+ unsigned int *eax,
+ unsigned int *ebx,
+ unsigned int *ecx,
+ unsigned int *edx)
+{
+ int cpuInfo[4];
+ __cpuid(cpuInfo, op);
+ *eax = cpuInfo[0];
+ *ebx = cpuInfo[1];
+ *ecx = cpuInfo[2];
+ *edx = cpuInfo[3];
+}
+
+static __forceinline void cpuid_count(unsigned int op,
+ unsigned int count,
+ unsigned int *eax,
+ unsigned int *ebx,
+ unsigned int *ecx,
+ unsigned int *edx)
+{
+ int cpuInfo[4];
+ __cpuidex(cpuInfo, op, count);
+ *eax = cpuInfo[0];
+ *ebx = cpuInfo[1];
+ *ecx = cpuInfo[2];
+ *edx = cpuInfo[3];
+}
+
+static __inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return eax;
+}
+
+static __inline unsigned int cpuid_ebx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return ebx;
+}
+
+static __inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return ecx;
+}
+
+static __inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return edx;
+}
+
+static __forceinline unsigned int x86_family(unsigned int sig)
+{
+ unsigned int x86;
+
+ x86 = (sig >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (sig >> 20) & 0xff;
+
+ return x86;
+}
+
+static __forceinline unsigned int x86_cpuid_family(void)
+{
+ return x86_family(cpuid_eax(1));
+}
+
+static __forceinline unsigned int x86_model(unsigned int sig)
+{
+ unsigned int fam, model;
+
+ fam = x86_family(sig);
+
+ model = (sig >> 4) & 0xf;
+
+ if (fam >= 0x6)
+ model += ((sig >> 16) & 0xf) << 4;
+
+ return model;
+}
+
+static __forceinline unsigned int x86_cpuid_model(void)
+{
+ return x86_model(cpuid_eax(1));
+}
+
+static __forceinline unsigned int x86_stepping(unsigned int sig)
+{
+ return sig & 0xf;
+}
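+
+/*
+ * Worked example (illustrative): for a CPUID.1:EAX signature of 0x000906EA,
+ * x86_family() returns 0x6 (family != 0xf, so no extended family is added),
+ * x86_model() returns 0x9E ((0x9 << 4) merged with the low model nibble 0xE),
+ * and x86_stepping() returns 0xA.
+ */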
+
+/*
+ * cpu_has_vmx
+ */
+static __inline int cpu_has_vmx(void)
+{
+ size_t ecx = cpuid_ecx(1);
+ return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+}
+
+/*
+ * Memory Barriers
+ */
+#define smp_mb() _mm_mfence()
+#define smp_rmb() _mm_lfence()
+#define smp_wmb() _mm_sfence()
+#define mb() _mm_mfence()
+#define rmb() _mm_lfence()
+#define wmb() _mm_sfence()
+#define smp_mb__after_atomic() _mm_mfence()
+
+// smp_processor_id
+static __inline unsigned int raw_smp_processor_id(void)
+{
+ return KeGetCurrentProcessorNumberEx(NULL);
+}
+
+static __inline unsigned int smp_processor_id(void)
+{
+ return raw_smp_processor_id();
+}
+
+/*
+ * get_cpu()/put_cpu(): raise IRQL to DISPATCH_LEVEL so the current thread
+ * cannot migrate off this CPU while per-CPU VMX state is in use.
+ */
+
+struct cpu_getput_cxt {
+ long count;
+ KIRQL irql;
+};
+
+DECLARE_PER_CPU(struct cpu_getput_cxt, cpu_getput_cxt);
+
+static __inline unsigned int get_cpu()
+{
+ KIRQL oldIrql = KeRaiseIrqlToDpcLevel();
+ unsigned int cpu = smp_processor_id();
+ long newcount = InterlockedIncrement(&per_cpu(cpu_getput_cxt, cpu).count);
+
+ if (newcount == 1)
+ per_cpu(cpu_getput_cxt, cpu).irql = oldIrql;
+
+ return cpu;
+}
+
+static __inline void put_cpu()
+{
+ unsigned int cpu = smp_processor_id();
+ long newcount = InterlockedDecrement(&per_cpu(cpu_getput_cxt, cpu).count);
+ BUG_ON(newcount < 0);
+ if (newcount == 0) {
+ KIRQL oldIrql = per_cpu(cpu_getput_cxt, cpu).irql;
+ per_cpu(cpu_getput_cxt, cpu).irql = 0;
+ KeLowerIrql(oldIrql);
+ }
+}
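+
+/*
+ * Usage sketch (illustrative only): bracket per-CPU VMX work so the thread
+ * cannot migrate while it runs at DISPATCH_LEVEL:
+ *
+ *	int cpu = get_cpu();
+ *	... touch per-CPU VMX state for 'cpu' ...
+ *	put_cpu();
+ *
+ * Calls may nest; only the outermost put_cpu() restores the saved IRQL.
+ */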
+
+#define preempt_disable() KeRaiseIrqlToDpcLevel()
+#define preempt_enable() KeLowerIrql(PASSIVE_LEVEL)
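+
+/*
+ * Note: preempt_enable() lowers to PASSIVE_LEVEL unconditionally, so these
+ * macros assume the caller invoked preempt_disable() at PASSIVE_LEVEL.
+ */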
+
+// msr access
+static __forceinline void wrmsrl(unsigned int msr, u64 val)
+{
+ __writemsr(msr, val);
+}
+
+extern struct cpumask *cpu_online_mask;
+extern unsigned int cpu_online_count;
+
+/*
+ * Spinlock implementation.
+ * Unlike the native Windows KSPIN_LOCK, this implementation does not raise
+ * IRQL to DISPATCH_LEVEL: KVM nests locks in ways that work on Linux but do
+ * not map directly onto the Windows spinlock IRQL rules.
+ */
+struct spin_lock {
+ volatile LONG lock;
+};
+
+typedef struct spin_lock spinlock_t;
+typedef struct spin_lock raw_spinlock_t;
+
+#define DEFINE_SPINLOCK(x) spinlock_t x
+#define DECLARE_SPINLOCK(x) extern spinlock_t x
+#define DEFINE_RAW_SPINLOCK(x) spinlock_t x
+#define DECLARE_RAW_SPINLOCK(x) extern spinlock_t x
+
+static __forceinline void spin_lock_init(spinlock_t *lock)
+{
+ lock->lock = 0;
+}
+
+extern __forceinline void __spin_lock(spinlock_t *lock);
+static __forceinline void spin_lock(spinlock_t *lock)
+{
+ __spin_lock(lock);
+}
+
+static __forceinline void spin_unlock(spinlock_t *lock)
+{
+ lock->lock = 0;
+}
+
+static __forceinline void raw_spin_lock_init(spinlock_t *lock)
+{
+ spin_lock_init(lock);
+}
+
+static __forceinline void raw_spin_lock(spinlock_t *lock)
+{
+ spin_lock(lock);
+}
+
+static __forceinline void raw_spin_unlock(spinlock_t *lock)
+{
+ spin_unlock(lock);
+}
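+
+/*
+ * Usage sketch (illustrative only): since spin_lock() does not raise IRQL,
+ * callers that need DISPATCH_LEVEL protection manage it themselves:
+ *
+ *	KIRQL irql = KeRaiseIrqlToDpcLevel();
+ *	spin_lock(&lock);
+ *	... critical section ...
+ *	spin_unlock(&lock);
+ *	KeLowerIrql(irql);
+ */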
+
+/*
+ * Mutex implementation on Windows, backed by FAST_MUTEX.
+ */
+struct mutex
+{
+ FAST_MUTEX mutex;
+};
+typedef struct mutex mutex;
+
+static __forceinline void mutex_init(struct mutex *lock)
+{
+ ExInitializeFastMutex(&lock->mutex);
+}
+
+static __forceinline void mutex_lock(struct mutex *lock)
+{
+ ExAcquireFastMutex(&lock->mutex);
+}
+
+static __forceinline void mutex_unlock(struct mutex *lock)
+{
+ ExReleaseFastMutex(&lock->mutex);
+}
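+
+/*
+ * Note: ExAcquireFastMutex raises IRQL to APC_LEVEL, so mutex_lock() must be
+ * called at IRQL <= APC_LEVEL (in particular, not from DISPATCH_LEVEL).
+ */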
+
+#define __KERNEL_CS 0x10
+#define __KERNEL_DS 0x28
+#define __KERNEL_SS 0x18
+#define __KERNEL_FS 0x53
+
+/*
+ MSR access
+ */
+static __inline void __rdmsr(u32 index, u32 *low, u32 *high)
+{
+ u64 val = __readmsr(index);
+ *low = (u32)val;
+ *high = (u32)(val >> 32);
+}
+
+static __inline int __rdmsr_safe(u32 index, u32 *low, u32 *high)
+{
+ u64 val = 0;
+ __try {
+ val = __readmsr(index);
+ *low = (u32)val;
+ *high = (u32)(val >> 32);
+ } __except(EXCEPTION_EXECUTE_HANDLER) {
+ return -1;
+ }
+ return 0;
+}
+
+static __inline int __rdmsrl_safe(u32 index, u64 *val)
+{
+ __try {
+ *val = __readmsr(index);
+ } __except(EXCEPTION_EXECUTE_HANDLER) {
+ return -1;
+ }
+ return 0;
+}
+
+static __inline u64 native_read_msr_safe(u32 index, int *err)
+{
+ u64 value = 0;
+ *err = __rdmsrl_safe(index, &value);
+ return value;
+}
+
+static __inline int __wrmsr_safe(u32 index, u32 low, u32 high)
+{
+ u64 val = (((u64)high) << 32) | low;
+ __try {
+ __writemsr(index, val);
+ } __except(EXCEPTION_EXECUTE_HANDLER) {
+ return -1;
+ }
+ return 0;
+}
+
+static __inline int __wrmsrl_safe(u32 index, u64 val)
+{
+ __try {
+ __writemsr(index, val);
+ } __except(EXCEPTION_EXECUTE_HANDLER) {
+ return -1;
+ }
+ return 0;
+}
+
+static __inline int native_write_msr_safe(u32 index, u32 low, u32 high)
+{
+ return __wrmsr_safe(index, low, high);
+}
+
+#define rdmsr(a, b, c) __rdmsr(a, &b, &c)
+#define rdmsr_safe(a, b, c) __rdmsr_safe(a, b, c)
+#define rdmsrl(a, b) ((b) = __readmsr(a))
+#define rdmsrl_safe(a, b) __rdmsrl_safe(a, b)
+
+#define wrmsr(a,b) __writemsr(a,b)
+#define wrmsrl(a,b) __writemsr(a,b)
+#define wrmsr_safe(a, b, c) __wrmsr_safe(a, b, c)
+#define wrmsrl_safe(a,b) __wrmsrl_safe(a,b)
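+
+/*
+ * Usage sketch (illustrative only): probe an MSR that may not exist, e.g.
+ * IA32_FEATURE_CONTROL (0x3A), without risking an unhandled #GP fault:
+ *
+ *	u64 feat;
+ *	int locked = rdmsrl_safe(0x3A, &feat) == 0 && (feat & 1);
+ *
+ * Here 'locked' means the MSR exists and its lock bit (bit 0) is set.
+ */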
+
+/*
+ * Local IRQ enable/disable
+ */
+static __forceinline void local_irq_disable(void)
+{
+ _disable();
+}
+
+static __forceinline void local_irq_enable(void)
+{
+ _enable();
+}
+
+/*
+ * Timer support
+ */
+
+#define MSEC_PER_SEC 1000L
+#define USEC_PER_MSEC 1000L
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_MSEC 1000000L
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+#define FSEC_PER_SEC 1000000000000000LL
+
+union ktime
+{
+ s64 tv64;
+ struct {
+ s32 nsec, sec;
+ } tv;
+};
+
+typedef union ktime ktime_t;
+
+#define KTIME_MAX ((s64)~((u64)1 << 63))
+#define KTIME_SEC_MAX LONG_MAX
+
+#pragma warning(disable : 4204)
+static __forceinline ktime_t ktime_set(const long secs, const size_t nsecs)
+{
+#if 0
+ if (unlikely(secs >= KTIME_SEC_MAX))
+ return (ktime_t){ .tv64 = KTIME_MAX };
+#endif
+ return (ktime_t) { .tv64 = (s64)secs * NSEC_PER_SEC + (s64)nsecs };
+}
+
+/* Subtract two ktime_t variables. res = lhs - rhs: */
+#define ktime_sub(lhs, rhs) \
+ (ktime_t){ .tv64 = (lhs).tv64 - (rhs).tv64 }
+
+/* Add two ktime_t variables. res = lhs + rhs: */
+#define ktime_add(lhs, rhs) \
+ (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 }
+
+/*
+ * Add a ktime_t variable and a scalar nanosecond value.
+ * res = kt + nsval:
+ */
+#define ktime_add_ns(kt, nsval) \
+ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }
+
+/*
+ * Subtract a scalar nanosecond value from a ktime_t variable
+ * res = kt - nsval:
+ */
+#define ktime_sub_ns(kt, nsval) \
+ (ktime_t){ .tv64 = (kt).tv64 - (nsval) }
+
+
+/* Map the ktime_t to timespec conversion to ns_to_timespec function */
+#define ktime_to_timespec(kt) ns_to_timespec((kt).tv64)
+
+/* Map the ktime_t to timeval conversion to ns_to_timeval function */
+#define ktime_to_timeval(kt) ns_to_timeval((kt).tv64)
+
+/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
+#define ktime_to_ns(kt) ((kt).tv64)
+
+static __forceinline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2)
+{
+ return cmp1.tv64 == cmp2.tv64;
+}
+
+/**
+ * ktime_compare - Compares two ktime_t variables for less, greater or equal
+ * @cmp1: comparable1
+ * @cmp2: comparable2
+ *
+ * Returns ...
+ * cmp1 < cmp2: return <0
+ * cmp1 == cmp2: return 0
+ * cmp1 > cmp2: return >0
+ */
+static __forceinline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
+{
+ if (cmp1.tv64 < cmp2.tv64)
+ return -1;
+ if (cmp1.tv64 > cmp2.tv64)
+ return 1;
+ return 0;
+}
+
+static __forceinline ktime_t ktime_add_us(const ktime_t kt, const u64 usec)
+{
+ return ktime_add_ns(kt, usec * 1000);
+}
+
+static __forceinline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec)
+{
+ return ktime_sub_ns(kt, usec * 1000);
+}
+
+static __forceinline ktime_t ns_to_ktime(u64 ns)
+{
+ static const ktime_t ktime_zero = { .tv64 = 0 };
+ return ktime_add_ns(ktime_zero, ns);
+}
+
+static __forceinline ktime_t ktime_get(void)
+{
+ s64 nsecs = 0;
+ LARGE_INTEGER time;
+ KeQuerySystemTime(&time);
+ nsecs = time.QuadPart;
+ nsecs *= 100;
+
+ return (ktime_t){.tv64 = nsecs};
+}
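+/*
+ * KeQuerySystemTime reports 100 ns units, so multiplying by 100 yields
+ * nanoseconds; e.g. a raw value of 10,000,000 (one second) becomes 1e9 ns.
+ */
+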
+typedef size_t clockid_t;
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+
+enum hrtimer_mode
+{
+ HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */
+ HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */
+ HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */
+ HRTIMER_MODE_ABS_PINNED = 0x02,
+ HRTIMER_MODE_REL_PINNED = 0x03,
+};
+
+enum hrtimer_restart
+{
+ HRTIMER_NORESTART, /* Timer is not restarted */
+ HRTIMER_RESTART, /* Timer must be restarted */
+};
+
+struct timerqueue_node
+{
+ ktime_t expires;
+};
+
+struct hrtimer_clock_base
+{
+ int index;
+ ktime_t resolution;
+ ktime_t (*get_time)(void);
+ ktime_t softirq_time;
+ ktime_t offset;
+};
+
+struct hrtimer
+{
+ struct timerqueue_node node;
+ ktime_t _softexpires;
+ enum hrtimer_restart (*function)(struct hrtimer *);
+ struct hrtimer_clock_base *base;
+ size_t state;
+ KTIMER ktimer;
+ KDPC kdpc;
+ LARGE_INTEGER due_time;
+ struct hrtimer_clock_base base_hack;
+};
+
+void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode);
+int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode);
+int hrtimer_cancel(struct hrtimer *timer);
+int hrtimer_restart(struct hrtimer* timer);
+
+static __forceinline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 delta)
+{
+ timer->node.expires = ktime_add_ns(timer->node.expires, delta);
+}
+
+static __forceinline ktime_t hrtimer_get_expires(struct hrtimer *timer)
+{
+ return timer->node.expires;
+}
+
+static __forceinline u64 hrtimer_get_expires_ns(struct hrtimer *timer)
+{
+ return ktime_to_ns(timer->node.expires);
+}
+
+static __forceinline void hrtimer_start_expires(struct hrtimer *timer, int mode)
+{
+ hrtimer_start(timer, timer->node.expires, mode);
+}
+
+static __forceinline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
+{
+ return ktime_sub(timer->node.expires, timer->base->get_time());
+}
+
+static __forceinline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+{
+ ktime_t rem;
+ rem = hrtimer_expires_remaining(timer);
+ return rem;
+}
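+
+/*
+ * Usage sketch (illustrative only), with my_timer_fn being a caller-defined
+ * enum hrtimer_restart callback:
+ *
+ *	struct hrtimer t;
+ *	hrtimer_init(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ *	t.function = my_timer_fn;
+ *	hrtimer_start(&t, ktime_set(0, 500 * NSEC_PER_USEC), HRTIMER_MODE_REL);
+ *	...
+ *	hrtimer_cancel(&t);
+ */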
+
+/*
+ * Memory management support
+ */
+
+#define BIT(nr) ((size_t)(1) << (nr))
+#define GFP_KERNEL BIT(0)
+#define GFP_ATOMIC BIT(1)
+#define __GFP_ZERO BIT(3)
+#define GFP_UNALLOC BIT(5)
+
+ /*
+ * Address types:
+ *
+ * gva - guest virtual address
+ * gpa - guest physical address
+ * gfn - guest frame number
+ * hva - host virtual address
+ * hpa - host physical address
+ * hfn - host frame number
+ */
+
+typedef size_t gva_t;
+typedef u64 gpa_t;
+typedef u64 gfn_t;
+typedef u64 phys_addr_t;
+
+typedef size_t hva_t;
+typedef u64 hpa_t;
+typedef u64 hfn_t;
+
+typedef hfn_t pfn_t;
+
+typedef struct page
+{
+ void* hva;
+ void* kmap_hva;
+ size_t __private;
+ hpa_t hpa;
+ pfn_t pfn;
+ size_t gfp_mask;
+ PEPROCESS proc;
+}page;
+
+extern u64 max_pagen;
+extern struct page** pglist;
+DECLARE_RAW_SPINLOCK(global_page_lock);
+
+#define page_private(page) ((page)->__private)
+#define set_page_private(page, v) ((page)->__private = (v))
+
+#define __free_page(page) __free_pages((page), 0)
+#define free_page(addr) free_pages((addr), 0)
+
+#define clear_page(page) memset((page), 0, PAGE_SIZE)
+
+#define virt_to_page(kaddr) pfn_to_page((__pa(kaddr) >> PAGE_SHIFT))
+
+
+static __inline void *kmalloc(size_t size, size_t flags)
+{
+ void* ret = NULL;
+ int zero = 0;
+
+ if (flags & __GFP_ZERO)
+ zero = 1;
+
+ ret = ExAllocatePoolWithTag(NonPagedPool, size, GVM_POOL_TAG);
+
+ if(ret && zero)
+ {
+ memset(ret, 0, size);
+ }
+ return ret;
+}
+
+static __inline void *kzalloc(size_t size, size_t flags)
+{
+ return kmalloc(size, flags | __GFP_ZERO);
+}
+
+static __inline void kfree(void* hva)
+{
+ if (!hva)
+ return;
+ ExFreePoolWithTag(hva, GVM_POOL_TAG);
+}
+
+static __inline void *vmalloc(size_t size)
+{
+ return ExAllocatePoolWithTag(NonPagedPool, size, GVM_POOL_TAG);
+}
+
+static __inline void vfree(void* hva)
+{
+ if (!hva)
+ return;
+ ExFreePoolWithTag(hva, GVM_POOL_TAG);
+}
+
+static __inline void *vzalloc(size_t size)
+{
+ void *addr = vmalloc(size);
+ if (addr)
+ {
+ memset(addr, 0, size);
+ }
+ return addr;
+}
+
+static __inline void *kmalloc_fast(size_t size, size_t flags)
+{
+ return kmalloc(size, flags);
+}
+
+static __inline void *kzalloc_fast(size_t size, size_t flags)
+{
+ return kmalloc_fast(size, flags | __GFP_ZERO);
+}
+
+static __inline void kfree_fast(void* hva)
+{
+ if (!hva)
+ return;
+ ExFreePoolWithTag(hva, GVM_POOL_TAG);
+}
+
+#define kvfree kfree_fast
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+static __inline pfn_t page_to_pfn(struct page* page)
+{
+ return page->pfn;
+}
+
+static __inline void* page_to_hva(struct page* page)
+{
+ return page->hva;
+}
+
+static __inline hpa_t page_to_phys(struct page* page)
+{
+ return page->hpa;
+}
+
+static __inline hpa_t mdl_to_phys(PMDL mdl)
+{
+ return (hpa_t)MmGetPhysicalAddress(mdl->StartVa).QuadPart;
+}
+
+static __inline struct page* pfn_to_page(pfn_t pfn)
+{
+ return pglist[pfn];
+}
+
+static __inline hpa_t __pa(void* va)
+{
+ PHYSICAL_ADDRESS addr_phys;
+ addr_phys = MmGetPhysicalAddress(va);
+ return (hpa_t)(addr_phys.QuadPart);
+}
+
+static __inline void* __va(hpa_t pa)
+{
+ void* ret = 0;
+ ret = page_to_hva(pfn_to_page(pa >> PAGE_SHIFT));
+ if(!ret)
+ {
+ printk("vmmr0: __va: invalid hpa 0x%llx\n", pa);
+ }
+ return ret;
+}
+
+static __inline struct page *alloc_page(unsigned int gfp_mask)
+{
+ void* page_hva = NULL;
+ PHYSICAL_ADDRESS pageaddr_phys;
+ int zero = 0;
+ struct page* page = ExAllocatePoolWithTag(NonPagedPool,
+ sizeof(*page),
+ GVM_POOL_TAG);
+ if(!page)
+ goto out_error;
+
+ page_hva = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, GVM_POOL_TAG);
+ if(!page_hva)
+ goto out_error_free;
+
+ if (gfp_mask & __GFP_ZERO)
+ zero = 1;
+
+ ASSERT(!((size_t)page_hva & 0xfffull));
+
+ if(zero)
+ memset(page_hva, 0, PAGE_SIZE);
+
+ pageaddr_phys = MmGetPhysicalAddress(page_hva);
+ page->hpa = pageaddr_phys.QuadPart;
+ page->pfn = page->hpa >> PAGE_SHIFT;
+ page->hva = page_hva;
+ page->gfp_mask = gfp_mask;
+ page->proc = IoGetCurrentProcess();
+ raw_spin_lock(&global_page_lock);
+ pglist[page->pfn] = page;
+ raw_spin_unlock(&global_page_lock);
+ return page;
+
+ out_error_free:
+ ExFreePoolWithTag(page, GVM_POOL_TAG);
+ out_error:
+ return 0;
+}
+
+static __inline void __free_pages(struct page* page, unsigned int order)
+{
+ ExFreePoolWithTag(page->hva, GVM_POOL_TAG);
+
+ raw_spin_lock(&global_page_lock);
+ pglist[page->pfn] = 0;
+ raw_spin_unlock(&global_page_lock);
+
+ ExFreePoolWithTag(page, GVM_POOL_TAG);
+}
+
+static __inline void free_pages(size_t addr, unsigned int order)
+{
+ if (addr != 0)
+ {
+ __free_pages(virt_to_page((void *)addr), order);
+ }
+}
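+
+/*
+ * Usage sketch (illustrative only): allocate a zero-filled backing page,
+ * obtain its frame number and free it again:
+ *
+ *	struct page *pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ *	if (pg) {
+ *		pfn_t pfn = page_to_pfn(pg);
+ *		...
+ *		__free_page(pg);
+ *	}
+ */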
+
+static __inline void* kmap(PMDL mdl)
+{
+
+ if (!mdl)
+ return NULL;
+
+ return MmGetSystemAddressForMdlSafe(mdl, NormalPagePriority);
+}
+
+static __inline void kunmap(PMDL mdl)
+{
+}
+
+static __inline void* page_address(struct page* page)
+{
+ BUG_ON(!page->hva);
+ return page->hva;
+}
+
+static __inline void* get_zeroed_page(unsigned int gfp_mask)
+{
+ struct page* page = alloc_page(gfp_mask);
+ if (!page)
+  return NULL;
+ memset(page->hva, 0, PAGE_SIZE);
+ return page->hva;
+}
+
+static __inline size_t __get_free_page(unsigned int gfp_mask)
+{
+ struct page *page;
+ page = alloc_page(gfp_mask);
+ if (!page)
+ return 0;
+ return (size_t) page_address(page);
+}
+
+static __inline int get_user_pages_fast(size_t start, int nr_pages, int write,
+ PMDL *mdl)
+{
+ PMDL _mdl;
+
+ start &= PAGE_MASK;
+ _mdl = IoAllocateMdl((void *)start, nr_pages * PAGE_SIZE,
+ FALSE, FALSE, NULL);
+ if (!_mdl)
+ return 0;
+
+ MmProbeAndLockPages(_mdl, KernelMode, IoWriteAccess);
+ *mdl = _mdl;
+
+ return nr_pages;
+}
+
+static __inline void kvm_release_page(PMDL mdl)
+{
+ if (!mdl)
+ return;
+
+ MmUnlockPages(mdl);
+ IoFreeMdl(mdl);
+}
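+
+/*
+ * Usage sketch (illustrative only), with uaddr being a user-space address
+ * supplied by the caller: pin one page, map it, then release it:
+ *
+ *	PMDL mdl;
+ *	if (get_user_pages_fast(uaddr, 1, 1, &mdl) == 1) {
+ *		void *kva = kmap(mdl);
+ *		...
+ *		kunmap(mdl);
+ *		kvm_release_page(mdl);
+ *	}
+ */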
+
+/* We do not actually copy from *user* here. In KVM this function copies
+ * ioctl parameters, but on Windows we always use buffered I/O for device
+ * control, so the address handed to copy_from_user is already a kernel-space
+ * address; we simply keep the Linux function name.
+ * __copy_from/to_user below really do copy from/to user space.
+ */
+static __inline size_t copy_from_user(void *dst, const void *src, size_t size)
+{
+ memcpy(dst, src, size);
+ return 0;
+}
+
+static __inline size_t __copy_user(void *dst, const void *src, size_t size,
+ int from)
+{
+ PVOID uva = (PVOID)(from ? src : dst);
+ PMDL lock_mdl;
+ HANDLE handle;
+
+ lock_mdl = IoAllocateMdl(uva, size, FALSE, FALSE, NULL);
+ if (!lock_mdl)
+  return size;
+ __try {
+  /* Lock for read when copying from user, for write when copying to user. */
+  MmProbeAndLockPages(lock_mdl, UserMode,
+   from ? IoReadAccess : IoWriteAccess);
+ } __except(EXCEPTION_EXECUTE_HANDLER) {
+  IoFreeMdl(lock_mdl);
+  return size;
+ }
+ handle = MmSecureVirtualMemory(uva, size,
+  from ? PAGE_READONLY : PAGE_READWRITE);
+ if (!handle) {
+  MmUnlockPages(lock_mdl);
+  IoFreeMdl(lock_mdl);
+  return size;
+ }
+ memcpy(dst, src, size);
+ MmUnsecureVirtualMemory(handle);
+ MmUnlockPages(lock_mdl);
+ IoFreeMdl(lock_mdl);
+ return 0;
+}
+
+static __inline size_t __copy_to_user(void *dst, const void *src, size_t size)
+{
+ return __copy_user(dst, src, size, 0);
+}
+
+static __inline size_t __copy_from_user(void *dst, const void *src, size_t size)
+{
+ return __copy_user(dst, src, size, 1);
+}
+
+static __inline void *kmap_atomic(PMDL mdl)
+{
+ return kmap(mdl);
+}
+
+static __inline void kunmap_atomic(PMDL mdl)
+{
+ kunmap(mdl);
+}
+
+static __inline void *memdup_user(const void *user, size_t size)
+{
+ void *buf = kzalloc(size, GFP_KERNEL);
+
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+ if (copy_from_user(buf, user, size))
+ return ERR_PTR(-EFAULT);
+ return buf;
+}
+
+/*
+ TSC
+ */
+static __forceinline u64 rdtsc(void)
+{
+ return __rdtsc();
+}
+
+static __forceinline int check_tsc_unstable(void)
+{
+ return 0;
+}
+
+static __forceinline int mark_tsc_unstable(void)
+{
+ return 0;
+}
+
+
+/*
+ File
+ */
+struct file {
+ void *private_data;
+};
+
+/*
+Atomic Operations
+*/
+typedef long atomic_t;
+#define ATOMIC_INIT(n) (n)
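+
+/*
+ * Note: atomic_read() is used by atomic_dec_if_positive() below but is not
+ * defined in this header; it is assumed to come from elsewhere (e.g.
+ * gvm_types.h). A minimal definition would be
+ *	#define atomic_read(v)	(*(volatile atomic_t *)(v))
+ */
+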
+static __forceinline void atomic_inc(atomic_t *v)
+{
+ InterlockedIncrement(v);
+}
+
+static __forceinline void atomic_dec(atomic_t *v)
+{
+ InterlockedDecrement(v);
+}
+
+static __forceinline int atomic_dec_and_test(atomic_t *v)
+{
+ return !InterlockedDecrement(v);
+}
+
+static __forceinline int atomic_xchg(atomic_t *v, int val)
+{
+ return InterlockedExchange(v, val);
+}
+
+extern u8 xchg8(u8 *a, u8 b);
+extern u16 xchg16(u16 *a, u16 b);
+#define xchg32(a, b) InterlockedExchange((LONG *)a, b)
+#define xchg64(a, b) InterlockedExchange64((LONG64 *)a, b)
+extern u8 cmpxchg8(u8 *a, u8 b, u8 c);
+extern u16 cmpxchg16(u16 *a, u16 b, u16 c);
+#define cmpxchg32(a, b, c) InterlockedCompareExchange((LONG *)a, c, b)
+#define cmpxchg64(a, b, c) InterlockedCompareExchange64((LONG64 *)a, c, b)
+
+#define xchg(a, b) ((sizeof(*a) == 8)? xchg64((u64 *)a, b) : \
+ ((sizeof(*a) == 4)? xchg32((u32 *)a, b) : \
+ ((sizeof(*a) == 2)? xchg16((u16 *)a, b) : \
+ ((sizeof(*a) == 1)? xchg8((u8 *)a, b) : 0))))
+#define cmpxchg(a, b, c) ((sizeof(*a) == 8)? cmpxchg64((u64 *)a, b, c) : \
+ ((sizeof(*a) == 4)? cmpxchg32((u32 *)a, b, c) : \
+ ((sizeof(*a) == 2)? cmpxchg16((u16 *)a, b, c) : \
+ ((sizeof(*a) == 1)? cmpxchg8((u8 *)a, b, c) : 0))))
+
+#define atomic_cmpxchg(a, b, c) cmpxchg(a, b, c)
+
+static __forceinline int atomic_dec_if_positive(atomic_t *v)
+{
+ int c, old, dec;
+ c = atomic_read(v);
+
+ for (;;) {
+ dec = c - 1;
+ if (unlikely(dec < 0))
+ break;
+ old = atomic_cmpxchg((v), c, dec);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return dec;
+}
+
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_release(p, v) \
+do { \
+ smp_mb(); \
+ *p = v; \
+} while (0)
+
+
+/*
+ cpumask
+ */
+static __inline bool zalloc_cpumask_var(cpumask_var_t *mask, int flags)
+{
+ *mask = kmalloc(sizeof(cpumask_t), flags | __GFP_ZERO);
+ return !!(*mask);
+}
+static __inline void free_cpumask_var(cpumask_var_t mask)
+{
+ kfree(mask);
+}
+
+/*
+ vm_mmap/unmap
+ */
+#define PROT_READ 0x1 /* page can be read */
+#define PROT_WRITE 0x2 /* page can be written */
+#define PROT_EXEC 0x4 /* page can be executed */
+#define PROT_SEM 0x8 /* page may be used for atomic ops */
+#define PROT_NONE 0x0 /* page can not be accessed */
+#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
+
+#define MAP_SHARED 0x01 /* Share changes */
+#define MAP_PRIVATE 0x02 /* Changes are private */
+#define MAP_TYPE 0x0f /* Mask for type of mapping */
+#define MAP_FIXED 0x10 /* Interpret addr exactly */
+#define MAP_ANONYMOUS 0x20 /* don't use a file */
+#define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
+
+typedef struct gvm_mmap_node
+{
+ PMDL pMDL;
+ PVOID pMem;
+ PVOID UserVA;
+ struct list_head list;
+}gvm_mmap_node;
+
+extern struct list_head gvm_mmap_list;
+
+extern size_t vm_mmap(struct file *file, size_t addr,
+ size_t len, size_t prot, size_t flag, size_t offset);
+extern size_t __vm_mmap(struct file *file, size_t addr,
+ size_t len, size_t prot, size_t flag, size_t offset, size_t keva);
+extern int vm_munmap(size_t start, size_t len);
+extern int __vm_munmap(size_t start, size_t len, bool freepage);
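+
+/*
+ * Usage sketch (illustrative only), assuming a zero return means failure:
+ * map len anonymous, writable bytes into the current process, then unmap:
+ *
+ *	size_t uva = vm_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
+ *			     MAP_SHARED | MAP_ANONYMOUS, 0);
+ *	if (uva)
+ *		vm_munmap(uva, len);
+ */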
+
+/*
+ smp_call_function
+ */
+extern int smp_call_function_single(int cpu, void(*func)(void *info), void *info, int wait);
+extern int smp_call_function_many(cpumask_var_t mask, void(*func) (void *info), void *info, int wait);
+extern void smp_send_reschedule(int cpu);
+
+/*
+ * SRCU translation to Windows ERESOURCE
+ */
+struct srcu_struct {
+ ERESOURCE eres;
+};
+
+static __inline int srcu_read_lock(struct srcu_struct *sp)
+{
+ ExAcquireResourceSharedLite(&sp->eres, true);
+ return 0;
+}
+
+static __inline void __srcu_read_unlock(struct srcu_struct *sp)
+{
+ ExReleaseResourceLite(&sp->eres);
+}
+#define srcu_read_unlock(sp, idx) __srcu_read_unlock(sp)
+
+static __inline void *srcu_dereference(void *p, struct srcu_struct *sp)
+{
+ return p;
+}
+
+static __inline void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+ ExAcquireResourceExclusiveLite(&sp->eres, true);
+ ExReleaseResourceLite(&sp->eres);
+}
+
+#define synchronize_srcu(srcu) synchronize_srcu_expedited(srcu)
+
+static __inline int init_srcu_struct(struct srcu_struct *sp)
+{
+ NTSTATUS rc = ExInitializeResourceLite(&sp->eres);
+ return !NT_SUCCESS(rc);
+}
+
+static __inline int cleanup_srcu_struct(struct srcu_struct *sp)
+{
+ NTSTATUS rc = ExDeleteResourceLite(&sp->eres);
+ return !NT_SUCCESS(rc);
+}
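+
+/*
+ * Usage sketch (illustrative only), with ss being a struct srcu_struct:
+ * readers hold the ERESOURCE shared, and synchronize_srcu() waits for them
+ * by briefly taking it exclusive:
+ *
+ *	int idx = srcu_read_lock(&ss);
+ *	...
+ *	srcu_read_unlock(&ss, idx);
+ *
+ * ERESOURCE acquisition expects normal kernel APCs to be disabled, e.g. by
+ * entering a critical region around the read side.
+ */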
+
+/*
+ * RCU
+ */
+static __inline void __rcu_assign_pointer(void **p, void *v)
+{
+ /* Publish: make prior initialization visible before the pointer store. */
+ smp_mb();
+ *p = v;
+}
+
+#define __rcu
+#define rcu_assign_pointer(p, v) __rcu_assign_pointer((void **)&(p), (void *)(v))
+#define rcu_read_lock()
+#define rcu_read_unlock()
+
+static __inline void *rcu_dereference_raw(void *p)
+{
+ return p;
+}
+
+#define rcu_dereference(a) rcu_dereference_raw(a)
+#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first)))
+#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next)))
+#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev)))
+
+static __inline void hlist_add_head_rcu(struct hlist_node *n,
+ struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+
+ n->next = first;
+ n->pprev = &h->first;
+ rcu_assign_pointer(hlist_first_rcu(h), n);
+ if (first)
+ first->pprev = &n->next;
+}
+
+static __inline void hlist_del_rcu(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->pprev = LIST_POISON2;
+}
+
+#define hlist_for_each_entry_rcu(pos, head, member) \
+ for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
+ typeof(*(pos)), member); \
+ pos; \
+ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
+ &(pos)->member)), typeof(*(pos)), member))
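+
+/*
+ * Usage sketch (illustrative only), assuming a caller-defined struct item
+ * with an embedded 'struct hlist_node link' member and a hash bucket head:
+ *
+ *	struct item *pos;
+ *	hlist_for_each_entry_rcu(pos, &bucket, link) {
+ *		if (pos->key == key)
+ *			break;
+ *	}
+ */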
+
+/*
+ * Windows drivers do not normally deal with CPU online/offline transitions,
+ * so every CPU is treated as online.
+ */
+static __inline bool cpu_online(int cpu)
+{
+ return true;
+}
+
+/*
+ * xsave related functions
+ */
+#define XSTATE_CPUID 0x0000000d
+#define XCR_XFEATURE_ENABLED_MASK 0x00000000
+
+static __inline u64 xgetbv(u32 index)
+{
+ return _xgetbv(index);
+}
+
+static __inline void xsetbv(u32 index, u64 value)
+{
+ _xsetbv(index, value);
+}
+
+extern NTSTATUS NtKrUtilsInit(void);
+extern void NtKrUtilsExit(void);