Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--               arch/x86/kvm/Kconfig             106
-rw-r--r--               arch/x86/kvm/Makefile             25
-rw-r--r--               arch/x86/kvm/assigned-dev.c     1058
-rw-r--r--               arch/x86/kvm/assigned-dev.h       32
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/cpuid.c             225
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/cpuid.h              64
-rw-r--r--               arch/x86/kvm/debugfs.c            69
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/emulate.c           732
-rw-r--r--               arch/x86/kvm/hyperv.c           1266
-rw-r--r--               arch/x86/kvm/hyperv.h             90
-rw-r--r--               arch/x86/kvm/i8254.c             738
-rw-r--r--               arch/x86/kvm/i8254.h              66
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/i8259.c              33
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/ioapic.c             90
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/ioapic.h             34
-rw-r--r--               arch/x86/kvm/iommu.c             356
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/irq.c                26
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/irq.h                17
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/irq_comm.c          126
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/kvm_cache_regs.h     51
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/lapic.c             613
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/lapic.h              81
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/mmu.c               868
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/mmu.h                39
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/mmu_audit.c           5
-rw-r--r--               arch/x86/kvm/mmutrace.h          333
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/mtrr.c               52
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/page_track.c         33
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/paging_tmpl.h        87
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/pmu.c                26
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/pmu.h                 4
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/pmu_amd.c             2
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/pmu_intel.c           7
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/svm.c              1213
-rwxr-xr-x               arch/x86/kvm/svm_def.h           176
-rw-r--r--               arch/x86/kvm/trace.h            1374
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/tss.h                 0
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/vmx.c              4438
-rwxr-xr-x               arch/x86/kvm/vmx_def.h           425
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/x86.c              4183
-rwxr-xr-x [-rw-r--r--]  arch/x86/kvm/x86.h                37
41 files changed, 4009 insertions, 15191 deletions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
deleted file mode 100644
index ab8e32f..0000000
--- a/arch/x86/kvm/Kconfig
+++ /dev/null
@@ -1,106 +0,0 @@
-#
-# KVM configuration
-#
-
-source "virt/kvm/Kconfig"
-
-menuconfig VIRTUALIZATION
- bool "Virtualization"
- depends on HAVE_KVM || X86
- default y
- ---help---
- Say Y here to get to see options for using your Linux host to run other
- operating systems inside virtual machines (guests).
- This option alone does not add any kernel code.
-
- If you say N, all options in this submenu will be skipped and disabled.
-
-if VIRTUALIZATION
-
-config KVM
- tristate "Kernel-based Virtual Machine (KVM) support"
- depends on HAVE_KVM
- depends on HIGH_RES_TIMERS
- # for TASKSTATS/TASK_DELAY_ACCT:
- depends on NET
- select PREEMPT_NOTIFIERS
- select MMU_NOTIFIER
- select ANON_INODES
- select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
- select IRQ_BYPASS_MANAGER
- select HAVE_KVM_IRQ_BYPASS
- select HAVE_KVM_IRQ_ROUTING
- select HAVE_KVM_EVENTFD
- select KVM_ASYNC_PF
- select USER_RETURN_NOTIFIER
- select KVM_MMIO
- select TASKSTATS
- select TASK_DELAY_ACCT
- select PERF_EVENTS
- select HAVE_KVM_MSI
- select HAVE_KVM_CPU_RELAX_INTERCEPT
- select KVM_GENERIC_DIRTYLOG_READ_PROTECT
- select KVM_VFIO
- select SRCU
- ---help---
- Support hosting fully virtualized guest machines using hardware
- virtualization extensions. You will need a fairly recent
- processor equipped with virtualization extensions. You will also
- need to select one or more of the processor modules below.
-
- This module provides access to the hardware capabilities through
- a character device node named /dev/kvm.
-
- To compile this as a module, choose M here: the module
- will be called kvm.
-
- If unsure, say N.
-
-config KVM_INTEL
- tristate "KVM for Intel processors support"
- depends on KVM
- # for perf_guest_get_msrs():
- depends on CPU_SUP_INTEL
- ---help---
- Provides support for KVM on Intel processors equipped with the VT
- extensions.
-
- To compile this as a module, choose M here: the module
- will be called kvm-intel.
-
-config KVM_AMD
- tristate "KVM for AMD processors support"
- depends on KVM
- ---help---
- Provides support for KVM on AMD processors equipped with the AMD-V
- (SVM) extensions.
-
- To compile this as a module, choose M here: the module
- will be called kvm-amd.
-
-config KVM_MMU_AUDIT
- bool "Audit KVM MMU"
- depends on KVM && TRACEPOINTS
- ---help---
- This option adds a R/W kVM module parameter 'mmu_audit', which allows
- auditing of KVM MMU events at runtime.
-
-config KVM_DEVICE_ASSIGNMENT
- bool "KVM legacy PCI device assignment support (DEPRECATED)"
- depends on KVM && PCI && IOMMU_API
- default n
- ---help---
- Provide support for legacy PCI device assignment through KVM. The
- kernel now also supports a full featured userspace device driver
- framework through VFIO, which supersedes this support and provides
- better security.
-
- If unsure, say N.
-
-# OK, it's a little counter-intuitive to do this, but it puts it neatly under
-# the virtualization menu.
-source drivers/vhost/Kconfig
-source drivers/lguest/Kconfig
-
-endif # VIRTUALIZATION
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
deleted file mode 100644
index 3bff207..0000000
--- a/arch/x86/kvm/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-
-ccflags-y += -Iarch/x86/kvm
-
-CFLAGS_x86.o := -I.
-CFLAGS_svm.o := -I.
-CFLAGS_vmx.o := -I.
-
-KVM := ../../../virt/kvm
-
-kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
- $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
-
-kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
- i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
- hyperv.o page_track.o debugfs.o
-
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
-
-kvm-intel-y += vmx.o pmu_intel.o
-kvm-amd-y += svm.o pmu_amd.o
-
-obj-$(CONFIG_KVM) += kvm.o
-obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
-obj-$(CONFIG_KVM_AMD) += kvm-amd.o
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
deleted file mode 100644
index 308b859..0000000
--- a/arch/x86/kvm/assigned-dev.c
+++ /dev/null
@@ -1,1058 +0,0 @@
-/*
- * Kernel-based Virtual Machine - device assignment support
- *
- * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/namei.h>
-#include <linux/fs.h>
-#include "irq.h"
-#include "assigned-dev.h"
-#include "trace/events/kvm.h"
-
-struct kvm_assigned_dev_kernel {
- struct kvm_irq_ack_notifier ack_notifier;
- struct list_head list;
- int assigned_dev_id;
- int host_segnr;
- int host_busnr;
- int host_devfn;
- unsigned int entries_nr;
- int host_irq;
- bool host_irq_disabled;
- bool pci_2_3;
- struct msix_entry *host_msix_entries;
- int guest_irq;
- struct msix_entry *guest_msix_entries;
- unsigned long irq_requested_type;
- int irq_source_id;
- int flags;
- struct pci_dev *dev;
- struct kvm *kvm;
- spinlock_t intx_lock;
- spinlock_t intx_mask_lock;
- char irq_name[32];
- struct pci_saved_state *pci_saved_state;
-};
-
-static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
- int assigned_dev_id)
-{
- struct kvm_assigned_dev_kernel *match;
-
- list_for_each_entry(match, head, list) {
- if (match->assigned_dev_id == assigned_dev_id)
- return match;
- }
- return NULL;
-}
-
-static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
- *assigned_dev, int irq)
-{
- int i, index;
- struct msix_entry *host_msix_entries;
-
- host_msix_entries = assigned_dev->host_msix_entries;
-
- index = -1;
- for (i = 0; i < assigned_dev->entries_nr; i++)
- if (irq == host_msix_entries[i].vector) {
- index = i;
- break;
- }
- if (index < 0)
- printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
-
- return index;
-}
-
-static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int ret;
-
- spin_lock(&assigned_dev->intx_lock);
- if (pci_check_and_mask_intx(assigned_dev->dev)) {
- assigned_dev->host_irq_disabled = true;
- ret = IRQ_WAKE_THREAD;
- } else
- ret = IRQ_NONE;
- spin_unlock(&assigned_dev->intx_lock);
-
- return ret;
-}
-
-static void
-kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
- int vector)
-{
- if (unlikely(assigned_dev->irq_requested_type &
- KVM_DEV_IRQ_GUEST_INTX)) {
- spin_lock(&assigned_dev->intx_mask_lock);
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
- kvm_set_irq(assigned_dev->kvm,
- assigned_dev->irq_source_id, vector, 1,
- false);
- spin_unlock(&assigned_dev->intx_mask_lock);
- } else
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- vector, 1, false);
-}
-
-static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- spin_lock_irq(&assigned_dev->intx_lock);
- disable_irq_nosync(irq);
- assigned_dev->host_irq_disabled = true;
- spin_unlock_irq(&assigned_dev->intx_lock);
- }
-
- kvm_assigned_dev_raise_guest_irq(assigned_dev,
- assigned_dev->guest_irq);
-
- return IRQ_HANDLED;
-}
-
-/*
- * Deliver an IRQ in an atomic context if we can, or return a failure,
- * user can retry in a process context.
- * Return value:
- * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
- * Other values - No need to retry.
- */
-static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
- int level)
-{
- struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
- struct kvm_kernel_irq_routing_entry *e;
- int ret = -EINVAL;
- int idx;
-
- trace_kvm_set_irq(irq, level, irq_source_id);
-
- /*
- * Injection into either PIC or IOAPIC might need to scan all CPUs,
- * which would need to be retried from thread context; when same GSI
- * is connected to both PIC and IOAPIC, we'd have to report a
- * partial failure here.
- * Since there's no easy way to do this, we only support injecting MSI
- * which is limited to 1:1 GSI mapping.
- */
- idx = srcu_read_lock(&kvm->irq_srcu);
- if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
- e = &entries[0];
- ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
- irq, level);
- }
- srcu_read_unlock(&kvm->irq_srcu, idx);
- return ret;
-}
-
-
-static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
- return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
- kvm_assigned_dev_raise_guest_irq(assigned_dev,
- assigned_dev->guest_irq);
-
- return IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int index = find_index_from_host_irq(assigned_dev, irq);
- u32 vector;
- int ret = 0;
-
- if (index >= 0) {
- vector = assigned_dev->guest_msix_entries[index].vector;
- ret = kvm_set_irq_inatomic(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- vector, 1);
- }
-
- return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int index = find_index_from_host_irq(assigned_dev, irq);
- u32 vector;
-
- if (index >= 0) {
- vector = assigned_dev->guest_msix_entries[index].vector;
- kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
- }
-
- return IRQ_HANDLED;
-}
-
-/* Ack the irq line for an assigned device */
-static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
- struct kvm_assigned_dev_kernel *dev =
- container_of(kian, struct kvm_assigned_dev_kernel,
- ack_notifier);
-
- kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
-
- spin_lock(&dev->intx_mask_lock);
-
- if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
- bool reassert = false;
-
- spin_lock_irq(&dev->intx_lock);
- /*
- * The guest IRQ may be shared so this ack can come from an
- * IRQ for another guest device.
- */
- if (dev->host_irq_disabled) {
- if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
- enable_irq(dev->host_irq);
- else if (!pci_check_and_unmask_intx(dev->dev))
- reassert = true;
- dev->host_irq_disabled = reassert;
- }
- spin_unlock_irq(&dev->intx_lock);
-
- if (reassert)
- kvm_set_irq(dev->kvm, dev->irq_source_id,
- dev->guest_irq, 1, false);
- }
-
- spin_unlock(&dev->intx_mask_lock);
-}
-
-static void deassign_guest_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- if (assigned_dev->ack_notifier.gsi != -1)
- kvm_unregister_irq_ack_notifier(kvm,
- &assigned_dev->ack_notifier);
-
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 0, false);
-
- if (assigned_dev->irq_source_id != -1)
- kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
- assigned_dev->irq_source_id = -1;
- assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
-}
-
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void deassign_host_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- /*
- * We disable irq here to prevent further events.
- *
- * Notice this maybe result in nested disable if the interrupt type is
- * INTx, but it's OK for we are going to free it.
- *
- * If this function is a part of VM destroy, please ensure that till
- * now, the kvm state is still legal for probably we also have to wait
- * on a currently running IRQ handler.
- */
- if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
- int i;
- for (i = 0; i < assigned_dev->entries_nr; i++)
- disable_irq(assigned_dev->host_msix_entries[i].vector);
-
- for (i = 0; i < assigned_dev->entries_nr; i++)
- free_irq(assigned_dev->host_msix_entries[i].vector,
- assigned_dev);
-
- assigned_dev->entries_nr = 0;
- kfree(assigned_dev->host_msix_entries);
- kfree(assigned_dev->guest_msix_entries);
- pci_disable_msix(assigned_dev->dev);
- } else {
- /* Deal with MSI and INTx */
- if ((assigned_dev->irq_requested_type &
- KVM_DEV_IRQ_HOST_INTX) &&
- (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- spin_lock_irq(&assigned_dev->intx_lock);
- pci_intx(assigned_dev->dev, false);
- spin_unlock_irq(&assigned_dev->intx_lock);
- synchronize_irq(assigned_dev->host_irq);
- } else
- disable_irq(assigned_dev->host_irq);
-
- free_irq(assigned_dev->host_irq, assigned_dev);
-
- if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
- pci_disable_msi(assigned_dev->dev);
- }
-
- assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
-}
-
-static int kvm_deassign_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev,
- unsigned long irq_requested_type)
-{
- unsigned long guest_irq_type, host_irq_type;
-
- if (!irqchip_in_kernel(kvm))
- return -EINVAL;
- /* no irq assignment to deassign */
- if (!assigned_dev->irq_requested_type)
- return -ENXIO;
-
- host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
- guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
-
- if (host_irq_type)
- deassign_host_irq(kvm, assigned_dev);
- if (guest_irq_type)
- deassign_guest_irq(kvm, assigned_dev);
-
- return 0;
-}
-
-static void kvm_free_assigned_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
-}
-
-static void kvm_free_assigned_device(struct kvm *kvm,
- struct kvm_assigned_dev_kernel
- *assigned_dev)
-{
- kvm_free_assigned_irq(kvm, assigned_dev);
-
- pci_reset_function(assigned_dev->dev);
- if (pci_load_and_free_saved_state(assigned_dev->dev,
- &assigned_dev->pci_saved_state))
- printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
- __func__, dev_name(&assigned_dev->dev->dev));
- else
- pci_restore_state(assigned_dev->dev);
-
- pci_clear_dev_assigned(assigned_dev->dev);
-
- pci_release_regions(assigned_dev->dev);
- pci_disable_device(assigned_dev->dev);
- pci_dev_put(assigned_dev->dev);
-
- list_del(&assigned_dev->list);
- kfree(assigned_dev);
-}
-
-void kvm_free_all_assigned_devices(struct kvm *kvm)
-{
- struct kvm_assigned_dev_kernel *assigned_dev, *tmp;
-
- list_for_each_entry_safe(assigned_dev, tmp,
- &kvm->arch.assigned_dev_head, list) {
- kvm_free_assigned_device(kvm, assigned_dev);
- }
-}
-
-static int assigned_device_enable_host_intx(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- irq_handler_t irq_handler;
- unsigned long flags;
-
- dev->host_irq = dev->dev->irq;
-
- /*
- * We can only share the IRQ line with other host devices if we are
- * able to disable the IRQ source at device-level - independently of
- * the guest driver. Otherwise host devices may suffer from unbounded
- * IRQ latencies when the guest keeps the line asserted.
- */
- if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
- irq_handler = kvm_assigned_dev_intx;
- flags = IRQF_SHARED;
- } else {
- irq_handler = NULL;
- flags = IRQF_ONESHOT;
- }
- if (request_threaded_irq(dev->host_irq, irq_handler,
- kvm_assigned_dev_thread_intx, flags,
- dev->irq_name, dev))
- return -EIO;
-
- if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
- spin_lock_irq(&dev->intx_lock);
- pci_intx(dev->dev, true);
- spin_unlock_irq(&dev->intx_lock);
- }
- return 0;
-}
-
-static int assigned_device_enable_host_msi(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- int r;
-
- if (!dev->dev->msi_enabled) {
- r = pci_enable_msi(dev->dev);
- if (r)
- return r;
- }
-
- dev->host_irq = dev->dev->irq;
- if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
- kvm_assigned_dev_thread_msi, 0,
- dev->irq_name, dev)) {
- pci_disable_msi(dev->dev);
- return -EIO;
- }
-
- return 0;
-}
-
-static int assigned_device_enable_host_msix(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- int i, r = -EINVAL;
-
- /* host_msix_entries and guest_msix_entries should have been
- * initialized */
- if (dev->entries_nr == 0)
- return r;
-
- r = pci_enable_msix_exact(dev->dev,
- dev->host_msix_entries, dev->entries_nr);
- if (r)
- return r;
-
- for (i = 0; i < dev->entries_nr; i++) {
- r = request_threaded_irq(dev->host_msix_entries[i].vector,
- kvm_assigned_dev_msix,
- kvm_assigned_dev_thread_msix,
- 0, dev->irq_name, dev);
- if (r)
- goto err;
- }
-
- return 0;
-err:
- for (i -= 1; i >= 0; i--)
- free_irq(dev->host_msix_entries[i].vector, dev);
- pci_disable_msix(dev->dev);
- return r;
-}
-
-static int assigned_device_enable_guest_intx(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = irq->guest_irq;
- return 0;
-}
-
-static int assigned_device_enable_guest_msi(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = -1;
- return 0;
-}
-
-static int assigned_device_enable_guest_msix(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = -1;
- return 0;
-}
-
-static int assign_host_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- __u32 host_irq_type)
-{
- int r = -EEXIST;
-
- if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
- return r;
-
- snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
- pci_name(dev->dev));
-
- switch (host_irq_type) {
- case KVM_DEV_IRQ_HOST_INTX:
- r = assigned_device_enable_host_intx(kvm, dev);
- break;
- case KVM_DEV_IRQ_HOST_MSI:
- r = assigned_device_enable_host_msi(kvm, dev);
- break;
- case KVM_DEV_IRQ_HOST_MSIX:
- r = assigned_device_enable_host_msix(kvm, dev);
- break;
- default:
- r = -EINVAL;
- }
- dev->host_irq_disabled = false;
-
- if (!r)
- dev->irq_requested_type |= host_irq_type;
-
- return r;
-}
-
-static int assign_guest_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq,
- unsigned long guest_irq_type)
-{
- int id;
- int r = -EEXIST;
-
- if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
- return r;
-
- id = kvm_request_irq_source_id(kvm);
- if (id < 0)
- return id;
-
- dev->irq_source_id = id;
-
- switch (guest_irq_type) {
- case KVM_DEV_IRQ_GUEST_INTX:
- r = assigned_device_enable_guest_intx(kvm, dev, irq);
- break;
- case KVM_DEV_IRQ_GUEST_MSI:
- r = assigned_device_enable_guest_msi(kvm, dev, irq);
- break;
- case KVM_DEV_IRQ_GUEST_MSIX:
- r = assigned_device_enable_guest_msix(kvm, dev, irq);
- break;
- default:
- r = -EINVAL;
- }
-
- if (!r) {
- dev->irq_requested_type |= guest_irq_type;
- if (dev->ack_notifier.gsi != -1)
- kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
- } else {
- kvm_free_irq_source_id(kvm, dev->irq_source_id);
- dev->irq_source_id = -1;
- }
-
- return r;
-}
-
-/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
- struct kvm_assigned_irq *assigned_irq)
-{
- int r = -EINVAL;
- struct kvm_assigned_dev_kernel *match;
- unsigned long host_irq_type, guest_irq_type;
-
- if (!irqchip_in_kernel(kvm))
- return r;
-
- mutex_lock(&kvm->lock);
- r = -ENODEV;
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
- if (!match)
- goto out;
-
- host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
- guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
-
- r = -EINVAL;
- /* can only assign one type at a time */
- if (hweight_long(host_irq_type) > 1)
- goto out;
- if (hweight_long(guest_irq_type) > 1)
- goto out;
- if (host_irq_type == 0 && guest_irq_type == 0)
- goto out;
-
- r = 0;
- if (host_irq_type)
- r = assign_host_irq(kvm, match, host_irq_type);
- if (r)
- goto out;
-
- if (guest_irq_type)
- r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
- struct kvm_assigned_irq
- *assigned_irq)
-{
- int r = -ENODEV;
- struct kvm_assigned_dev_kernel *match;
- unsigned long irq_type;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
- if (!match)
- goto out;
-
- irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
- KVM_DEV_IRQ_GUEST_MASK);
- r = kvm_deassign_irq(kvm, match, irq_type);
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-/*
- * We want to test whether the caller has been granted permissions to
- * use this device. To be able to configure and control the device,
- * the user needs access to PCI configuration space and BAR resources.
- * These are accessed through PCI sysfs. PCI config space is often
- * passed to the process calling this ioctl via file descriptor, so we
- * can't rely on access to that file. We can check for permissions
- * on each of the BAR resource files, which is a pretty clear
- * indicator that the user has been granted access to the device.
- */
-static int probe_sysfs_permissions(struct pci_dev *dev)
-{
-#ifdef CONFIG_SYSFS
- int i;
- bool bar_found = false;
-
- for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
- char *kpath, *syspath;
- struct path path;
- struct inode *inode;
- int r;
-
- if (!pci_resource_len(dev, i))
- continue;
-
- kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
- if (!kpath)
- return -ENOMEM;
-
- /* Per sysfs-rules, sysfs is always at /sys */
- syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
- kfree(kpath);
- if (!syspath)
- return -ENOMEM;
-
- r = kern_path(syspath, LOOKUP_FOLLOW, &path);
- kfree(syspath);
- if (r)
- return r;
-
- inode = d_backing_inode(path.dentry);
-
- r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
- path_put(&path);
- if (r)
- return r;
-
- bar_found = true;
- }
-
- /* If no resources, probably something special */
- if (!bar_found)
- return -EPERM;
-
- return 0;
-#else
- return -EINVAL; /* No way to control the device without sysfs */
-#endif
-}
-
-static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0, idx;
- struct kvm_assigned_dev_kernel *match;
- struct pci_dev *dev;
-
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
- return -EINVAL;
-
- mutex_lock(&kvm->lock);
- idx = srcu_read_lock(&kvm->srcu);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (match) {
- /* device already assigned */
- r = -EEXIST;
- goto out;
- }
-
- match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
- if (match == NULL) {
- printk(KERN_INFO "%s: Couldn't allocate memory\n",
- __func__);
- r = -ENOMEM;
- goto out;
- }
- dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
- assigned_dev->busnr,
- assigned_dev->devfn);
- if (!dev) {
- printk(KERN_INFO "%s: host device not found\n", __func__);
- r = -EINVAL;
- goto out_free;
- }
-
- /* Don't allow bridges to be assigned */
- if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
- r = -EPERM;
- goto out_put;
- }
-
- r = probe_sysfs_permissions(dev);
- if (r)
- goto out_put;
-
- if (pci_enable_device(dev)) {
- printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
- r = -EBUSY;
- goto out_put;
- }
- r = pci_request_regions(dev, "kvm_assigned_device");
- if (r) {
- printk(KERN_INFO "%s: Could not get access to device regions\n",
- __func__);
- goto out_disable;
- }
-
- pci_reset_function(dev);
- pci_save_state(dev);
- match->pci_saved_state = pci_store_saved_state(dev);
- if (!match->pci_saved_state)
- printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
- __func__, dev_name(&dev->dev));
-
- if (!pci_intx_mask_supported(dev))
- assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
-
- match->assigned_dev_id = assigned_dev->assigned_dev_id;
- match->host_segnr = assigned_dev->segnr;
- match->host_busnr = assigned_dev->busnr;
- match->host_devfn = assigned_dev->devfn;
- match->flags = assigned_dev->flags;
- match->dev = dev;
- spin_lock_init(&match->intx_lock);
- spin_lock_init(&match->intx_mask_lock);
- match->irq_source_id = -1;
- match->kvm = kvm;
- match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-
- list_add(&match->list, &kvm->arch.assigned_dev_head);
-
- if (!kvm->arch.iommu_domain) {
- r = kvm_iommu_map_guest(kvm);
- if (r)
- goto out_list_del;
- }
- r = kvm_assign_device(kvm, match->dev);
- if (r)
- goto out_list_del;
-
-out:
- srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm->lock);
- return r;
-out_list_del:
- if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
- printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
- __func__, dev_name(&dev->dev));
- list_del(&match->list);
- pci_release_regions(dev);
-out_disable:
- pci_disable_device(dev);
-out_put:
- pci_dev_put(dev);
-out_free:
- kfree(match);
- srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (!match) {
- printk(KERN_INFO "%s: device hasn't been assigned before, "
- "so cannot be deassigned\n", __func__);
- r = -EINVAL;
- goto out;
- }
-
- kvm_deassign_device(kvm, match->dev);
-
- kvm_free_assigned_device(kvm, match);
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-
-static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
- struct kvm_assigned_msix_nr *entry_nr)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *adev;
-
- mutex_lock(&kvm->lock);
-
- adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- entry_nr->assigned_dev_id);
- if (!adev) {
- r = -EINVAL;
- goto msix_nr_out;
- }
-
- if (adev->entries_nr == 0) {
- adev->entries_nr = entry_nr->entry_nr;
- if (adev->entries_nr == 0 ||
- adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
- r = -EINVAL;
- goto msix_nr_out;
- }
-
- adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
- entry_nr->entry_nr,
- GFP_KERNEL);
- if (!adev->host_msix_entries) {
- r = -ENOMEM;
- goto msix_nr_out;
- }
- adev->guest_msix_entries =
- kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
- GFP_KERNEL);
- if (!adev->guest_msix_entries) {
- kfree(adev->host_msix_entries);
- r = -ENOMEM;
- goto msix_nr_out;
- }
- } else /* Not allowed set MSI-X number twice */
- r = -EINVAL;
-msix_nr_out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
- struct kvm_assigned_msix_entry *entry)
-{
- int r = 0, i;
- struct kvm_assigned_dev_kernel *adev;
-
- mutex_lock(&kvm->lock);
-
- adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- entry->assigned_dev_id);
-
- if (!adev) {
- r = -EINVAL;
- goto msix_entry_out;
- }
-
- for (i = 0; i < adev->entries_nr; i++)
- if (adev->guest_msix_entries[i].vector == 0 ||
- adev->guest_msix_entries[i].entry == entry->entry) {
- adev->guest_msix_entries[i].entry = entry->entry;
- adev->guest_msix_entries[i].vector = entry->gsi;
- adev->host_msix_entries[i].entry = entry->entry;
- break;
- }
- if (i == adev->entries_nr) {
- r = -ENOSPC;
- goto msix_entry_out;
- }
-
-msix_entry_out:
- mutex_unlock(&kvm->lock);
-
- return r;
-}
-
-static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (!match) {
- r = -ENODEV;
- goto out;
- }
-
- spin_lock(&match->intx_mask_lock);
-
- match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
- match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
-
- if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
- if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
- kvm_set_irq(match->kvm, match->irq_source_id,
- match->guest_irq, 0, false);
- /*
- * Masking at hardware-level is performed on demand,
- * i.e. when an IRQ actually arrives at the host.
- */
- } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- /*
- * Unmask the IRQ line if required. Unmasking at
- * device level will be performed by user space.
- */
- spin_lock_irq(&match->intx_lock);
- if (match->host_irq_disabled) {
- enable_irq(match->host_irq);
- match->host_irq_disabled = false;
- }
- spin_unlock_irq(&match->intx_lock);
- }
- }
-
- spin_unlock(&match->intx_mask_lock);
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
- int r;
-
- switch (ioctl) {
- case KVM_ASSIGN_PCI_DEVICE: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_IRQ: {
- r = -EOPNOTSUPP;
- break;
- }
- case KVM_ASSIGN_DEV_IRQ: {
- struct kvm_assigned_irq assigned_irq;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
- goto out;
- r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
- if (r)
- goto out;
- break;
- }
- case KVM_DEASSIGN_DEV_IRQ: {
- struct kvm_assigned_irq assigned_irq;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
- goto out;
- r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
- if (r)
- goto out;
- break;
- }
- case KVM_DEASSIGN_PCI_DEVICE: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_MSIX_NR: {
- struct kvm_assigned_msix_nr entry_nr;
- r = -EFAULT;
- if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
- goto out;
- r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_MSIX_ENTRY: {
- struct kvm_assigned_msix_entry entry;
- r = -EFAULT;
- if (copy_from_user(&entry, argp, sizeof entry))
- goto out;
- r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_INTX_MASK: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
- break;
- }
- default:
- r = -ENOTTY;
- break;
- }
-out:
- return r;
-}
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
deleted file mode 100644
index a428c1a..0000000
--- a/arch/x86/kvm/assigned-dev.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
-#define ARCH_X86_KVM_ASSIGNED_DEV_H
-
-#include <linux/kvm_host.h>
-
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
-int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
-
-int kvm_iommu_map_guest(struct kvm *kvm);
-int kvm_iommu_unmap_guest(struct kvm *kvm);
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg);
-
-void kvm_free_all_assigned_devices(struct kvm *kvm);
-#else
-static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg)
-{
- return -ENOTTY;
-}
-
-static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
-#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
-
-#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index afa7bbb..ad85822 100644..100755
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -6,6 +6,7 @@
*
* Copyright 2011 Red Hat, Inc. and/or its affiliates.
* Copyright IBM Corporation, 2008
+ * Copyright 2019 Google LLC
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
@@ -13,16 +14,9 @@
*/
#include <linux/kvm_host.h>
-#include <linux/export.h>
-#include <linux/vmalloc.h>
-#include <linux/uaccess.h>
-#include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */
-#include <asm/user.h>
-#include <asm/fpu/xstate.h>
#include "cpuid.h"
#include "lapic.h"
#include "mmu.h"
-#include "trace.h"
#include "pmu.h"
static u32 xstate_required_size(u64 xstate_bv, bool compacted)
@@ -33,7 +27,7 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
xstate_bv &= XFEATURE_MASK_EXTEND;
while (xstate_bv) {
if (xstate_bv & 0x1) {
- u32 eax, ebx, ecx, edx, offset;
+ u32 eax = 0, ebx = 0, ecx, edx, offset;
cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
offset = compacted ? ret : ebx;
ret = max(ret, offset + eax);
@@ -51,11 +45,10 @@ bool kvm_mpx_supported(void)
return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
&& kvm_x86_ops->mpx_supported());
}
-EXPORT_SYMBOL_GPL(kvm_mpx_supported);
u64 kvm_supported_xcr0(void)
{
- u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
+ u64 xcr0 = GVM_SUPPORTED_XCR0 & host_xcr0;
if (!kvm_mpx_supported())
xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
@@ -67,7 +60,7 @@ u64 kvm_supported_xcr0(void)
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
struct kvm_lapic *apic = vcpu->arch.apic;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
@@ -114,9 +107,6 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- if (use_eager_fpu())
- kvm_x86_ops->fpu_activate(vcpu);
-
/*
* The existing code assumes virtual address is 48-bit in the canonical
* address checks; exit if it is ever changed.
@@ -144,7 +134,7 @@ static int is_efer_nx(void)
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
int i;
- struct kvm_cpuid_entry2 *e, *entry;
+ struct kvm_cpuid_entry *e, *entry;
entry = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
@@ -162,7 +152,7 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
if (!best || best->eax < 0x80000008)
@@ -173,65 +163,19 @@ int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
not_found:
return 36;
}
-EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
-/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
- struct kvm_cpuid *cpuid,
- struct kvm_cpuid_entry __user *entries)
-{
- int r, i;
- struct kvm_cpuid_entry *cpuid_entries = NULL;
-
- r = -E2BIG;
- if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
- goto out;
- r = -ENOMEM;
- if (cpuid->nent) {
- cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) *
- cpuid->nent);
- if (!cpuid_entries)
- goto out;
- r = -EFAULT;
- if (copy_from_user(cpuid_entries, entries,
- cpuid->nent * sizeof(struct kvm_cpuid_entry)))
- goto out;
- }
- for (i = 0; i < cpuid->nent; i++) {
- vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
- vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
- vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
- vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
- vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
- vcpu->arch.cpuid_entries[i].index = 0;
- vcpu->arch.cpuid_entries[i].flags = 0;
- vcpu->arch.cpuid_entries[i].padding[0] = 0;
- vcpu->arch.cpuid_entries[i].padding[1] = 0;
- vcpu->arch.cpuid_entries[i].padding[2] = 0;
- }
- vcpu->arch.cpuid_nent = cpuid->nent;
- cpuid_fix_nx_cap(vcpu);
- kvm_apic_set_version(vcpu);
- kvm_x86_ops->cpuid_update(vcpu);
- r = kvm_update_cpuid(vcpu);
-
-out:
- vfree(cpuid_entries);
- return r;
-}
-
-int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid *cpuid,
+ struct kvm_cpuid_entry __user *entries)
{
int r;
r = -E2BIG;
- if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+ if (cpuid->nent > GVM_MAX_CPUID_ENTRIES)
goto out;
r = -EFAULT;
if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
- cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
+ cpuid->nent * sizeof(struct kvm_cpuid_entry)))
goto out;
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
@@ -241,19 +185,16 @@ out:
return r;
}
-int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+int kvm_vcpu_ioctl_get_cpuid(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid *cpuid,
+ struct kvm_cpuid_entry __user *entries)
{
int r;
r = -E2BIG;
if (cpuid->nent < vcpu->arch.cpuid_nent)
goto out;
- r = -EFAULT;
- if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
- goto out;
+
return 0;
out:
@@ -263,10 +204,12 @@ out:
static void cpuid_mask(u32 *word, int wordnum)
{
+#if 0
*word &= boot_cpu_data.x86_capability[wordnum];
+#endif
}
-static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+static void do_cpuid_1_ent(struct kvm_cpuid_entry *entry, u32 function,
u32 index)
{
entry->function = function;
@@ -276,7 +219,7 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->flags = 0;
}
-static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
+static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry *entry,
u32 func, u32 index, int *nent, int maxnent)
{
switch (func) {
@@ -298,7 +241,7 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
return 0;
}
-static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+static inline int __do_cpuid_ent(struct kvm_cpuid_entry *entry, u32 function,
u32 index, int *nent, int maxnent)
{
int r;
@@ -346,7 +289,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
- 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
+ 0 /* Reserved*/ | F(AES) | 0 /*F(XSAVE)*/ | 0 /* OSXSAVE */ | 0 /*F(AVX)*/ |
F(F16C) | F(RDRAND);
/* cpuid 0x80000001.ecx */
const u32 kvm_cpuid_8000_0001_ecx_x86_features =
@@ -403,18 +346,18 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* function 2 entries are STATEFUL. That is, repeated cpuid commands
* may return different values. This forces us to get_cpu() before
* issuing the first command, and also to emulate this annoying behavior
- * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
+ * in kvm_emulate_cpuid() using GVM_CPUID_FLAG_STATE_READ_NEXT */
case 2: {
int t, times = entry->eax & 0xff;
- entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
- entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
+ entry->flags |= GVM_CPUID_FLAG_STATEFUL_FUNC;
+ entry->flags |= GVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times; ++t) {
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(&entry[t], function, 0);
- entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+ entry[t].flags |= GVM_CPUID_FLAG_STATEFUL_FUNC;
++*nent;
}
break;
@@ -423,7 +366,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 4: {
int i, cache_type;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ entry->flags |= GVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until cache_type is zero */
for (i = 1; ; ++i) {
if (*nent >= maxnent)
@@ -434,7 +377,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
do_cpuid_1_ent(&entry[i], function, i);
entry[i].flags |=
- KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ GVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
}
break;
@@ -446,7 +389,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->edx = 0;
break;
case 7: {
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ entry->flags |= GVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* Mask ebx against host capability word 9 */
if (index == 0) {
entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
@@ -469,6 +412,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 9:
break;
case 0xa: { /* Architectural Performance Monitoring */
+#if 0
struct x86_pmu_capability cap;
union cpuid10_eax eax;
union cpuid10_edx edx;
@@ -495,13 +439,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx = cap.events_mask;
entry->ecx = 0;
entry->edx = edx.full;
+#endif
break;
}
/* function 0xb has additional index. */
case 0xb: {
int i, level_type;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ entry->flags |= GVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
for (i = 1; ; ++i) {
if (*nent >= maxnent)
@@ -512,7 +457,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
do_cpuid_1_ent(&entry[i], function, i);
entry[i].flags |=
- KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ GVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
}
break;
@@ -525,7 +470,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx = xstate_required_size(supported, false);
entry->ecx = entry->ebx;
entry->edx &= supported >> 32;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ entry->flags |= GVM_CPUID_FLAG_SIGNIFCANT_INDEX;
if (!supported)
break;
@@ -552,37 +497,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry[i].ecx = 0;
entry[i].edx = 0;
entry[i].flags |=
- KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ GVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
++i;
}
break;
}
- case KVM_CPUID_SIGNATURE: {
- static const char signature[12] = "KVMKVMKVM\0\0";
- const u32 *sigptr = (const u32 *)signature;
- entry->eax = KVM_CPUID_FEATURES;
- entry->ebx = sigptr[0];
- entry->ecx = sigptr[1];
- entry->edx = sigptr[2];
- break;
- }
- case KVM_CPUID_FEATURES:
- entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
- (1 << KVM_FEATURE_NOP_IO_DELAY) |
- (1 << KVM_FEATURE_CLOCKSOURCE2) |
- (1 << KVM_FEATURE_ASYNC_PF) |
- (1 << KVM_FEATURE_PV_EOI) |
- (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
- (1 << KVM_FEATURE_PV_UNHALT);
-
- if (sched_info_on())
- entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
-
- entry->ebx = 0;
- entry->ecx = 0;
- entry->edx = 0;
- break;
case 0x80000000:
entry->eax = min(entry->eax, 0x8000001a);
break;
@@ -593,11 +513,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
cpuid_mask(&entry->ecx, CPUID_8000_0001_ECX);
break;
case 0x80000007: /* Advanced power management */
+#if 0
/* invariant TSC is CPUID.80000007H:EDX[8] */
entry->edx &= (1 << 8);
/* mask against host */
entry->edx &= boot_cpu_data.x86_power;
entry->eax = entry->ebx = entry->ecx = 0;
+#endif
break;
case 0x80000008: {
unsigned g_phys_as = (entry->eax >> 16) & 0xff;
@@ -646,10 +568,10 @@ out:
return r;
}
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
+static int do_cpuid_ent(struct kvm_cpuid_entry *entry, u32 func,
u32 idx, int *nent, int maxnent, unsigned int type)
{
- if (type == KVM_GET_EMULATED_CPUID)
+ if (type == GVM_GET_EMULATED_CPUID)
return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
return __do_cpuid_ent(entry, func, idx, nent, maxnent);
@@ -666,23 +588,23 @@ struct kvm_cpuid_param {
static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
{
- return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
+ return 0;
}
-static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
+static bool sanity_check_entries(struct kvm_cpuid_entry __user *entries,
__u32 num_entries, unsigned int ioctl_type)
{
int i;
__u32 pad[3];
- if (ioctl_type != KVM_GET_EMULATED_CPUID)
+ if (ioctl_type != GVM_GET_EMULATED_CPUID)
return false;
/*
* We want to make sure that ->padding is being passed clean from
* userspace in case we want to use it for something in the future.
*
- * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
+ * Sadly, this wasn't enforced for GVM_GET_SUPPORTED_CPUID and so we
* have to give ourselves satisfied only with the emulated side. /me
* sheds a tear.
*/
@@ -696,31 +618,29 @@ static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
return false;
}
-int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries,
+int kvm_dev_ioctl_get_cpuid(PIRP pIrp, struct kvm_cpuid *cpuid,
+ struct kvm_cpuid_entry __user *entries,
unsigned int type)
{
- struct kvm_cpuid_entry2 *cpuid_entries;
+ struct kvm_cpuid_entry *cpuid_entries;
int limit, nent = 0, r = -E2BIG, i;
u32 func;
static const struct kvm_cpuid_param param[] = {
{ .func = 0, .has_leaf_count = true },
{ .func = 0x80000000, .has_leaf_count = true },
{ .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
- { .func = KVM_CPUID_SIGNATURE },
- { .func = KVM_CPUID_FEATURES },
};
if (cpuid->nent < 1)
goto out;
- if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
- cpuid->nent = KVM_MAX_CPUID_ENTRIES;
+ if (cpuid->nent > GVM_MAX_CPUID_ENTRIES)
+ cpuid->nent = GVM_MAX_CPUID_ENTRIES;
if (sanity_check_entries(entries, cpuid->nent, type))
return -EINVAL;
r = -ENOMEM;
- cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
+ cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
if (!cpuid_entries)
goto out;
@@ -749,11 +669,19 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
goto out_free;
}
- r = -EFAULT;
- if (copy_to_user(entries, cpuid_entries,
- nent * sizeof(struct kvm_cpuid_entry2)))
- goto out_free;
cpuid->nent = nent;
+
+ r = gvmUpdateReturnBuffer(pIrp, 0, cpuid, sizeof(cpuid));
+ if (!NT_SUCCESS(r)) {
+ r = -EFAULT;
+ goto out_free;
+ }
+ r = gvmUpdateReturnBuffer(pIrp, sizeof(cpuid), cpuid_entries,
+ nent * sizeof(struct kvm_cpuid_entry));
+ if (!NT_SUCCESS(r)) {
+ r = -EFAULT;
+ goto out_free;
+ }
r = 0;
out_free:
@@ -764,48 +692,47 @@ out:
static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
- struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
+ struct kvm_cpuid_entry *e = &vcpu->arch.cpuid_entries[i];
int j, nent = vcpu->arch.cpuid_nent;
- e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
+ e->flags &= ~GVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
for (j = i + 1; ; j = (j + 1) % nent) {
- struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
+ struct kvm_cpuid_entry *ej = &vcpu->arch.cpuid_entries[j];
if (ej->function == e->function) {
- ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
+ ej->flags |= GVM_CPUID_FLAG_STATE_READ_NEXT;
return j;
}
}
- return 0; /* silence gcc, even though control never reaches here */
}
/* find an entry with matching function, matching index (if needed), and that
* should be read next (if it's stateful) */
-static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
+static int is_matching_cpuid_entry(struct kvm_cpuid_entry *e,
u32 function, u32 index)
{
if (e->function != function)
return 0;
- if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
+ if ((e->flags & GVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
return 0;
- if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
- !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+ if ((e->flags & GVM_CPUID_FLAG_STATEFUL_FUNC) &&
+ !(e->flags & GVM_CPUID_FLAG_STATE_READ_NEXT))
return 0;
return 1;
}
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+struct kvm_cpuid_entry *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index)
{
int i;
- struct kvm_cpuid_entry2 *best = NULL;
+ struct kvm_cpuid_entry *best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
- struct kvm_cpuid_entry2 *e;
+ struct kvm_cpuid_entry *e;
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
- if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
+ if (e->flags & GVM_CPUID_FLAG_STATEFUL_FUNC)
move_to_next_stateful_cpuid_entry(vcpu, i);
best = e;
break;
@@ -813,17 +740,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
}
return best;
}
-EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
/*
* If no match is found, check whether we exceed the vCPU's limit
* and return the content of the highest valid _standard_ leaf instead.
* This is to satisfy the CPUID specification.
*/
-static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
+static struct kvm_cpuid_entry* check_cpuid_limit(struct kvm_vcpu *vcpu,
u32 function, u32 index)
{
- struct kvm_cpuid_entry2 *maxlevel;
+ struct kvm_cpuid_entry *maxlevel;
maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
if (!maxlevel || maxlevel->eax >= function)
@@ -839,7 +765,7 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
{
u32 function = *eax, index = *ecx;
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, function, index);
@@ -859,9 +785,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
*edx = best->edx;
} else
*eax = *ebx = *ecx = *edx = 0;
- trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
}
-EXPORT_SYMBOL_GPL(kvm_cpuid);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
@@ -876,4 +800,3 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
kvm_x86_ops->skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 35058c2..4bfa008 100644..100755
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -1,25 +1,31 @@
+/*
+ * Copyright 2019 Google LLC
+ */
+
#ifndef ARCH_X86_KVM_CPUID_H
#define ARCH_X86_KVM_CPUID_H
#include "x86.h"
-#include <asm/cpu.h>
+#include <gvm_types.h>
+#include <asm/cpufeatures.h>
+#include <ntkrutils.h>
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
bool kvm_mpx_supported(void);
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+struct kvm_cpuid_entry *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
-int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries,
+int kvm_dev_ioctl_get_cpuid(PIRP pIrp, struct kvm_cpuid *cpuid,
+ struct kvm_cpuid_entry __user *entries,
unsigned int type);
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
struct kvm_cpuid_entry __user *entries);
-int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries);
-int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries);
+int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid *cpuid,
+ struct kvm_cpuid_entry __user *entries);
+int kvm_vcpu_ioctl_get_cpuid(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid *cpuid,
+ struct kvm_cpuid_entry __user *entries);
void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
@@ -31,7 +37,7 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
if (!static_cpu_has(X86_FEATURE_XSAVE))
return false;
@@ -42,7 +48,7 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
return best && (best->edx & bit(X86_FEATURE_MTRR));
@@ -50,7 +56,7 @@ static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST));
@@ -58,7 +64,7 @@ static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_SMEP));
@@ -66,7 +72,7 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_SMAP));
@@ -74,7 +80,7 @@ static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
@@ -82,7 +88,7 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ecx & bit(X86_FEATURE_PKU));
@@ -90,7 +96,7 @@ static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
return best && (best->edx & bit(X86_FEATURE_LM));
@@ -98,7 +104,7 @@ static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
return best && (best->ecx & bit(X86_FEATURE_OSVW));
@@ -106,7 +112,7 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
return best && (best->ecx & bit(X86_FEATURE_PCID));
@@ -114,7 +120,7 @@ static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
return best && (best->ecx & bit(X86_FEATURE_X2APIC));
@@ -122,7 +128,7 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0, 0);
return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
@@ -130,7 +136,7 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
return best && (best->edx & bit(X86_FEATURE_GBPAGES));
@@ -138,7 +144,7 @@ static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_RTM));
@@ -146,7 +152,7 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
return best && (best->edx & bit(X86_FEATURE_RDTSCP));
@@ -159,7 +165,7 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0);
@@ -174,7 +180,7 @@ static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
if (!best)
@@ -185,7 +191,7 @@ static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
static inline int guest_cpuid_model(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
if (!best)
@@ -196,7 +202,7 @@ static inline int guest_cpuid_model(struct kvm_vcpu *vcpu)
static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
if (!best)
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
deleted file mode 100644
index c19c7ed..0000000
--- a/arch/x86/kvm/debugfs.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Kernel-based Virtual Machine driver for Linux
- *
- * Copyright 2016 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-#include <linux/kvm_host.h>
-#include <linux/debugfs.h>
-
-bool kvm_arch_has_vcpu_debugfs(void)
-{
- return true;
-}
-
-static int vcpu_get_tsc_offset(void *data, u64 *val)
-{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
- *val = vcpu->arch.tsc_offset;
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_offset_fops, vcpu_get_tsc_offset, NULL, "%lld\n");
-
-static int vcpu_get_tsc_scaling_ratio(void *data, u64 *val)
-{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
- *val = vcpu->arch.tsc_scaling_ratio;
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_fops, vcpu_get_tsc_scaling_ratio, NULL, "%llu\n");
-
-static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val)
-{
- *val = kvm_tsc_scaling_ratio_frac_bits;
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n");
-
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
- struct dentry *ret;
-
- ret = debugfs_create_file("tsc-offset", 0444,
- vcpu->debugfs_dentry,
- vcpu, &vcpu_tsc_offset_fops);
- if (!ret)
- return -ENOMEM;
-
- if (kvm_has_tsc_control) {
- ret = debugfs_create_file("tsc-scaling-ratio", 0444,
- vcpu->debugfs_dentry,
- vcpu, &vcpu_tsc_scaling_fops);
- if (!ret)
- return -ENOMEM;
- ret = debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
- vcpu->debugfs_dentry,
- vcpu, &vcpu_tsc_scaling_frac_fops);
- if (!ret)
- return -ENOMEM;
-
- }
-
- return 0;
-}
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a3ce9d2..6ae4ce5 100644..100755
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -10,6 +10,7 @@
*
* Copyright (C) 2006 Qumranet
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
@@ -23,12 +24,15 @@
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include <asm/kvm_emulate.h>
-#include <linux/stringify.h>
-#include <asm/debugreg.h>
+#include <uapi/asm/debugreg.h>
#include "x86.h"
#include "tss.h"
+#include <gvm_types.h>
+#include <uapi/asm/processor-flags.h>
+#include <asm/cpufeatures.h>
+
/*
* Operand types
*/
@@ -174,14 +178,14 @@
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
-#define X2(x...) x, x
-#define X3(x...) X2(x), x
-#define X4(x...) X2(x), X2(x)
-#define X5(x...) X4(x), x
-#define X6(x...) X4(x), X2(x)
-#define X7(x...) X4(x), X3(x)
-#define X8(x...) X4(x), X4(x)
-#define X16(x...) X8(x), X8(x)
+#define X2(x,...) x, x
+#define X3(x,...) X2(x), x
+#define X4(x,...) X2(x), X2(x)
+#define X5(x,...) X4(x), x
+#define X6(x,...) X4(x), X2(x)
+#define X7(x,...) X4(x), X3(x)
+#define X8(x,...) X4(x), X4(x)
+#define X16(x,...) X8(x), X8(x)
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8
@@ -281,7 +285,7 @@ static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
- unsigned reg;
+ unsigned reg = 0;
for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
@@ -308,32 +312,23 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
-#define FOP_FUNC(name) \
- ".align " __stringify(FASTOP_SIZE) " \n\t" \
- ".type " name ", @function \n\t" \
- name ":\n\t"
+#define FOP_FUNC(name)
-#define FOP_RET "ret \n\t"
+#define FOP_RET
#define FOP_START(op) \
- extern void em_##op(struct fastop *fake); \
- asm(".pushsection .text, \"ax\" \n\t" \
- ".global em_" #op " \n\t" \
- FOP_FUNC("em_" #op)
+ extern void em_##op(struct fastop *fake);
-#define FOP_END \
- ".popsection")
+#define FOP_END
#define FOPNOP() \
- FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \
- FOP_RET
+ FOP_FUNC(__stringify(__UNIQUE_ID(nop)))
#define FOP1E(op, dst) \
- FOP_FUNC(#op "_" #dst) \
- "10: " #op " %" #dst " \n\t" FOP_RET
+ FOP_FUNC(#op "_" #dst)
#define FOP1EEX(op, dst) \
- FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
+ FOP1E(op, dst)
#define FASTOP1(op) \
FOP_START(op) \
@@ -362,8 +357,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
FOP_END
#define FOP2E(op, dst, src) \
- FOP_FUNC(#op "_" #dst "_" #src) \
- #op " %" #src ", %" #dst " \n\t" FOP_RET
+ FOP_FUNC(#op "_" #dst "_" #src)
#define FASTOP2(op) \
FOP_START(op) \
@@ -401,8 +395,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
FOP_END
#define FOP3E(op, dst, src, src2) \
- FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
- #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
+ FOP_FUNC(#op "_" #dst "_" #src "_" #src2)
/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
@@ -414,15 +407,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
FOP_END
/* Special case for SETcc - 1 instruction per cc */
-#define FOP_SETCC(op) \
- ".align 4 \n\t" \
- ".type " #op ", @function \n\t" \
- #op ": \n\t" \
- #op " %al \n\t" \
- FOP_RET
-
-asm(".global kvm_fastop_exception \n"
- "kvm_fastop_exception: xor %esi, %esi; ret");
+#define FOP_SETCC(op)
FOP_START(setcc)
FOP_SETCC(seto)
@@ -443,7 +428,7 @@ FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;
-FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
+FOP_START(salc)
FOP_END;
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
@@ -472,7 +457,7 @@ static void assign_masked(ulong *dest, ulong src, ulong mask)
*dest = (*dest & ~mask) | (src & mask);
}
-static void assign_register(unsigned long *reg, u64 val, int bytes)
+static void assign_register(size_t *reg, u64 val, int bytes)
{
/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
switch (bytes) {
@@ -491,9 +476,9 @@ static void assign_register(unsigned long *reg, u64 val, int bytes)
}
}
-static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
+static inline size_t ad_mask(struct x86_emulate_ctxt *ctxt)
{
- return (1UL << (ctxt->ad_bytes << 3)) - 1;
+ return (1ULL << (ctxt->ad_bytes << 3)) - 1;
}
static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
@@ -513,16 +498,16 @@ static int stack_size(struct x86_emulate_ctxt *ctxt)
}
/* Access/update address held in a register, based on addressing mode. */
-static inline unsigned long
-address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
+static inline size_t
+address_mask(struct x86_emulate_ctxt *ctxt, size_t reg)
{
- if (ctxt->ad_bytes == sizeof(unsigned long))
+ if (ctxt->ad_bytes == sizeof(size_t))
return reg;
else
return reg & ad_mask(ctxt);
}
-static inline unsigned long
+static inline size_t
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
return address_mask(ctxt, reg_read(ctxt, reg));
@@ -553,7 +538,7 @@ static u32 desc_limit_scaled(struct desc_struct *desc)
return desc->g ? (limit << 12) | 0xfff : limit;
}
-static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
+static size_t seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
return 0;
@@ -733,8 +718,8 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
struct segmented_address addr = { .seg = VCPU_SREG_CS,
.ea = dst };
- if (ctxt->op_bytes != sizeof(unsigned long))
- addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
+ if (ctxt->op_bytes != sizeof(size_t))
+ addr.ea = dst & ((1ULL << (ctxt->op_bytes << 3)) - 1);
rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
@@ -799,7 +784,7 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
int rc;
unsigned size, max_size;
- unsigned long linear;
+ size_t linear;
int cur_size = ctxt->fetch.end - ctxt->fetch.data;
struct segmented_address addr = { .seg = VCPU_SREG_CS,
.ea = ctxt->eip + cur_size };
@@ -851,27 +836,53 @@ static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
}
/* Fetch next part of the instruction being emulated. */
-#define insn_fetch(_type, _ctxt) \
-({ _type _x; \
- \
- rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
- if (rc != X86EMUL_CONTINUE) \
- goto done; \
- ctxt->_eip += sizeof(_type); \
- _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
- ctxt->fetch.ptr += sizeof(_type); \
- _x; \
-})
-
-#define insn_fetch_arr(_arr, _size, _ctxt) \
-({ \
- rc = do_insn_fetch_bytes(_ctxt, _size); \
- if (rc != X86EMUL_CONTINUE) \
- goto done; \
- ctxt->_eip += (_size); \
- memcpy(_arr, ctxt->fetch.ptr, _size); \
- ctxt->fetch.ptr += (_size); \
-})
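+/*
+ * The insn_fetch() macros relied on GCC statement expressions and a hidden
+ * "goto done"; they are replaced here by per-type helper functions that
+ * return an X86EMUL_* status code which callers check explicitly.
+ */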
+#define __insn_fetch_type(_type) \
+static __always_inline int \
+ __insn_fetch_##_type(struct x86_emulate_ctxt *ctxt, _type *_x) \
+{ \
+ int rc; \
+ rc = do_insn_fetch_bytes(ctxt, sizeof(_type)); \
+ if (rc == X86EMUL_CONTINUE) { \
+ ctxt->_eip += sizeof(_type); \
+ *_x = *(_type *) ctxt->fetch.ptr; \
+ ctxt->fetch.ptr += sizeof(_type); \
+ } \
+ return rc; \
+}
+
+__insn_fetch_type(u8)
+__insn_fetch_type(s8)
+__insn_fetch_type(u16)
+__insn_fetch_type(s16)
+__insn_fetch_type(u32)
+__insn_fetch_type(s32)
+__insn_fetch_type(u64)
+__insn_fetch_type(s64)
+
+#define insn_fetch(_type, _ctxt, _data) __insn_fetch_##_type(_ctxt, &(_type)_data)
+
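+/*
+ * Fetch a displacement of the given type and fold it into the caller's
+ * modrm_ea; like the old macro, this uses the caller's rc and "done" label.
+ */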
+#define insn_fetch_modrmea(_type, _ctxt) \
+ do { \
+ _type __temp; \
+ rc = insn_fetch(_type, _ctxt, __temp); \
+ if (rc != X86EMUL_CONTINUE) \
+ goto done; \
+ modrm_ea += __temp; \
+ } while (0)
+
+
+static __always_inline int insn_fetch_arr(char *_arr,
+ unsigned int _size, struct x86_emulate_ctxt *_ctxt)
+{
+ int rc;
+ rc = do_insn_fetch_bytes(_ctxt, _size);
+ if (rc == X86EMUL_CONTINUE) {
+ _ctxt->_eip += (_size);
+ memcpy(_arr, _ctxt->fetch.ptr, _size);
+ _ctxt->fetch.ptr += (_size);
+ }
+ return rc;
+}
/*
* Given the 'reg' portion of a ModRM byte, and a register block, return a
@@ -893,7 +904,7 @@ static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
struct segmented_address addr,
- u16 *size, unsigned long *address, int op_bytes)
+ u16 *size, size_t *address, int op_bytes)
{
int rc;
@@ -968,14 +979,14 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
return fastop(ctxt, em_bsr);
}
-static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
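+/*
+ * The flag test now goes through an external assembler helper;
+ * __asm_test_cc() presumably mirrors the removed push-flags/popf/call
+ * sequence below and returns the result in AL.
+ */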
+extern u8 __asm_test_cc(void *fop, size_t flags);
+static __always_inline u8 test_cc(unsigned int condition, size_t flags)
{
- u8 rc;
- void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+ u8 rc = 0;
+ void(*fop)(void) = (void(*)(void))((char *)em_setcc + 4 * (condition & 0xf));
flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
- asm("push %[flags]; popf; call *%[fastop]"
- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
+ rc = __asm_test_cc(fop, flags);
return rc;
}
@@ -997,55 +1008,80 @@ static void fetch_register_operand(struct operand *op)
}
}
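+/*
+ * The inline movdqa accesses are gone (the target toolchain apparently has
+ * no x64 inline asm); each XMM register is saved/restored through a small
+ * external assembler helper, declared here and dispatched by register
+ * number in the switches below.
+ */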
+#define DECLARE_XMM(n) \
+extern void __asm_save_xmm##n(sse128_t *data); \
+extern void __asm_store_xmm##n(sse128_t *data);
+
+DECLARE_XMM(0)
+DECLARE_XMM(1)
+DECLARE_XMM(2)
+DECLARE_XMM(3)
+DECLARE_XMM(4)
+DECLARE_XMM(5)
+DECLARE_XMM(6)
+DECLARE_XMM(7)
+DECLARE_XMM(8)
+DECLARE_XMM(9)
+DECLARE_XMM(10)
+DECLARE_XMM(11)
+DECLARE_XMM(12)
+DECLARE_XMM(13)
+DECLARE_XMM(14)
+DECLARE_XMM(15)
+
+#define SAVE_XMM(n) \
+case n: __asm_save_xmm##n(data); break;
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
ctxt->ops->get_fpu(ctxt);
switch (reg) {
- case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
- case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
- case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
- case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
- case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
- case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
- case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
- case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
+ SAVE_XMM(0)
+ SAVE_XMM(1)
+ SAVE_XMM(2)
+ SAVE_XMM(3)
+ SAVE_XMM(4)
+ SAVE_XMM(5)
+ SAVE_XMM(6)
+ SAVE_XMM(7)
#ifdef CONFIG_X86_64
- case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
- case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
- case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
- case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
- case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
- case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
- case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
- case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
+ SAVE_XMM(8)
+ SAVE_XMM(9)
+ SAVE_XMM(10)
+ SAVE_XMM(11)
+ SAVE_XMM(12)
+ SAVE_XMM(13)
+ SAVE_XMM(14)
+ SAVE_XMM(15)
#endif
default: BUG();
}
ctxt->ops->put_fpu(ctxt);
}
+#define STORE_XMM(n) \
+case n: __asm_store_xmm##n(data); break;
static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
int reg)
{
ctxt->ops->get_fpu(ctxt);
switch (reg) {
- case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
- case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
- case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
- case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
- case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
- case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
- case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
- case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
+ STORE_XMM(0)
+ STORE_XMM(1)
+ STORE_XMM(2)
+ STORE_XMM(3)
+ STORE_XMM(4)
+ STORE_XMM(5)
+ STORE_XMM(6)
+ STORE_XMM(7)
#ifdef CONFIG_X86_64
- case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
- case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
- case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
- case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
- case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
- case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
- case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
- case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
+ STORE_XMM(8)
+ STORE_XMM(9)
+ STORE_XMM(10)
+ STORE_XMM(11)
+ STORE_XMM(12)
+ STORE_XMM(13)
+ STORE_XMM(14)
+ STORE_XMM(15)
#endif
default: BUG();
}
@@ -1056,14 +1092,14 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
ctxt->ops->get_fpu(ctxt);
switch (reg) {
- case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
- case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
- case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
- case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
- case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
- case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
- case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
- case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
+ case 0: __asm_save_mm0(data); break;
+ case 1: __asm_save_mm1(data); break;
+ case 2: __asm_save_mm2(data); break;
+ case 3: __asm_save_mm3(data); break;
+ case 4: __asm_save_mm4(data); break;
+ case 5: __asm_save_mm5(data); break;
+ case 6: __asm_save_mm6(data); break;
+ case 7: __asm_save_mm7(data); break;
default: BUG();
}
ctxt->ops->put_fpu(ctxt);
@@ -1073,14 +1109,14 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
ctxt->ops->get_fpu(ctxt);
switch (reg) {
- case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
- case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
- case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
- case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
- case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
- case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
- case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
- case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
+ case 0: __asm_store_mm0(data); break;
+ case 1: __asm_store_mm1(data); break;
+ case 2: __asm_store_mm2(data); break;
+ case 3: __asm_store_mm3(data); break;
+ case 4: __asm_store_mm4(data); break;
+ case 5: __asm_store_mm5(data); break;
+ case 6: __asm_store_mm6(data); break;
+ case 7: __asm_store_mm7(data); break;
default: BUG();
}
ctxt->ops->put_fpu(ctxt);
@@ -1092,20 +1128,20 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
return emulate_nm(ctxt);
ctxt->ops->get_fpu(ctxt);
- asm volatile("fninit");
+ __fninit();
ctxt->ops->put_fpu(ctxt);
return X86EMUL_CONTINUE;
}
static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
- u16 fcw;
+ u16 fcw = 0;
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
ctxt->ops->get_fpu(ctxt);
- asm volatile("fnstcw %0": "+m"(fcw));
+ __fnstcw(&fcw);
ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fcw;
@@ -1115,13 +1151,13 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
- u16 fsw;
+ u16 fsw = 0;
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
ctxt->ops->get_fpu(ctxt);
- asm volatile("fnstsw %0": "+m"(fsw));
+ __fnstsw(&fsw);
ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fsw;
@@ -1217,13 +1253,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
switch (ctxt->modrm_mod) {
case 0:
if (ctxt->modrm_rm == 6)
- modrm_ea += insn_fetch(u16, ctxt);
+ insn_fetch_modrmea(u16, ctxt);
break;
case 1:
- modrm_ea += insn_fetch(s8, ctxt);
+ insn_fetch_modrmea(s8, ctxt);
break;
case 2:
- modrm_ea += insn_fetch(u16, ctxt);
+ insn_fetch_modrmea(u16, ctxt);
break;
}
switch (ctxt->modrm_rm) {
@@ -1260,13 +1296,15 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
} else {
/* 32/64-bit ModR/M decode. */
if ((ctxt->modrm_rm & 7) == 4) {
- sib = insn_fetch(u8, ctxt);
+ rc = insn_fetch(u8, ctxt, sib);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
index_reg |= (sib >> 3) & 7;
base_reg |= sib & 7;
scale = sib >> 6;
if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
- modrm_ea += insn_fetch(s32, ctxt);
+ insn_fetch_modrmea(s32, ctxt);
else {
modrm_ea += reg_read(ctxt, base_reg);
adjust_modrm_seg(ctxt, base_reg);
@@ -1278,7 +1316,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
if (index_reg != 4)
modrm_ea += reg_read(ctxt, index_reg) << scale;
} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
- modrm_ea += insn_fetch(s32, ctxt);
+ insn_fetch_modrmea(s32, ctxt);
if (ctxt->mode == X86EMUL_MODE_PROT64)
ctxt->rip_relative = 1;
} else {
@@ -1288,10 +1326,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
}
switch (ctxt->modrm_mod) {
case 1:
- modrm_ea += insn_fetch(s8, ctxt);
+ insn_fetch_modrmea(s8, ctxt);
break;
case 2:
- modrm_ea += insn_fetch(s32, ctxt);
+ insn_fetch_modrmea(s32, ctxt);
break;
}
}
@@ -1311,13 +1349,19 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt,
op->type = OP_MEM;
switch (ctxt->ad_bytes) {
case 2:
- op->addr.mem.ea = insn_fetch(u16, ctxt);
+ rc = insn_fetch(u16, ctxt, op->addr.mem.ea);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
break;
case 4:
- op->addr.mem.ea = insn_fetch(u32, ctxt);
+ rc = insn_fetch(u32, ctxt, op->addr.mem.ea);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
break;
case 8:
- op->addr.mem.ea = insn_fetch(u64, ctxt);
+ rc = insn_fetch(u64, ctxt, op->addr.mem.ea);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
break;
}
done:
@@ -1347,7 +1391,7 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
}
static int read_emulated(struct x86_emulate_ctxt *ctxt,
- unsigned long addr, void *dest, unsigned size)
+ size_t addr, void *dest, unsigned size)
{
int rc;
struct read_cache *mc = &ctxt->mem_read;
@@ -1716,7 +1760,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
static void write_register_operand(struct operand *op)
{
- return assign_register(op->addr.reg, op->val, op->bytes);
+ assign_register(op->addr.reg, op->val, op->bytes);
}
static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
@@ -1802,7 +1846,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
void *dest, int len)
{
int rc;
- unsigned long val, change_mask;
+ size_t val, change_mask;
int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
int cpl = ctxt->ops->cpl(ctxt);
@@ -1834,7 +1878,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
break;
}
- *(unsigned long *)dest =
+ *(size_t *)dest =
(ctxt->eflags & ~change_mask) | (val & change_mask);
return rc;
@@ -1893,7 +1937,7 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
int seg = ctxt->src2.val;
- unsigned long selector;
+ size_t selector;
int rc;
rc = emulate_pop(ctxt, &selector, 2);
@@ -1901,7 +1945,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
return rc;
if (ctxt->modrm_reg == VCPU_SREG_SS)
- ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
+ ctxt->interruptibility = GVM_X86_SHADOW_INT_MOV_SS;
if (ctxt->op_bytes > 2)
rsp_increment(ctxt, ctxt->op_bytes - 2);
@@ -1911,7 +1955,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
- unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
+ size_t old_esp = reg_read(ctxt, VCPU_REGS_RSP);
int rc = X86EMUL_CONTINUE;
int reg = VCPU_REGS_RAX;
@@ -1931,7 +1975,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
- ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
+ ctxt->src.val = (size_t)ctxt->eflags & ~X86_EFLAGS_VM;
return em_push(ctxt);
}
@@ -2034,16 +2078,16 @@ static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
{
int rc = X86EMUL_CONTINUE;
- unsigned long temp_eip = 0;
- unsigned long temp_eflags = 0;
- unsigned long cs = 0;
- unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ size_t temp_eip = 0;
+ size_t temp_eflags = 0;
+ size_t cs = 0;
+ size_t mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
X86_EFLAGS_AC | X86_EFLAGS_ID |
X86_EFLAGS_FIXED;
- unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
+ size_t vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
X86_EFLAGS_VIP;
/* TODO: Add stack limit check */
@@ -2168,7 +2212,7 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned long eip;
+ size_t eip;
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
@@ -2180,7 +2224,7 @@ static int em_ret(struct x86_emulate_ctxt *ctxt)
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned long eip, cs;
+ size_t eip, cs;
int cpl = ctxt->ops->cpl(ctxt);
struct desc_struct new_desc;
@@ -2267,15 +2311,26 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
return edx & bit(X86_FEATURE_LM);
}
-#define GET_SMSTATE(type, smbase, offset) \
- ({ \
- type __val; \
- int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
- sizeof(__val)); \
- if (r != X86EMUL_CONTINUE) \
- return X86EMUL_UNHANDLEABLE; \
- __val; \
- })
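+/*
+ * GET_SMSTATE() was a GCC statement expression; it now takes an explicit
+ * output argument and is used as a statement, still returning
+ * X86EMUL_UNHANDLEABLE from the caller if read_phys() fails.
+ */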
+#define GET_SMSTATE(type, smbase, offset, val) \
+do { \
+ type __val; \
+ int __r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val,\
+ sizeof(__val)); \
+ if (__r != X86EMUL_CONTINUE) \
+ return X86EMUL_UNHANDLEABLE; \
+ val = __val; \
+} while(0)
+
+#define __GET_SMSTATE_TYPE(type, smbase, offset) \
+static __always_inline int __get_smstate_##type(struct x86_emulate_ctxt *ctxt, size_t smbase, size_t offset, type *val) \
+{ \
+ type __val; \
+ int __r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
+ sizeof(__val)); \
+ if (__r == X86EMUL_CONTINUE) \
+ *val = __val; \
+	return __r; \
+}
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
@@ -2294,17 +2349,21 @@ static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
struct desc_struct desc;
int offset;
u16 selector;
+ u32 temp;
- selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
+	GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4, selector);
if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
+ GET_SMSTATE(u32, smbase, offset + 8, temp);
+ set_desc_base(&desc, temp);
+ GET_SMSTATE(u32, smbase, offset + 4, temp);
+ set_desc_limit(&desc, temp);
+ GET_SMSTATE(u32, smbase, offset, temp);
+ rsm_set_desc_flags(&desc, temp);
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
return X86EMUL_CONTINUE;
}
@@ -2313,16 +2372,19 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
struct desc_struct desc;
int offset;
- u16 selector;
- u32 base3;
+ u16 selector, temp16;
+ u32 base3, temp;
offset = 0x7e00 + n * 16;
- selector = GET_SMSTATE(u16, smbase, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- base3 = GET_SMSTATE(u32, smbase, offset + 12);
+ GET_SMSTATE(u16, smbase, offset, selector);
+ GET_SMSTATE(u16, smbase, offset + 2, temp16);
+ rsm_set_desc_flags(&desc, temp16 << 8);
+ GET_SMSTATE(u32, smbase, offset + 4, temp);
+ set_desc_limit(&desc, temp);
+ GET_SMSTATE(u32, smbase, offset + 8, temp);
+ set_desc_base(&desc, temp);
+ GET_SMSTATE(u32, smbase, offset + 12, base3);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
return X86EMUL_CONTINUE;
@@ -2362,38 +2424,47 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
u16 selector;
u32 val, cr0, cr4;
int i;
+ u32 temp;
- cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
+ GET_SMSTATE(u32, smbase, 0x7ffc, cr0);
+ GET_SMSTATE(u32, smbase, 0x7ff8, temp);
+ ctxt->ops->set_cr(ctxt, 3, temp);
+ GET_SMSTATE(u32, smbase, 0x7ff4, ctxt->eflags);
+ ctxt->eflags |= X86_EFLAGS_FIXED;
+ GET_SMSTATE(u32, smbase, 0x7ff0, ctxt->_eip);
for (i = 0; i < 8; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
+ GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4, *reg_write(ctxt, i));
- val = GET_SMSTATE(u32, smbase, 0x7fcc);
+ GET_SMSTATE(u32, smbase, 0x7fcc, val);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7fc8);
+ GET_SMSTATE(u32, smbase, 0x7fc8, val);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- selector = GET_SMSTATE(u32, smbase, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
+ GET_SMSTATE(u32, smbase, 0x7fc4, selector);
+ GET_SMSTATE(u32, smbase, 0x7f64, temp);
+ set_desc_base(&desc, temp);
+ GET_SMSTATE(u32, smbase, 0x7f60, temp);
+ set_desc_limit(&desc, temp);
+ GET_SMSTATE(u32, smbase, 0x7f5c, temp);
+ rsm_set_desc_flags(&desc, temp);
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
- selector = GET_SMSTATE(u32, smbase, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
+ GET_SMSTATE(u32, smbase, 0x7fc0, selector);
+ GET_SMSTATE(u32, smbase, 0x7f80, temp);
+ set_desc_base(&desc, temp);
+ GET_SMSTATE(u32, smbase, 0x7f7c, temp);
+ set_desc_limit(&desc, temp);
+ GET_SMSTATE(u32, smbase, 0x7f78, temp);
+ rsm_set_desc_flags(&desc, temp);
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
+ GET_SMSTATE(u32, smbase, 0x7f74, dt.address);
+ GET_SMSTATE(u32, smbase, 0x7f70, dt.size);
ctxt->ops->set_gdt(ctxt, &dt);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
+ GET_SMSTATE(u32, smbase, 0x7f58, dt.address);
+ GET_SMSTATE(u32, smbase, 0x7f54, dt.size);
ctxt->ops->set_idt(ctxt, &dt);
for (i = 0; i < 6; i++) {
@@ -2402,9 +2473,10 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
return r;
}
- cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
+ GET_SMSTATE(u32, smbase, 0x7f14, cr4);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+ GET_SMSTATE(u32, smbase, 0x7ef8, temp);
+ ctxt->ops->set_smbase(ctxt, temp);
return rsm_enter_protected_mode(ctxt, cr0, cr4);
}
@@ -2417,45 +2489,56 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
u32 base3;
u16 selector;
int i, r;
+ u64 temp64;
+ u32 temp = 0;
for (i = 0; i < 16; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
+ GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8, *reg_write(ctxt, i));
- ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
+ GET_SMSTATE(u64, smbase, 0x7f78, ctxt->_eip);
+ GET_SMSTATE(u32, smbase, 0x7f70, ctxt->eflags);
+ ctxt->eflags |= X86_EFLAGS_FIXED;
- val = GET_SMSTATE(u32, smbase, 0x7f68);
+ GET_SMSTATE(u32, smbase, 0x7f68, val);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7f60);
+ GET_SMSTATE(u32, smbase, 0x7f60, val);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
- cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
- val = GET_SMSTATE(u64, smbase, 0x7ed0);
+ GET_SMSTATE(u64, smbase, 0x7f58, cr0);
+ GET_SMSTATE(u64, smbase, 0x7f50, temp64);
+	ctxt->ops->set_cr(ctxt, 3, temp64);
+ GET_SMSTATE(u64, smbase, 0x7f48, cr4);
+ GET_SMSTATE(u32, smbase, 0x7f00, temp);
+ ctxt->ops->set_smbase(ctxt, temp);
+ GET_SMSTATE(u64, smbase, 0x7ed0, val);
ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
- selector = GET_SMSTATE(u32, smbase, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
- base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
+ GET_SMSTATE(u32, smbase, 0x7e90, selector);
+ GET_SMSTATE(u32, smbase, 0x7e92, temp);
+ rsm_set_desc_flags(&desc, temp << 8);
+ GET_SMSTATE(u32, smbase, 0x7e94, temp);
+ set_desc_limit(&desc, temp);
+ GET_SMSTATE(u32, smbase, 0x7e98, temp);
+ set_desc_base(&desc, temp);
+ GET_SMSTATE(u32, smbase, 0x7e9c, base3);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
+ GET_SMSTATE(u32, smbase, 0x7e84, dt.size);
+ GET_SMSTATE(u64, smbase, 0x7e88, dt.address);
ctxt->ops->set_idt(ctxt, &dt);
- selector = GET_SMSTATE(u32, smbase, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
- base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
+ GET_SMSTATE(u32, smbase, 0x7e70, selector);
+ GET_SMSTATE(u32, smbase, 0x7e72, temp);
+ rsm_set_desc_flags(&desc, temp << 8);
+ GET_SMSTATE(u32, smbase, 0x7e74, temp);
+ set_desc_limit(&desc, temp);
+ GET_SMSTATE(u32, smbase, 0x7e78, temp);
+ set_desc_base(&desc, temp);
+ GET_SMSTATE(u32, smbase, 0x7e7c, base3);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
+ GET_SMSTATE(u32, smbase, 0x7e64, dt.size);
+ GET_SMSTATE(u64, smbase, 0x7e68, dt.address);
ctxt->ops->set_gdt(ctxt, &dt);
r = rsm_enter_protected_mode(ctxt, cr0, cr4);
@@ -2473,7 +2556,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
- unsigned long cr0, cr4, efer;
+ size_t cr0, cr4, efer;
u64 smbase;
int ret;
@@ -2806,7 +2889,7 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
int r;
u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
unsigned mask = (1 << len) - 1;
- unsigned long base;
+ size_t base;
ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
if (!tr_seg.p)
@@ -3226,7 +3309,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
if (has_error_code) {
ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
ctxt->lock_prefix = 0;
- ctxt->src.val = (unsigned long) error_code;
+ ctxt->src.val = (size_t) error_code;
ret = em_push(ctxt);
}
@@ -3260,7 +3343,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
struct operand *op)
{
- int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
+ int df = (ctxt->eflags & X86_EFLAGS_DF) ? -(int)op->count : op->count;
register_address_increment(ctxt, reg, df * op->bytes);
op->addr.mem.ea = register_address(ctxt, reg);
@@ -3349,7 +3432,7 @@ static int em_call(struct x86_emulate_ctxt *ctxt)
int rc;
long rel = ctxt->src.val;
- ctxt->src.val = (unsigned long)ctxt->_eip;
+ ctxt->src.val = (size_t)ctxt->_eip;
rc = jmp_rel(ctxt, rel);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -3389,7 +3472,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
/* If we failed, we tainted the memory, but the very least we should
restore cs */
if (rc != X86EMUL_CONTINUE) {
- pr_warn_once("faulting far call emulation tainted memory\n");
+ //pr_warn_once("faulting far call emulation tainted memory\n");
goto fail;
}
return rc;
@@ -3403,7 +3486,7 @@ fail:
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned long eip;
+ size_t eip;
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
@@ -3496,7 +3579,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
*/
tmp = (u16)ctxt->src.val;
ctxt->dst.val &= ~0xffffUL;
- ctxt->dst.val |= (unsigned long)swab16(tmp);
+ ctxt->dst.val |= (size_t)swab16(tmp);
break;
case 4:
ctxt->dst.val = swab32((u32)ctxt->src.val);
@@ -3522,7 +3605,7 @@ static int em_cr_write(struct x86_emulate_ctxt *ctxt)
static int em_dr_write(struct x86_emulate_ctxt *ctxt)
{
- unsigned long val;
+ size_t val;
if (ctxt->mode == X86EMUL_MODE_PROT64)
val = ctxt->src.val & ~0ULL;
@@ -3581,7 +3664,7 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
return emulate_ud(ctxt);
if (ctxt->modrm_reg == VCPU_SREG_SS)
- ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
+ ctxt->interruptibility = GVM_X86_SHADOW_INT_MOV_SS;
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
@@ -3672,6 +3755,8 @@ static int em_sidt(struct x86_emulate_ctxt *ctxt)
return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
}
+// Disable VC warning for unaligned access in desc_ptr
+#pragma warning(disable : 4366)
static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
{
struct desc_ptr desc_ptr;
@@ -3695,6 +3780,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
ctxt->dst.type = OP_NONE;
return X86EMUL_CONTINUE;
}
+#pragma warning(default : 4366)
static int em_lgdt(struct x86_emulate_ctxt *ctxt)
{
@@ -3776,7 +3862,7 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
if (emulator_bad_iopl(ctxt))
return emulate_gp(ctxt, 0);
- ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
+ ctxt->interruptibility = GVM_X86_SHADOW_INT_STI;
ctxt->eflags |= X86_EFLAGS_IF;
return X86EMUL_CONTINUE;
}
@@ -3820,11 +3906,11 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
switch (ctxt->op_bytes) {
#ifdef CONFIG_X86_64
case 8:
- asm("bswap %0" : "+r"(ctxt->dst.val));
+ __bswap64(&ctxt->dst.val);
break;
#endif
default:
- asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
+ __bswap32((u32 *)&ctxt->dst.val);
break;
}
return X86EMUL_CONTINUE;
@@ -3846,7 +3932,9 @@ static bool valid_cr(int nr)
{
switch (nr) {
case 0:
- case 2 ... 4:
+ case 2:
+ case 3:
+ case 4:
case 8:
return true;
default:
@@ -3925,7 +4013,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
{
- unsigned long dr7;
+ size_t dr7;
ctxt->ops->get_dr(ctxt, 7, &dr7);
@@ -4575,16 +4663,24 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
/* NB. Immediates are sign-extended as necessary. */
switch (op->bytes) {
case 1:
- op->val = insn_fetch(s8, ctxt);
+ rc = insn_fetch(s8, ctxt, op->val);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
break;
case 2:
- op->val = insn_fetch(s16, ctxt);
+ rc = insn_fetch(s16, ctxt, op->val);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
break;
case 4:
- op->val = insn_fetch(s32, ctxt);
+ rc = insn_fetch(s32, ctxt, op->val);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
break;
case 8:
- op->val = insn_fetch(s64, ctxt);
+ rc = insn_fetch(s64, ctxt, (s64)op->val);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
break;
}
if (!sign_extension) {
@@ -4766,7 +4862,6 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
break;
}
-done:
return rc;
}
@@ -4817,7 +4912,10 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
/* Legacy prefixes. */
for (;;) {
- switch (ctxt->b = insn_fetch(u8, ctxt)) {
+ rc = insn_fetch(u8, ctxt, ctxt->b);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ switch (ctxt->b) {
case 0x66: /* operand-size override */
op_prefix = true;
/* switch between 2/4 bytes */
@@ -4843,7 +4941,22 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
has_seg_override = true;
ctxt->seg_override = ctxt->b & 7;
break;
- case 0x40 ... 0x4f: /* REX */
+ case 0x40:
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x45:
+ case 0x46:
+ case 0x47:
+ case 0x48:
+ case 0x49:
+ case 0x4a:
+ case 0x4b:
+ case 0x4c:
+ case 0x4d:
+ case 0x4e:
+ case 0x4f: /* REX */
if (mode != X86EMUL_MODE_PROT64)
goto done_prefixes;
ctxt->rex_prefix = ctxt->b;
@@ -4875,20 +4988,27 @@ done_prefixes:
/* Two-byte opcode? */
if (ctxt->b == 0x0f) {
ctxt->opcode_len = 2;
- ctxt->b = insn_fetch(u8, ctxt);
+ rc = insn_fetch(u8, ctxt, ctxt->b);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
opcode = twobyte_table[ctxt->b];
/* 0F_38 opcode map */
if (ctxt->b == 0x38) {
ctxt->opcode_len = 3;
- ctxt->b = insn_fetch(u8, ctxt);
+ rc = insn_fetch(u8, ctxt, ctxt->b);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
opcode = opcode_map_0f_38[ctxt->b];
}
}
ctxt->d = opcode.flags;
- if (ctxt->d & ModRM)
- ctxt->modrm = insn_fetch(u8, ctxt);
+ if (ctxt->d & ModRM) {
+ rc = insn_fetch(u8, ctxt, ctxt->modrm);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ }
/* vex-prefix instructions are not implemented */
if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
@@ -5069,15 +5189,11 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
bool fault = false;
ctxt->ops->get_fpu(ctxt);
- asm volatile("1: fwait \n\t"
- "2: \n\t"
- ".pushsection .fixup,\"ax\" \n\t"
- "3: \n\t"
- "movb $1, %[fault] \n\t"
- "jmp 2b \n\t"
- ".popsection \n\t"
- _ASM_EXTABLE(1b, 3b)
- : [fault]"+qm"(fault));
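+	/* Structured exception handling replaces the exception-table fixup around fwait(). */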
+ __try {
+ __fwait();
+ } __except(EXCEPTION_EXECUTE_HANDLER) {
+ fault = true;
+ }
ctxt->ops->put_fpu(ctxt);
if (unlikely(fault))
@@ -5093,18 +5209,17 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
}
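+/*
+ * The fastop trampoline is likewise an external assembler routine now; it
+ * presumably loads dst/src/src2 from ctxt, calls the fastop at 'fop' and
+ * hands the resulting flags back through *flags, as the removed inline asm
+ * below did.
+ */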
+extern void __asm_fastop(size_t *flags, void *fop,
+ struct x86_emulate_ctxt *ctxt);
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
- register void *__sp asm(_ASM_SP);
- ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
+ size_t flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
+ char *__fop = (char *)fop;
if (!(ctxt->d & ByteOp))
- fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+ __fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
- : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop), "+r"(__sp)
- : "c"(ctxt->src2.val));
+ __asm_fastop(&flags, __fop, ctxt);
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
if (!fop) /* exception is returned in fop variable */
@@ -5115,7 +5230,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
memset(&ctxt->rip_relative, 0,
- (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
+ (char *)&ctxt->modrm - (char *)&ctxt->rip_relative);
ctxt->io_read.pos = 0;
ctxt->io_read.end = 0;
@@ -5289,14 +5404,36 @@ special_insn:
goto threebyte_insn;
switch (ctxt->b) {
- case 0x70 ... 0x7f: /* jcc (short) */
+ case 0x70: /* jcc (short) */
+ case 0x71:
+ case 0x72:
+ case 0x73:
+ case 0x74:
+ case 0x75:
+ case 0x76:
+ case 0x77:
+ case 0x78:
+ case 0x79:
+ case 0x7a:
+ case 0x7b:
+ case 0x7c:
+ case 0x7d:
+ case 0x7e:
+ case 0x7f:
if (test_cc(ctxt->b, ctxt->eflags))
rc = jmp_rel(ctxt, ctxt->src.val);
break;
case 0x8d: /* lea r16/r32, m */
ctxt->dst.val = ctxt->src.addr.mem.ea;
break;
- case 0x90 ... 0x97: /* nop / xchg reg, rax */
+ case 0x90: /* nop / xchg reg, rax */
+ case 0x91:
+ case 0x92:
+ case 0x93:
+ case 0x94:
+ case 0x95:
+ case 0x96:
+ case 0x97:
if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
ctxt->dst.type = OP_NONE;
else
@@ -5382,7 +5519,7 @@ writeback:
count = ctxt->src.count;
else
count = ctxt->dst.count;
- register_address_increment(ctxt, VCPU_REGS_RCX, -count);
+ register_address_increment(ctxt, VCPU_REGS_RCX, -(int)count);
if (!string_insn_completed(ctxt)) {
/*
@@ -5436,25 +5573,72 @@ twobyte_insn:
case 0x21: /* mov from dr to reg */
ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
break;
- case 0x40 ... 0x4f: /* cmov */
+ case 0x40: /* cmov */
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x45:
+ case 0x46:
+ case 0x47:
+ case 0x48:
+ case 0x49:
+ case 0x4a:
+ case 0x4b:
+ case 0x4c:
+ case 0x4d:
+ case 0x4e:
+ case 0x4f:
if (test_cc(ctxt->b, ctxt->eflags))
ctxt->dst.val = ctxt->src.val;
else if (ctxt->op_bytes != 4)
ctxt->dst.type = OP_NONE; /* no writeback */
break;
- case 0x80 ... 0x8f: /* jnz rel, etc*/
+ case 0x80: /* jnz rel, etc*/
+ case 0x81:
+ case 0x82:
+ case 0x83:
+ case 0x84:
+ case 0x85:
+ case 0x86:
+ case 0x87:
+ case 0x88:
+ case 0x89:
+ case 0x8a:
+ case 0x8b:
+ case 0x8c:
+ case 0x8d:
+ case 0x8e:
+ case 0x8f:
if (test_cc(ctxt->b, ctxt->eflags))
rc = jmp_rel(ctxt, ctxt->src.val);
break;
- case 0x90 ... 0x9f: /* setcc r/m8 */
+ case 0x90: /* setcc r/m8 */
+ case 0x91:
+ case 0x92:
+ case 0x93:
+ case 0x94:
+ case 0x95:
+ case 0x96:
+ case 0x97:
+ case 0x98:
+ case 0x99:
+ case 0x9a:
+ case 0x9b:
+ case 0x9c:
+ case 0x9d:
+ case 0x9e:
+ case 0x9f:
ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
break;
- case 0xb6 ... 0xb7: /* movzx */
+ case 0xb6: /* movzx */
+ case 0xb7:
ctxt->dst.bytes = ctxt->op_bytes;
ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
: (u16) ctxt->src.val;
break;
- case 0xbe ... 0xbf: /* movsx */
+ case 0xbe: /* movsx */
+ case 0xbf:
ctxt->dst.bytes = ctxt->op_bytes;
ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
(s16) ctxt->src.val;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
deleted file mode 100644
index 42b1c83..0000000
--- a/arch/x86/kvm/hyperv.c
+++ /dev/null
@@ -1,1266 +0,0 @@
-/*
- * KVM Microsoft Hyper-V emulation
- *
- * derived from arch/x86/kvm/x86.c
- *
- * Copyright (C) 2006 Qumranet, Inc.
- * Copyright (C) 2008 Qumranet, Inc.
- * Copyright IBM Corporation, 2008
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
- *
- * Authors:
- * Avi Kivity <avi@qumranet.com>
- * Yaniv Kamay <yaniv@qumranet.com>
- * Amit Shah <amit.shah@qumranet.com>
- * Ben-Ami Yassour <benami@il.ibm.com>
- * Andrey Smetanin <asmetanin@virtuozzo.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include "x86.h"
-#include "lapic.h"
-#include "ioapic.h"
-#include "hyperv.h"
-
-#include <linux/kvm_host.h>
-#include <linux/highmem.h>
-#include <asm/apicdef.h>
-#include <trace/events/kvm.h>
-
-#include "trace.h"
-
-static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
-{
- return atomic64_read(&synic->sint[sint]);
-}
-
-static inline int synic_get_sint_vector(u64 sint_value)
-{
- if (sint_value & HV_SYNIC_SINT_MASKED)
- return -1;
- return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
-}
-
-static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
- int vector)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
- if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
- return true;
- }
- return false;
-}
-
-static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
- int vector)
-{
- int i;
- u64 sint_value;
-
- for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
- sint_value = synic_read_sint(synic, i);
- if (synic_get_sint_vector(sint_value) == vector &&
- sint_value & HV_SYNIC_SINT_AUTO_EOI)
- return true;
- }
- return false;
-}
-
-static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
- u64 data, bool host)
-{
- int vector;
-
- vector = data & HV_SYNIC_SINT_VECTOR_MASK;
- if (vector < 16 && !host)
- return 1;
- /*
- * Guest may configure multiple SINTs to use the same vector, so
- * we maintain a bitmap of vectors handled by synic, and a
- * bitmap of vectors with auto-eoi behavior. The bitmaps are
- * updated here, and atomically queried on fast paths.
- */
-
- atomic64_set(&synic->sint[sint], data);
-
- if (synic_has_vector_connected(synic, vector))
- __set_bit(vector, synic->vec_bitmap);
- else
- __clear_bit(vector, synic->vec_bitmap);
-
- if (synic_has_vector_auto_eoi(synic, vector))
- __set_bit(vector, synic->auto_eoi_bitmap);
- else
- __clear_bit(vector, synic->auto_eoi_bitmap);
-
- /* Load SynIC vectors into EOI exit bitmap */
- kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
- return 0;
-}
-
-static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vcpu_hv_synic *synic;
-
- if (vcpu_id >= atomic_read(&kvm->online_vcpus))
- return NULL;
- vcpu = kvm_get_vcpu(kvm, vcpu_id);
- if (!vcpu)
- return NULL;
- synic = vcpu_to_synic(vcpu);
- return (synic->active) ? synic : NULL;
-}
-
-static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
- u32 sint)
-{
- struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
- struct page *page;
- gpa_t gpa;
- struct hv_message *msg;
- struct hv_message_page *msg_page;
-
- gpa = synic->msg_page & PAGE_MASK;
- page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
- if (is_error_page(page)) {
- vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
- gpa);
- return;
- }
- msg_page = kmap_atomic(page);
-
- msg = &msg_page->sint_message[sint];
- msg->header.message_flags.msg_pending = 0;
-
- kunmap_atomic(msg_page);
- kvm_release_page_dirty(page);
- kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
-}
-
-static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
- struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
- struct kvm_vcpu_hv_stimer *stimer;
- int gsi, idx, stimers_pending;
-
- trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
-
- if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
- synic_clear_sint_msg_pending(synic, sint);
-
- /* Try to deliver pending Hyper-V SynIC timers messages */
- stimers_pending = 0;
- for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
- stimer = &hv_vcpu->stimer[idx];
- if (stimer->msg_pending &&
- (stimer->config & HV_STIMER_ENABLE) &&
- HV_STIMER_SINT(stimer->config) == sint) {
- set_bit(stimer->index,
- hv_vcpu->stimer_pending_bitmap);
- stimers_pending++;
- }
- }
- if (stimers_pending)
- kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
-
- idx = srcu_read_lock(&kvm->irq_srcu);
- gsi = atomic_read(&synic->sint_to_gsi[sint]);
- if (gsi != -1)
- kvm_notify_acked_gsi(kvm, gsi);
- srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
-{
- struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
- struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
-
- hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
- hv_vcpu->exit.u.synic.msr = msr;
- hv_vcpu->exit.u.synic.control = synic->control;
- hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
- hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
-
- kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
-}
-
-static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
- u32 msr, u64 data, bool host)
-{
- struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
- int ret;
-
- if (!synic->active)
- return 1;
-
- trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
-
- ret = 0;
- switch (msr) {
- case HV_X64_MSR_SCONTROL:
- synic->control = data;
- if (!host)
- synic_exit(synic, msr);
- break;
- case HV_X64_MSR_SVERSION:
- if (!host) {
- ret = 1;
- break;
- }
- synic->version = data;
- break;
- case HV_X64_MSR_SIEFP:
- if (data & HV_SYNIC_SIEFP_ENABLE)
- if (kvm_clear_guest(vcpu->kvm,
- data & PAGE_MASK, PAGE_SIZE)) {
- ret = 1;
- break;
- }
- synic->evt_page = data;
- if (!host)
- synic_exit(synic, msr);
- break;
- case HV_X64_MSR_SIMP:
- if (data & HV_SYNIC_SIMP_ENABLE)
- if (kvm_clear_guest(vcpu->kvm,
- data & PAGE_MASK, PAGE_SIZE)) {
- ret = 1;
- break;
- }
- synic->msg_page = data;
- if (!host)
- synic_exit(synic, msr);
- break;
- case HV_X64_MSR_EOM: {
- int i;
-
- for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
- kvm_hv_notify_acked_sint(vcpu, i);
- break;
- }
- case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
- ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
- break;
- default:
- ret = 1;
- break;
- }
- return ret;
-}
-
-static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
-{
- int ret;
-
- if (!synic->active)
- return 1;
-
- ret = 0;
- switch (msr) {
- case HV_X64_MSR_SCONTROL:
- *pdata = synic->control;
- break;
- case HV_X64_MSR_SVERSION:
- *pdata = synic->version;
- break;
- case HV_X64_MSR_SIEFP:
- *pdata = synic->evt_page;
- break;
- case HV_X64_MSR_SIMP:
- *pdata = synic->msg_page;
- break;
- case HV_X64_MSR_EOM:
- *pdata = 0;
- break;
- case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
- *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
- break;
- default:
- ret = 1;
- break;
- }
- return ret;
-}
-
-int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
-{
- struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
- struct kvm_lapic_irq irq;
- int ret, vector;
-
- if (sint >= ARRAY_SIZE(synic->sint))
- return -EINVAL;
-
- vector = synic_get_sint_vector(synic_read_sint(synic, sint));
- if (vector < 0)
- return -ENOENT;
-
- memset(&irq, 0, sizeof(irq));
- irq.dest_id = kvm_apic_id(vcpu->arch.apic);
- irq.dest_mode = APIC_DEST_PHYSICAL;
- irq.delivery_mode = APIC_DM_FIXED;
- irq.vector = vector;
- irq.level = 1;
-
- ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL);
- trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
- return ret;
-}
-
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
-{
- struct kvm_vcpu_hv_synic *synic;
-
- synic = synic_get(kvm, vcpu_id);
- if (!synic)
- return -EINVAL;
-
- return synic_set_irq(synic, sint);
-}
-
-void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
-{
- struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
- int i;
-
- trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
-
- for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
- if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
- kvm_hv_notify_acked_sint(vcpu, i);
-}
-
-static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
-{
- struct kvm_vcpu_hv_synic *synic;
-
- synic = synic_get(kvm, vcpu_id);
- if (!synic)
- return -EINVAL;
-
- if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
- return -EINVAL;
-
- atomic_set(&synic->sint_to_gsi[sint], gsi);
- return 0;
-}
-
-void kvm_hv_irq_routing_update(struct kvm *kvm)
-{
- struct kvm_irq_routing_table *irq_rt;
- struct kvm_kernel_irq_routing_entry *e;
- u32 gsi;
-
- irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
- lockdep_is_held(&kvm->irq_lock));
-
- for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
- hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
- if (e->type == KVM_IRQ_ROUTING_HV_SINT)
- kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
- e->hv_sint.sint, gsi);
- }
- }
-}
-
-static void synic_init(struct kvm_vcpu_hv_synic *synic)
-{
- int i;
-
- memset(synic, 0, sizeof(*synic));
- synic->version = HV_SYNIC_VERSION_1;
- for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
- atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
- atomic_set(&synic->sint_to_gsi[i], -1);
- }
-}
-
-static u64 get_time_ref_counter(struct kvm *kvm)
-{
- struct kvm_hv *hv = &kvm->arch.hyperv;
- struct kvm_vcpu *vcpu;
- u64 tsc;
-
- /*
- * The guest has not set up the TSC page or the clock isn't
- * stable, fall back to get_kvmclock_ns.
- */
- if (!hv->tsc_ref.tsc_sequence)
- return div_u64(get_kvmclock_ns(kvm), 100);
-
- vcpu = kvm_get_vcpu(kvm, 0);
- tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
- + hv->tsc_ref.tsc_offset;
-}
-
-static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
- bool vcpu_kick)
-{
- struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
-
- set_bit(stimer->index,
- vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
- kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
- if (vcpu_kick)
- kvm_vcpu_kick(vcpu);
-}
-
-static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
-{
- struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
-
- trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
- stimer->index);
-
- hrtimer_cancel(&stimer->timer);
- clear_bit(stimer->index,
- vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
- stimer->msg_pending = false;
- stimer->exp_time = 0;
-}
-
-static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
-{
- struct kvm_vcpu_hv_stimer *stimer;
-
- stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
- trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
- stimer->index);
- stimer_mark_pending(stimer, true);
-
- return HRTIMER_NORESTART;
-}
-
-/*
- * stimer_start() assumptions:
- * a) stimer->count is not equal to 0
- * b) stimer->config has HV_STIMER_ENABLE flag
- */
-static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
-{
- u64 time_now;
- ktime_t ktime_now;
-
- time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
- ktime_now = ktime_get();
-
- if (stimer->config & HV_STIMER_PERIODIC) {
- if (stimer->exp_time) {
- if (time_now >= stimer->exp_time) {
- u64 remainder;
-
- div64_u64_rem(time_now - stimer->exp_time,
- stimer->count, &remainder);
- stimer->exp_time =
- time_now + (stimer->count - remainder);
- }
- } else
- stimer->exp_time = time_now + stimer->count;
-
- trace_kvm_hv_stimer_start_periodic(
- stimer_to_vcpu(stimer)->vcpu_id,
- stimer->index,
- time_now, stimer->exp_time);
-
- hrtimer_start(&stimer->timer,
- ktime_add_ns(ktime_now,
- 100 * (stimer->exp_time - time_now)),
- HRTIMER_MODE_ABS);
- return 0;
- }
- stimer->exp_time = stimer->count;
- if (time_now >= stimer->count) {
- /*
- * Expire timer according to Hypervisor Top-Level Functional
-		 * specification v4 (15.3.1):
- * "If a one shot is enabled and the specified count is in
- * the past, it will expire immediately."
- */
- stimer_mark_pending(stimer, false);
- return 0;
- }
-
- trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
- stimer->index,
- time_now, stimer->count);
-
- hrtimer_start(&stimer->timer,
- ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
- HRTIMER_MODE_ABS);
- return 0;
-}
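
The periodic branch above keeps the timer's phase: once time_now has passed the old deadline, the next expiration is pushed to the next multiple of the period measured from the original deadline, and the hrtimer delta is converted from Hyper-V 100ns units to nanoseconds (the factor of 100). A small stand-alone check of that arithmetic, with hypothetical values (all in 100ns units):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t count = 100000;      /* 10 ms period */
        uint64_t exp_time = 1000000;  /* deadline that was missed */
        uint64_t time_now = 1234567;

        uint64_t remainder = (time_now - exp_time) % count;

        exp_time = time_now + (count - remainder);   /* 1300000: phase preserved */
        printf("next exp_time = %llu (100ns units), hrtimer delta = %llu ns\n",
               (unsigned long long)exp_time,
               (unsigned long long)(100 * (exp_time - time_now)));
        return 0;
    }
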
-
-static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
- bool host)
-{
- trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
- stimer->index, config, host);
-
- stimer_cleanup(stimer);
- if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0)
- config &= ~HV_STIMER_ENABLE;
- stimer->config = config;
- stimer_mark_pending(stimer, false);
- return 0;
-}
-
-static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
- bool host)
-{
- trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
- stimer->index, count, host);
-
- stimer_cleanup(stimer);
- stimer->count = count;
- if (stimer->count == 0)
- stimer->config &= ~HV_STIMER_ENABLE;
- else if (stimer->config & HV_STIMER_AUTOENABLE)
- stimer->config |= HV_STIMER_ENABLE;
- stimer_mark_pending(stimer, false);
- return 0;
-}
-
-static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
-{
- *pconfig = stimer->config;
- return 0;
-}
-
-static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
-{
- *pcount = stimer->count;
- return 0;
-}
-
-static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
- struct hv_message *src_msg)
-{
- struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
- struct page *page;
- gpa_t gpa;
- struct hv_message *dst_msg;
- int r;
- struct hv_message_page *msg_page;
-
- if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
- return -ENOENT;
-
- gpa = synic->msg_page & PAGE_MASK;
- page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
- if (is_error_page(page))
- return -EFAULT;
-
- msg_page = kmap_atomic(page);
- dst_msg = &msg_page->sint_message[sint];
- if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE,
- src_msg->header.message_type) != HVMSG_NONE) {
- dst_msg->header.message_flags.msg_pending = 1;
- r = -EAGAIN;
- } else {
- memcpy(&dst_msg->u.payload, &src_msg->u.payload,
- src_msg->header.payload_size);
- dst_msg->header.message_type = src_msg->header.message_type;
- dst_msg->header.payload_size = src_msg->header.payload_size;
- r = synic_set_irq(synic, sint);
- if (r >= 1)
- r = 0;
- else if (r == 0)
- r = -EFAULT;
- }
- kunmap_atomic(msg_page);
- kvm_release_page_dirty(page);
- kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
- return r;
-}
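
The slot handling above is a compare-and-swap on the slot's message_type: a free slot (HVMSG_NONE) is claimed and filled, while a busy slot only gets its msg_pending flag set so the guest knows to re-poll once it writes EOM. A simplified user-space sketch of that claim-or-mark-pending pattern, using C11 atomics in place of sync_cmpxchg (the struct layout and names are illustrative, not the real SynIC message page):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define HVMSG_NONE 0u

    struct msg_slot {
        _Atomic uint32_t message_type;
        bool msg_pending;
        /* payload omitted */
    };

    /* Returns 0 when the slot was claimed, -1 when it was busy (the real
     * code returns -EAGAIN and retries after the guest's EOM write). */
    int try_claim_slot(struct msg_slot *slot, uint32_t type)
    {
        uint32_t expected = HVMSG_NONE;

        if (!atomic_compare_exchange_strong(&slot->message_type,
                                            &expected, type)) {
            slot->msg_pending = true;
            return -1;
        }
        /* Slot claimed: copy the payload, then raise the SINT interrupt. */
        return 0;
    }
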
-
-static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
-{
- struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
- struct hv_message *msg = &stimer->msg;
- struct hv_timer_message_payload *payload =
- (struct hv_timer_message_payload *)&msg->u.payload;
-
- payload->expiration_time = stimer->exp_time;
- payload->delivery_time = get_time_ref_counter(vcpu->kvm);
- return synic_deliver_msg(vcpu_to_synic(vcpu),
- HV_STIMER_SINT(stimer->config), msg);
-}
-
-static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
-{
- int r;
-
- stimer->msg_pending = true;
- r = stimer_send_msg(stimer);
- trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
- stimer->index, r);
- if (!r) {
- stimer->msg_pending = false;
- if (!(stimer->config & HV_STIMER_PERIODIC))
- stimer->config &= ~HV_STIMER_ENABLE;
- }
-}
-
-void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
- struct kvm_vcpu_hv_stimer *stimer;
- u64 time_now, exp_time;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
- if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
- stimer = &hv_vcpu->stimer[i];
- if (stimer->config & HV_STIMER_ENABLE) {
- exp_time = stimer->exp_time;
-
- if (exp_time) {
- time_now =
- get_time_ref_counter(vcpu->kvm);
- if (time_now >= exp_time)
- stimer_expiration(stimer);
- }
-
- if ((stimer->config & HV_STIMER_ENABLE) &&
- stimer->count)
- stimer_start(stimer);
- else
- stimer_cleanup(stimer);
- }
- }
-}
-
-void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
- stimer_cleanup(&hv_vcpu->stimer[i]);
-}
-
-static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
-{
- struct hv_message *msg = &stimer->msg;
- struct hv_timer_message_payload *payload =
- (struct hv_timer_message_payload *)&msg->u.payload;
-
- memset(&msg->header, 0, sizeof(msg->header));
- msg->header.message_type = HVMSG_TIMER_EXPIRED;
- msg->header.payload_size = sizeof(*payload);
-
- payload->timer_index = stimer->index;
- payload->expiration_time = 0;
- payload->delivery_time = 0;
-}
-
-static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
-{
- memset(stimer, 0, sizeof(*stimer));
- stimer->index = timer_index;
- hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- stimer->timer.function = stimer_timer_callback;
- stimer_prepare_msg(stimer);
-}
-
-void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
- int i;
-
- synic_init(&hv_vcpu->synic);
-
- bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
- for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
- stimer_init(&hv_vcpu->stimer[i], i);
-}
-
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
-{
- /*
-	 * Hyper-V SynIC auto-EOI SINTs are not compatible with APICv,
-	 * so deactivate APICv.
- */
- kvm_vcpu_deactivate_apicv(vcpu);
- vcpu_to_synic(vcpu)->active = true;
- return 0;
-}
-
-static bool kvm_hv_msr_partition_wide(u32 msr)
-{
- bool r = false;
-
- switch (msr) {
- case HV_X64_MSR_GUEST_OS_ID:
- case HV_X64_MSR_HYPERCALL:
- case HV_X64_MSR_REFERENCE_TSC:
- case HV_X64_MSR_TIME_REF_COUNT:
- case HV_X64_MSR_CRASH_CTL:
- case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
- case HV_X64_MSR_RESET:
- r = true;
- break;
- }
-
- return r;
-}
-
-static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
- u32 index, u64 *pdata)
-{
- struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
-
- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
- return -EINVAL;
-
- *pdata = hv->hv_crash_param[index];
- return 0;
-}
-
-static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
-{
- struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
-
- *pdata = hv->hv_crash_ctl;
- return 0;
-}
-
-static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
-{
- struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
-
- if (host)
- hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
-
- if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {
-
- vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
- hv->hv_crash_param[0],
- hv->hv_crash_param[1],
- hv->hv_crash_param[2],
- hv->hv_crash_param[3],
- hv->hv_crash_param[4]);
-
- /* Send notification about crash to user space */
- kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
- }
-
- return 0;
-}
-
-static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
- u32 index, u64 data)
-{
- struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
-
- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
- return -EINVAL;
-
- hv->hv_crash_param[index] = data;
- return 0;
-}
-
-/*
- * The kvmclock and Hyper-V TSC page use similar formulas, and converting
- * between them is possible:
- *
- * kvmclock formula:
- * nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
- * + system_time
- *
- * Hyper-V formula:
- * nsec/100 = ticks * scale / 2^64 + offset
- *
- * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
- * By dividing the kvmclock formula by 100 and equating what's left we get:
- * ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
- * scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
- * scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100
- *
- * Now expand the kvmclock formula and divide by 100:
- * nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
- * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
- * + system_time
- * nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
- * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
- * + system_time / 100
- *
- * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
- * nsec/100 = ticks * scale / 2^64
- * - tsc_timestamp * scale / 2^64
- * + system_time / 100
- *
- * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
- * offset = system_time / 100 - tsc_timestamp * scale / 2^64
- *
- * These two equivalencies are implemented in this function.
- */
-static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
- HV_REFERENCE_TSC_PAGE *tsc_ref)
-{
- u64 max_mul;
-
- if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
- return false;
-
- /*
-	 * Check whether scale would overflow; if so, fall back to the time ref counter:
- * tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
- * tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
- * tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
- */
- max_mul = 100ull << (32 - hv_clock->tsc_shift);
- if (hv_clock->tsc_to_system_mul >= max_mul)
- return false;
-
- /*
- * Otherwise compute the scale and offset according to the formulas
- * derived above.
- */
- tsc_ref->tsc_scale =
- mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
- hv_clock->tsc_to_system_mul,
- 100);
-
- tsc_ref->tsc_offset = hv_clock->system_time;
- do_div(tsc_ref->tsc_offset, 100);
- tsc_ref->tsc_offset -=
- mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
- return true;
-}
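
For a guest whose effective TSC frequency is f Hz, kvmclock guarantees tsc_to_system_mul * 2^(tsc_shift-32) = 10^9 / f, so the scale derived above reduces to 2^64 * 10^7 / f (the reference counter ticks in 100ns units). A stand-alone sanity check with a hypothetical 2.56 GHz TSC, chosen so the division is exact:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t hv_scale_from_tsc_hz(uint64_t tsc_hz)
    {
        /* scale = 2^64 * 10^7 / tsc_hz, computed in 128 bits */
        return (uint64_t)(((unsigned __int128)10000000u << 64) / tsc_hz);
    }

    int main(void)
    {
        uint64_t scale = hv_scale_from_tsc_hz(2560000000ull);   /* 2.56 GHz */

        /* 10^9 TSC ticks = 0.390625 s -> expect 3906250 * 100ns */
        uint64_t ref = (uint64_t)(((unsigned __int128)1000000000ull * scale) >> 64);

        printf("scale = %#llx, 1e9 ticks -> %llu x 100ns\n",
               (unsigned long long)scale, (unsigned long long)ref);
        return 0;
    }
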
-
-void kvm_hv_setup_tsc_page(struct kvm *kvm,
- struct pvclock_vcpu_time_info *hv_clock)
-{
- struct kvm_hv *hv = &kvm->arch.hyperv;
- u32 tsc_seq;
- u64 gfn;
-
- BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
- BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
-
- if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
- return;
-
- gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
- /*
- * Because the TSC parameters only vary when there is a
- * change in the master clock, do not bother with caching.
- */
- if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
- &tsc_seq, sizeof(tsc_seq))))
- return;
-
- /*
- * While we're computing and writing the parameters, force the
- * guest to use the time reference count MSR.
- */
- hv->tsc_ref.tsc_sequence = 0;
- if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
- &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
- return;
-
- if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
- return;
-
- /* Ensure sequence is zero before writing the rest of the struct. */
- smp_wmb();
- if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
- return;
-
- /*
- * Now switch to the TSC page mechanism by writing the sequence.
- */
- tsc_seq++;
- if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
- tsc_seq = 1;
-
- /* Write the struct entirely before the non-zero sequence. */
- smp_wmb();
-
- hv->tsc_ref.tsc_sequence = tsc_seq;
- kvm_write_guest(kvm, gfn_to_gpa(gfn),
- &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
-}
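
The zero-the-sequence / write-the-body / bump-the-sequence ordering above is one half of a seqcount-style protocol; the other half is the guest's read loop, which retries if the sequence changed underneath it and falls back to the reference-count MSR while the sequence is zero. A sketch of that guest-side reader, with a minimal stand-in for the page layout and placeholder helpers (rdtsc_now(), read_ref_count_msr()) that are not part of this file:

    #include <stdint.h>

    /* Minimal stand-in for HV_REFERENCE_TSC_PAGE; only the fields used here. */
    struct tsc_page {
        volatile uint32_t tsc_sequence;
        uint32_t reserved;
        volatile uint64_t tsc_scale;
        volatile int64_t tsc_offset;
    };

    /* Placeholders for the raw TSC read and the MSR-based fallback. */
    extern uint64_t rdtsc_now(void);
    extern uint64_t read_ref_count_msr(void);

    uint64_t hv_read_reference_time(const struct tsc_page *p)
    {
        uint32_t seq;
        uint64_t scale, tsc;
        int64_t offset;

        do {
            seq = p->tsc_sequence;
            if (seq == 0)                      /* TSC page disabled */
                return read_ref_count_msr();
            scale  = p->tsc_scale;
            offset = p->tsc_offset;
            tsc    = rdtsc_now();
        } while (p->tsc_sequence != seq);      /* retry on concurrent update */

        return (uint64_t)(((unsigned __int128)tsc * scale) >> 64) + offset;
    }

A production reader would also need compiler/CPU barriers around the sequence loads; volatile alone is only enough for this sketch.
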
-
-static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
- bool host)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvm_hv *hv = &kvm->arch.hyperv;
-
- switch (msr) {
- case HV_X64_MSR_GUEST_OS_ID:
- hv->hv_guest_os_id = data;
- /* setting guest os id to zero disables hypercall page */
- if (!hv->hv_guest_os_id)
- hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
- break;
- case HV_X64_MSR_HYPERCALL: {
- u64 gfn;
- unsigned long addr;
- u8 instructions[4];
-
-		/* if the guest OS ID is not set, the hypercall page should remain disabled */
- if (!hv->hv_guest_os_id)
- break;
- if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
- hv->hv_hypercall = data;
- break;
- }
- gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr))
- return 1;
- kvm_x86_ops->patch_hypercall(vcpu, instructions);
- ((unsigned char *)instructions)[3] = 0xc3; /* ret */
- if (__copy_to_user((void __user *)addr, instructions, 4))
- return 1;
- hv->hv_hypercall = data;
- mark_page_dirty(kvm, gfn);
- break;
- }
- case HV_X64_MSR_REFERENCE_TSC:
- hv->hv_tsc_page = data;
- if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
- kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
- break;
- case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
- return kvm_hv_msr_set_crash_data(vcpu,
- msr - HV_X64_MSR_CRASH_P0,
- data);
- case HV_X64_MSR_CRASH_CTL:
- return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
- case HV_X64_MSR_RESET:
- if (data == 1) {
- vcpu_debug(vcpu, "hyper-v reset requested\n");
- kvm_make_request(KVM_REQ_HV_RESET, vcpu);
- }
- break;
- default:
-		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
- msr, data);
- return 1;
- }
- return 0;
-}
-
-/* Calculate cpu time spent by current task in 100ns units */
-static u64 current_task_runtime_100ns(void)
-{
- cputime_t utime, stime;
-
- task_cputime_adjusted(current, &utime, &stime);
- return div_u64(cputime_to_nsecs(utime + stime), 100);
-}
-
-static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
-{
- struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
-
- switch (msr) {
- case HV_X64_MSR_APIC_ASSIST_PAGE: {
- u64 gfn;
- unsigned long addr;
-
- if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
- hv->hv_vapic = data;
- if (kvm_lapic_enable_pv_eoi(vcpu, 0))
- return 1;
- break;
- }
- gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
- addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
- if (kvm_is_error_hva(addr))
- return 1;
- if (__clear_user((void __user *)addr, PAGE_SIZE))
- return 1;
- hv->hv_vapic = data;
- kvm_vcpu_mark_page_dirty(vcpu, gfn);
- if (kvm_lapic_enable_pv_eoi(vcpu,
- gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
- return 1;
- break;
- }
- case HV_X64_MSR_EOI:
- return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
- case HV_X64_MSR_ICR:
- return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
- case HV_X64_MSR_TPR:
- return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
- case HV_X64_MSR_VP_RUNTIME:
- if (!host)
- return 1;
- hv->runtime_offset = data - current_task_runtime_100ns();
- break;
- case HV_X64_MSR_SCONTROL:
- case HV_X64_MSR_SVERSION:
- case HV_X64_MSR_SIEFP:
- case HV_X64_MSR_SIMP:
- case HV_X64_MSR_EOM:
- case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
- return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
- case HV_X64_MSR_STIMER0_CONFIG:
- case HV_X64_MSR_STIMER1_CONFIG:
- case HV_X64_MSR_STIMER2_CONFIG:
- case HV_X64_MSR_STIMER3_CONFIG: {
- int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
-
- return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
- data, host);
- }
- case HV_X64_MSR_STIMER0_COUNT:
- case HV_X64_MSR_STIMER1_COUNT:
- case HV_X64_MSR_STIMER2_COUNT:
- case HV_X64_MSR_STIMER3_COUNT: {
- int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
-
- return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
- data, host);
- }
- default:
-		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
- msr, data);
- return 1;
- }
-
- return 0;
-}
-
-static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
- u64 data = 0;
- struct kvm *kvm = vcpu->kvm;
- struct kvm_hv *hv = &kvm->arch.hyperv;
-
- switch (msr) {
- case HV_X64_MSR_GUEST_OS_ID:
- data = hv->hv_guest_os_id;
- break;
- case HV_X64_MSR_HYPERCALL:
- data = hv->hv_hypercall;
- break;
- case HV_X64_MSR_TIME_REF_COUNT:
- data = get_time_ref_counter(kvm);
- break;
- case HV_X64_MSR_REFERENCE_TSC:
- data = hv->hv_tsc_page;
- break;
- case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
- return kvm_hv_msr_get_crash_data(vcpu,
- msr - HV_X64_MSR_CRASH_P0,
- pdata);
- case HV_X64_MSR_CRASH_CTL:
- return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
- case HV_X64_MSR_RESET:
- data = 0;
- break;
- default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
- return 1;
- }
-
- *pdata = data;
- return 0;
-}
-
-static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
- u64 data = 0;
- struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
-
- switch (msr) {
- case HV_X64_MSR_VP_INDEX: {
- int r;
- struct kvm_vcpu *v;
-
- kvm_for_each_vcpu(r, v, vcpu->kvm) {
- if (v == vcpu) {
- data = r;
- break;
- }
- }
- break;
- }
- case HV_X64_MSR_EOI:
- return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
- case HV_X64_MSR_ICR:
- return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
- case HV_X64_MSR_TPR:
- return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
- case HV_X64_MSR_APIC_ASSIST_PAGE:
- data = hv->hv_vapic;
- break;
- case HV_X64_MSR_VP_RUNTIME:
- data = current_task_runtime_100ns() + hv->runtime_offset;
- break;
- case HV_X64_MSR_SCONTROL:
- case HV_X64_MSR_SVERSION:
- case HV_X64_MSR_SIEFP:
- case HV_X64_MSR_SIMP:
- case HV_X64_MSR_EOM:
- case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
- return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
- case HV_X64_MSR_STIMER0_CONFIG:
- case HV_X64_MSR_STIMER1_CONFIG:
- case HV_X64_MSR_STIMER2_CONFIG:
- case HV_X64_MSR_STIMER3_CONFIG: {
- int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
-
- return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
- pdata);
- }
- case HV_X64_MSR_STIMER0_COUNT:
- case HV_X64_MSR_STIMER1_COUNT:
- case HV_X64_MSR_STIMER2_COUNT:
- case HV_X64_MSR_STIMER3_COUNT: {
- int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
-
- return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
- pdata);
- }
- default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
- return 1;
- }
- *pdata = data;
- return 0;
-}
-
-int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
-{
- if (kvm_hv_msr_partition_wide(msr)) {
- int r;
-
- mutex_lock(&vcpu->kvm->lock);
- r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
- mutex_unlock(&vcpu->kvm->lock);
- return r;
- } else
- return kvm_hv_set_msr(vcpu, msr, data, host);
-}
-
-int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
- if (kvm_hv_msr_partition_wide(msr)) {
- int r;
-
- mutex_lock(&vcpu->kvm->lock);
- r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
- mutex_unlock(&vcpu->kvm->lock);
- return r;
- } else
- return kvm_hv_get_msr(vcpu, msr, pdata);
-}
-
-bool kvm_hv_hypercall_enabled(struct kvm *kvm)
-{
- return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
-}
-
-static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
-{
- bool longmode;
-
- longmode = is_64_bit_mode(vcpu);
- if (longmode)
- kvm_register_write(vcpu, VCPU_REGS_RAX, result);
- else {
- kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
- kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
- }
-}
-
-static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
-
- kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
- return 1;
-}
-
-int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
-{
- u64 param, ingpa, outgpa, ret;
- uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
- bool fast, longmode;
-
- /*
-	 * Per the Hyper-V spec, a hypercall generates #UD when executed
-	 * from non-zero CPL or in real mode.
- */
- if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
-
- longmode = is_64_bit_mode(vcpu);
-
- if (!longmode) {
- param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
- ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
- outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
- }
-#ifdef CONFIG_X86_64
- else {
- param = kvm_register_read(vcpu, VCPU_REGS_RCX);
- ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
- outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
- }
-#endif
-
- code = param & 0xffff;
- fast = (param >> 16) & 0x1;
- rep_cnt = (param >> 32) & 0xfff;
- rep_idx = (param >> 48) & 0xfff;
-
- trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
-
- /* Hypercall continuation is not supported yet */
- if (rep_cnt || rep_idx) {
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
- goto set_result;
- }
-
- switch (code) {
- case HVCALL_NOTIFY_LONG_SPIN_WAIT:
- kvm_vcpu_on_spin(vcpu);
- break;
- case HVCALL_POST_MESSAGE:
- case HVCALL_SIGNAL_EVENT:
- /* don't bother userspace if it has no way to handle it */
- if (!vcpu_to_synic(vcpu)->active) {
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
- break;
- }
- vcpu->run->exit_reason = KVM_EXIT_HYPERV;
- vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
- vcpu->run->hyperv.u.hcall.input = param;
- vcpu->run->hyperv.u.hcall.params[0] = ingpa;
- vcpu->run->hyperv.u.hcall.params[1] = outgpa;
- vcpu->arch.complete_userspace_io =
- kvm_hv_hypercall_complete_userspace;
- return 0;
- default:
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
- break;
- }
-
-set_result:
- ret = res | (((u64)rep_done & 0xfff) << 32);
- kvm_hv_hypercall_set_result(vcpu, ret);
- return 1;
-}
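
For reference, the hypercall input value decoded above packs the call code into bits 0-15, the "fast" flag into bit 16, the rep count into bits 32-43 and the rep start index into bits 48-59; the result written back mirrors this, with the status in bits 0-15 and reps completed in bits 32-43. A quick decode of a hypothetical input value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t param = 0x000a000500010002ull;   /* hypothetical input */

        printf("code=%#x fast=%u rep_cnt=%u rep_idx=%u\n",
               (unsigned)(param & 0xffff),          /* 0x2 */
               (unsigned)((param >> 16) & 0x1),     /* 1   */
               (unsigned)((param >> 32) & 0xfff),   /* 5   */
               (unsigned)((param >> 48) & 0xfff));  /* 10  */
        return 0;
    }
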
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
deleted file mode 100644
index cd11195..0000000
--- a/arch/x86/kvm/hyperv.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * KVM Microsoft Hyper-V emulation
- *
- * derived from arch/x86/kvm/x86.c
- *
- * Copyright (C) 2006 Qumranet, Inc.
- * Copyright (C) 2008 Qumranet, Inc.
- * Copyright IBM Corporation, 2008
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
- *
- * Authors:
- * Avi Kivity <avi@qumranet.com>
- * Yaniv Kamay <yaniv@qumranet.com>
- * Amit Shah <amit.shah@qumranet.com>
- * Ben-Ami Yassour <benami@il.ibm.com>
- * Andrey Smetanin <asmetanin@virtuozzo.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#ifndef __ARCH_X86_KVM_HYPERV_H__
-#define __ARCH_X86_KVM_HYPERV_H__
-
-static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu)
-{
- return &vcpu->arch.hyperv;
-}
-
-static inline struct kvm_vcpu *hv_vcpu_to_vcpu(struct kvm_vcpu_hv *hv_vcpu)
-{
- struct kvm_vcpu_arch *arch;
-
- arch = container_of(hv_vcpu, struct kvm_vcpu_arch, hyperv);
- return container_of(arch, struct kvm_vcpu, arch);
-}
-
-static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu)
-{
- return &vcpu->arch.hyperv.synic;
-}
-
-static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
-{
- return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic));
-}
-
-int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
-int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
-
-bool kvm_hv_hypercall_enabled(struct kvm *kvm);
-int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
-
-void kvm_hv_irq_routing_update(struct kvm *kvm);
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
-void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
-
-void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
-void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
-
-static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
- int timer_index)
-{
- return &vcpu_to_hv_vcpu(vcpu)->stimer[timer_index];
-}
-
-static inline struct kvm_vcpu *stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer)
-{
- struct kvm_vcpu_hv *hv_vcpu;
-
- hv_vcpu = container_of(stimer - stimer->index, struct kvm_vcpu_hv,
- stimer[0]);
- return hv_vcpu_to_vcpu(hv_vcpu);
-}
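
stimer_to_vcpu() leans on the fact that `stimer - stimer->index` points at element 0 of the embedded stimer[] array, which container_of() then turns back into the enclosing kvm_vcpu_hv. The same trick in isolation, with a hypothetical container type:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct item  { int index; };
    struct owner { int id; struct item items[4]; };

    int main(void)
    {
        struct owner o = { .id = 42 };
        for (int i = 0; i < 4; i++)
            o.items[i].index = i;

        struct item *it = &o.items[2];
        /* step back to items[0], then to the enclosing struct owner */
        struct owner *back = container_of(it - it->index, struct owner, items[0]);

        printf("owner id = %d\n", back->id);   /* prints 42 */
        return 0;
    }
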
-
-static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
-{
- return !bitmap_empty(vcpu->arch.hyperv.stimer_pending_bitmap,
- HV_SYNIC_STIMER_COUNT);
-}
-
-void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
-
-void kvm_hv_setup_tsc_page(struct kvm *kvm,
- struct pvclock_vcpu_time_info *hv_clock);
-
-#endif
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
deleted file mode 100644
index 16a7134..0000000
--- a/arch/x86/kvm/i8254.c
+++ /dev/null
@@ -1,738 +0,0 @@
-/*
- * 8253/8254 interval timer emulation
- *
- * Copyright (c) 2003-2004 Fabrice Bellard
- * Copyright (c) 2006 Intel Corporation
- * Copyright (c) 2007 Keir Fraser, XenSource Inc
- * Copyright (c) 2008 Intel Corporation
- * Copyright 2009 Red Hat, Inc. and/or its affiliates.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *
- * Authors:
- * Sheng Yang <sheng.yang@intel.com>
- * Based on QEMU and Xen.
- */
-
-#define pr_fmt(fmt) "pit: " fmt
-
-#include <linux/kvm_host.h>
-#include <linux/slab.h>
-
-#include "ioapic.h"
-#include "irq.h"
-#include "i8254.h"
-#include "x86.h"
-
-#ifndef CONFIG_X86_64
-#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
-#else
-#define mod_64(x, y) ((x) % (y))
-#endif
-
-#define RW_STATE_LSB 1
-#define RW_STATE_MSB 2
-#define RW_STATE_WORD0 3
-#define RW_STATE_WORD1 4
-
-static void pit_set_gate(struct kvm_pit *pit, int channel, u32 val)
-{
- struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
-
- switch (c->mode) {
- default:
- case 0:
- case 4:
- /* XXX: just disable/enable counting */
- break;
- case 1:
- case 2:
- case 3:
- case 5:
- /* Restart counting on rising edge. */
- if (c->gate < val)
- c->count_load_time = ktime_get();
- break;
- }
-
- c->gate = val;
-}
-
-static int pit_get_gate(struct kvm_pit *pit, int channel)
-{
- return pit->pit_state.channels[channel].gate;
-}
-
-static s64 __kpit_elapsed(struct kvm_pit *pit)
-{
- s64 elapsed;
- ktime_t remaining;
- struct kvm_kpit_state *ps = &pit->pit_state;
-
- if (!ps->period)
- return 0;
-
- /*
- * The Counter does not stop when it reaches zero. In
- * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
- * the highest count, either FFFF hex for binary counting
- * or 9999 for BCD counting, and continues counting.
- * Modes 2 and 3 are periodic; the Counter reloads
- * itself with the initial count and continues counting
- * from there.
- */
- remaining = hrtimer_get_remaining(&ps->timer);
- elapsed = ps->period - ktime_to_ns(remaining);
-
- return elapsed;
-}
-
-static s64 kpit_elapsed(struct kvm_pit *pit, struct kvm_kpit_channel_state *c,
- int channel)
-{
- if (channel == 0)
- return __kpit_elapsed(pit);
-
- return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
-}
-
-static int pit_get_count(struct kvm_pit *pit, int channel)
-{
- struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
- s64 d, t;
- int counter;
-
- t = kpit_elapsed(pit, c, channel);
- d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC);
-
- switch (c->mode) {
- case 0:
- case 1:
- case 4:
- case 5:
- counter = (c->count - d) & 0xffff;
- break;
- case 3:
- /* XXX: may be incorrect for odd counts */
- counter = c->count - (mod_64((2 * d), c->count));
- break;
- default:
- counter = c->count - mod_64(d, c->count);
- break;
- }
- return counter;
-}
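
pit_get_count() converts elapsed wall-clock nanoseconds into KVM_PIT_FREQ (1193181 Hz) ticks and then applies the mode-specific wrap. A stand-alone check with hypothetical numbers for mode 2, the common periodic mode:

    #include <stdint.h>
    #include <stdio.h>

    #define KVM_PIT_FREQ 1193181
    #define NSEC_PER_SEC 1000000000ull

    int main(void)
    {
        uint64_t elapsed_ns = 10000000;   /* 10 ms since the count was loaded */
        uint32_t count = 0x10000;         /* a programmed count of 0 means 65536 */

        uint64_t d = elapsed_ns * KVM_PIT_FREQ / NSEC_PER_SEC;   /* elapsed ticks */
        uint32_t counter = count - (uint32_t)(d % count);        /* mode 2 wrap */

        /* prints d=11931 counter=53605 */
        printf("d=%llu counter=%u\n", (unsigned long long)d, counter);
        return 0;
    }
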
-
-static int pit_get_out(struct kvm_pit *pit, int channel)
-{
- struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
- s64 d, t;
- int out;
-
- t = kpit_elapsed(pit, c, channel);
- d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC);
-
- switch (c->mode) {
- default:
- case 0:
- out = (d >= c->count);
- break;
- case 1:
- out = (d < c->count);
- break;
- case 2:
- out = ((mod_64(d, c->count) == 0) && (d != 0));
- break;
- case 3:
- out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
- break;
- case 4:
- case 5:
- out = (d == c->count);
- break;
- }
-
- return out;
-}
-
-static void pit_latch_count(struct kvm_pit *pit, int channel)
-{
- struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
-
- if (!c->count_latched) {
- c->latched_count = pit_get_count(pit, channel);
- c->count_latched = c->rw_mode;
- }
-}
-
-static void pit_latch_status(struct kvm_pit *pit, int channel)
-{
- struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
-
- if (!c->status_latched) {
- /* TODO: Return NULL COUNT (bit 6). */
- c->status = ((pit_get_out(pit, channel) << 7) |
- (c->rw_mode << 4) |
- (c->mode << 1) |
- c->bcd);
- c->status_latched = 1;
- }
-}
-
-static inline struct kvm_pit *pit_state_to_pit(struct kvm_kpit_state *ps)
-{
- return container_of(ps, struct kvm_pit, pit_state);
-}
-
-static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
- struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
- irq_ack_notifier);
- struct kvm_pit *pit = pit_state_to_pit(ps);
-
- atomic_set(&ps->irq_ack, 1);
- /* irq_ack should be set before pending is read. Order accesses with
- * inc(pending) in pit_timer_fn and xchg(irq_ack, 0) in pit_do_work.
- */
- smp_mb();
- if (atomic_dec_if_positive(&ps->pending) > 0)
- kthread_queue_work(&pit->worker, &pit->expired);
-}
-
-void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
-{
- struct kvm_pit *pit = vcpu->kvm->arch.vpit;
- struct hrtimer *timer;
-
- if (!kvm_vcpu_is_bsp(vcpu) || !pit)
- return;
-
- timer = &pit->pit_state.timer;
- mutex_lock(&pit->pit_state.lock);
- if (hrtimer_cancel(timer))
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
- mutex_unlock(&pit->pit_state.lock);
-}
-
-static void destroy_pit_timer(struct kvm_pit *pit)
-{
- hrtimer_cancel(&pit->pit_state.timer);
- kthread_flush_work(&pit->expired);
-}
-
-static void pit_do_work(struct kthread_work *work)
-{
- struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
- struct kvm *kvm = pit->kvm;
- struct kvm_vcpu *vcpu;
- int i;
- struct kvm_kpit_state *ps = &pit->pit_state;
-
- if (atomic_read(&ps->reinject) && !atomic_xchg(&ps->irq_ack, 0))
- return;
-
- kvm_set_irq(kvm, pit->irq_source_id, 0, 1, false);
- kvm_set_irq(kvm, pit->irq_source_id, 0, 0, false);
-
- /*
- * Provides NMI watchdog support via Virtual Wire mode.
- * The route is: PIT -> LVT0 in NMI mode.
- *
- * Note: Our Virtual Wire implementation does not follow
- * the MP specification. We propagate a PIT interrupt to all
-	 * VCPUs, but only when LVT0 is in NMI mode. The interrupt can
- * also be simultaneously delivered through PIC and IOAPIC.
- */
- if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_apic_nmi_wd_deliver(vcpu);
-}
-
-static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
-{
- struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer);
- struct kvm_pit *pt = pit_state_to_pit(ps);
-
- if (atomic_read(&ps->reinject))
- atomic_inc(&ps->pending);
-
- kthread_queue_work(&pt->worker, &pt->expired);
-
- if (ps->is_periodic) {
- hrtimer_add_expires_ns(&ps->timer, ps->period);
- return HRTIMER_RESTART;
- } else
- return HRTIMER_NORESTART;
-}
-
-static inline void kvm_pit_reset_reinject(struct kvm_pit *pit)
-{
- atomic_set(&pit->pit_state.pending, 0);
- atomic_set(&pit->pit_state.irq_ack, 1);
-}
-
-void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject)
-{
- struct kvm_kpit_state *ps = &pit->pit_state;
- struct kvm *kvm = pit->kvm;
-
- if (atomic_read(&ps->reinject) == reinject)
- return;
-
- if (reinject) {
- /* The initial state is preserved while ps->reinject == 0. */
- kvm_pit_reset_reinject(pit);
- kvm_register_irq_ack_notifier(kvm, &ps->irq_ack_notifier);
- kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
- } else {
- kvm_unregister_irq_ack_notifier(kvm, &ps->irq_ack_notifier);
- kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
- }
-
- atomic_set(&ps->reinject, reinject);
-}
-
-static void create_pit_timer(struct kvm_pit *pit, u32 val, int is_period)
-{
- struct kvm_kpit_state *ps = &pit->pit_state;
- struct kvm *kvm = pit->kvm;
- s64 interval;
-
- if (!ioapic_in_kernel(kvm) ||
- ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
- return;
-
- interval = mul_u64_u32_div(val, NSEC_PER_SEC, KVM_PIT_FREQ);
-
- pr_debug("create pit timer, interval is %llu nsec\n", interval);
-
-	/* TODO: the new value only takes effect after the timer is retriggered */
- hrtimer_cancel(&ps->timer);
- kthread_flush_work(&pit->expired);
- ps->period = interval;
- ps->is_periodic = is_period;
-
- kvm_pit_reset_reinject(pit);
-
- /*
-	 * Do not allow the guest to program periodic timers with a small
- * interval, since the hrtimers are not throttled by the host
- * scheduler.
- */
- if (ps->is_periodic) {
- s64 min_period = min_timer_period_us * 1000LL;
-
- if (ps->period < min_period) {
- pr_info_ratelimited(
- "kvm: requested %lld ns "
- "i8254 timer period limited to %lld ns\n",
- ps->period, min_period);
- ps->period = min_period;
- }
- }
-
- hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
- HRTIMER_MODE_ABS);
-}
-
-static void pit_load_count(struct kvm_pit *pit, int channel, u32 val)
-{
- struct kvm_kpit_state *ps = &pit->pit_state;
-
- pr_debug("load_count val is %d, channel is %d\n", val, channel);
-
- /*
- * The largest possible initial count is 0; this is equivalent
-	 * to 2^16 for binary counting and 10^4 for BCD counting.
- */
- if (val == 0)
- val = 0x10000;
-
- ps->channels[channel].count = val;
-
- if (channel != 0) {
- ps->channels[channel].count_load_time = ktime_get();
- return;
- }
-
-	/* Two kinds of timer: modes 0, 1 and 4 create a one-shot timer,
-	 * modes 2 and 3 a periodic one; any other mode deletes the timer. */
- switch (ps->channels[0].mode) {
- case 0:
- case 1:
- /* FIXME: enhance mode 4 precision */
- case 4:
- create_pit_timer(pit, val, 0);
- break;
- case 2:
- case 3:
- create_pit_timer(pit, val, 1);
- break;
- default:
- destroy_pit_timer(pit);
- }
-}
-
-void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val,
- int hpet_legacy_start)
-{
- u8 saved_mode;
-
- WARN_ON_ONCE(!mutex_is_locked(&pit->pit_state.lock));
-
- if (hpet_legacy_start) {
- /* save existing mode for later reenablement */
- WARN_ON(channel != 0);
- saved_mode = pit->pit_state.channels[0].mode;
- pit->pit_state.channels[0].mode = 0xff; /* disable timer */
- pit_load_count(pit, channel, val);
- pit->pit_state.channels[0].mode = saved_mode;
- } else {
- pit_load_count(pit, channel, val);
- }
-}
-
-static inline struct kvm_pit *dev_to_pit(struct kvm_io_device *dev)
-{
- return container_of(dev, struct kvm_pit, dev);
-}
-
-static inline struct kvm_pit *speaker_to_pit(struct kvm_io_device *dev)
-{
- return container_of(dev, struct kvm_pit, speaker_dev);
-}
-
-static inline int pit_in_range(gpa_t addr)
-{
- return ((addr >= KVM_PIT_BASE_ADDRESS) &&
- (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
-}
-
-static int pit_ioport_write(struct kvm_vcpu *vcpu,
- struct kvm_io_device *this,
- gpa_t addr, int len, const void *data)
-{
- struct kvm_pit *pit = dev_to_pit(this);
- struct kvm_kpit_state *pit_state = &pit->pit_state;
- int channel, access;
- struct kvm_kpit_channel_state *s;
- u32 val = *(u32 *) data;
- if (!pit_in_range(addr))
- return -EOPNOTSUPP;
-
- val &= 0xff;
- addr &= KVM_PIT_CHANNEL_MASK;
-
- mutex_lock(&pit_state->lock);
-
- if (val != 0)
- pr_debug("write addr is 0x%x, len is %d, val is 0x%x\n",
- (unsigned int)addr, len, val);
-
- if (addr == 3) {
- channel = val >> 6;
- if (channel == 3) {
- /* Read-Back Command. */
- for (channel = 0; channel < 3; channel++) {
- s = &pit_state->channels[channel];
- if (val & (2 << channel)) {
- if (!(val & 0x20))
- pit_latch_count(pit, channel);
- if (!(val & 0x10))
- pit_latch_status(pit, channel);
- }
- }
- } else {
- /* Select Counter <channel>. */
- s = &pit_state->channels[channel];
- access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
- if (access == 0) {
- pit_latch_count(pit, channel);
- } else {
- s->rw_mode = access;
- s->read_state = access;
- s->write_state = access;
- s->mode = (val >> 1) & 7;
- if (s->mode > 5)
- s->mode -= 4;
- s->bcd = val & 1;
- }
- }
- } else {
- /* Write Count. */
- s = &pit_state->channels[addr];
- switch (s->write_state) {
- default:
- case RW_STATE_LSB:
- pit_load_count(pit, addr, val);
- break;
- case RW_STATE_MSB:
- pit_load_count(pit, addr, val << 8);
- break;
- case RW_STATE_WORD0:
- s->write_latch = val;
- s->write_state = RW_STATE_WORD1;
- break;
- case RW_STATE_WORD1:
- pit_load_count(pit, addr, s->write_latch | (val << 8));
- s->write_state = RW_STATE_WORD0;
- break;
- }
- }
-
- mutex_unlock(&pit_state->lock);
- return 0;
-}
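
Writes to address 3 above are the 8254 control word: bits 7-6 select the channel (3 means read-back), bits 5-4 the access/latch mode, bits 3-1 the counting mode and bit 0 BCD. Decoding the classic 0x34 setup byte (channel 0, lobyte/hibyte access, mode 2, binary) as an illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t cw = 0x34;   /* typical channel-0 rate-generator setup */

        printf("channel=%d access=%d mode=%d bcd=%d\n",
               cw >> 6,            /* 0: channel 0          */
               (cw >> 4) & 0x3,    /* 3: lobyte then hibyte */
               (cw >> 1) & 0x7,    /* 2: rate generator     */
               cw & 1);            /* 0: binary counting    */
        return 0;
    }
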
-
-static int pit_ioport_read(struct kvm_vcpu *vcpu,
- struct kvm_io_device *this,
- gpa_t addr, int len, void *data)
-{
- struct kvm_pit *pit = dev_to_pit(this);
- struct kvm_kpit_state *pit_state = &pit->pit_state;
- int ret, count;
- struct kvm_kpit_channel_state *s;
- if (!pit_in_range(addr))
- return -EOPNOTSUPP;
-
- addr &= KVM_PIT_CHANNEL_MASK;
- if (addr == 3)
- return 0;
-
- s = &pit_state->channels[addr];
-
- mutex_lock(&pit_state->lock);
-
- if (s->status_latched) {
- s->status_latched = 0;
- ret = s->status;
- } else if (s->count_latched) {
- switch (s->count_latched) {
- default:
- case RW_STATE_LSB:
- ret = s->latched_count & 0xff;
- s->count_latched = 0;
- break;
- case RW_STATE_MSB:
- ret = s->latched_count >> 8;
- s->count_latched = 0;
- break;
- case RW_STATE_WORD0:
- ret = s->latched_count & 0xff;
- s->count_latched = RW_STATE_MSB;
- break;
- }
- } else {
- switch (s->read_state) {
- default:
- case RW_STATE_LSB:
- count = pit_get_count(pit, addr);
- ret = count & 0xff;
- break;
- case RW_STATE_MSB:
- count = pit_get_count(pit, addr);
- ret = (count >> 8) & 0xff;
- break;
- case RW_STATE_WORD0:
- count = pit_get_count(pit, addr);
- ret = count & 0xff;
- s->read_state = RW_STATE_WORD1;
- break;
- case RW_STATE_WORD1:
- count = pit_get_count(pit, addr);
- ret = (count >> 8) & 0xff;
- s->read_state = RW_STATE_WORD0;
- break;
- }
- }
-
- if (len > sizeof(ret))
- len = sizeof(ret);
- memcpy(data, (char *)&ret, len);
-
- mutex_unlock(&pit_state->lock);
- return 0;
-}
-
-static int speaker_ioport_write(struct kvm_vcpu *vcpu,
- struct kvm_io_device *this,
- gpa_t addr, int len, const void *data)
-{
- struct kvm_pit *pit = speaker_to_pit(this);
- struct kvm_kpit_state *pit_state = &pit->pit_state;
- u32 val = *(u32 *) data;
- if (addr != KVM_SPEAKER_BASE_ADDRESS)
- return -EOPNOTSUPP;
-
- mutex_lock(&pit_state->lock);
- pit_state->speaker_data_on = (val >> 1) & 1;
- pit_set_gate(pit, 2, val & 1);
- mutex_unlock(&pit_state->lock);
- return 0;
-}
-
-static int speaker_ioport_read(struct kvm_vcpu *vcpu,
- struct kvm_io_device *this,
- gpa_t addr, int len, void *data)
-{
- struct kvm_pit *pit = speaker_to_pit(this);
- struct kvm_kpit_state *pit_state = &pit->pit_state;
- unsigned int refresh_clock;
- int ret;
- if (addr != KVM_SPEAKER_BASE_ADDRESS)
- return -EOPNOTSUPP;
-
- /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
- refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
-
- mutex_lock(&pit_state->lock);
- ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(pit, 2) |
- (pit_get_out(pit, 2) << 5) | (refresh_clock << 4));
- if (len > sizeof(ret))
- len = sizeof(ret);
- memcpy(data, (char *)&ret, len);
- mutex_unlock(&pit_state->lock);
- return 0;
-}
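
The byte assembled above follows the legacy port 0x61 layout: bit 0 is the timer-2 gate, bit 1 the speaker-data enable, bit 4 the toggling refresh clock and bit 5 the timer-2 output. A tiny decoder for a hypothetical read value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t port61 = 0x31;   /* hypothetical value: 0b00110001 */

        printf("gate=%d speaker_data=%d refresh=%d timer2_out=%d\n",
               port61 & 1,          /* timer 2 gate          */
               (port61 >> 1) & 1,   /* speaker data enable   */
               (port61 >> 4) & 1,   /* refresh clock toggle  */
               (port61 >> 5) & 1);  /* timer 2 output        */
        return 0;
    }
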
-
-static void kvm_pit_reset(struct kvm_pit *pit)
-{
- int i;
- struct kvm_kpit_channel_state *c;
-
- pit->pit_state.flags = 0;
- for (i = 0; i < 3; i++) {
- c = &pit->pit_state.channels[i];
- c->mode = 0xff;
- c->gate = (i != 2);
- pit_load_count(pit, i, 0);
- }
-
- kvm_pit_reset_reinject(pit);
-}
-
-static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
-{
- struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
-
- if (!mask)
- kvm_pit_reset_reinject(pit);
-}
-
-static const struct kvm_io_device_ops pit_dev_ops = {
- .read = pit_ioport_read,
- .write = pit_ioport_write,
-};
-
-static const struct kvm_io_device_ops speaker_dev_ops = {
- .read = speaker_ioport_read,
- .write = speaker_ioport_write,
-};
-
-struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
-{
- struct kvm_pit *pit;
- struct kvm_kpit_state *pit_state;
- struct pid *pid;
- pid_t pid_nr;
- int ret;
-
- pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
- if (!pit)
- return NULL;
-
- pit->irq_source_id = kvm_request_irq_source_id(kvm);
- if (pit->irq_source_id < 0)
- goto fail_request;
-
- mutex_init(&pit->pit_state.lock);
-
- pid = get_pid(task_tgid(current));
- pid_nr = pid_vnr(pid);
- put_pid(pid);
-
- kthread_init_worker(&pit->worker);
- pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker,
- "kvm-pit/%d", pid_nr);
- if (IS_ERR(pit->worker_task))
- goto fail_kthread;
-
- kthread_init_work(&pit->expired, pit_do_work);
-
- pit->kvm = kvm;
-
- pit_state = &pit->pit_state;
- hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- pit_state->timer.function = pit_timer_fn;
-
- pit_state->irq_ack_notifier.gsi = 0;
- pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
- pit->mask_notifier.func = pit_mask_notifer;
-
- kvm_pit_reset(pit);
-
- kvm_pit_set_reinject(pit, true);
-
- mutex_lock(&kvm->slots_lock);
- kvm_iodevice_init(&pit->dev, &pit_dev_ops);
- ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
- KVM_PIT_MEM_LENGTH, &pit->dev);
- if (ret < 0)
- goto fail_register_pit;
-
- if (flags & KVM_PIT_SPEAKER_DUMMY) {
- kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
- ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
- KVM_SPEAKER_BASE_ADDRESS, 4,
- &pit->speaker_dev);
- if (ret < 0)
- goto fail_register_speaker;
- }
- mutex_unlock(&kvm->slots_lock);
-
- return pit;
-
-fail_register_speaker:
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
-fail_register_pit:
- mutex_unlock(&kvm->slots_lock);
- kvm_pit_set_reinject(pit, false);
- kthread_stop(pit->worker_task);
-fail_kthread:
- kvm_free_irq_source_id(kvm, pit->irq_source_id);
-fail_request:
- kfree(pit);
- return NULL;
-}
-
-void kvm_free_pit(struct kvm *kvm)
-{
- struct kvm_pit *pit = kvm->arch.vpit;
-
- if (pit) {
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
- kvm_pit_set_reinject(pit, false);
- hrtimer_cancel(&pit->pit_state.timer);
- kthread_flush_work(&pit->expired);
- kthread_stop(pit->worker_task);
- kvm_free_irq_source_id(kvm, pit->irq_source_id);
- kfree(pit);
- }
-}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
deleted file mode 100644
index 2f5af07..0000000
--- a/arch/x86/kvm/i8254.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef __I8254_H
-#define __I8254_H
-
-#include <linux/kthread.h>
-
-#include <kvm/iodev.h>
-
-struct kvm_kpit_channel_state {
- u32 count; /* can be 65536 */
- u16 latched_count;
- u8 count_latched;
- u8 status_latched;
- u8 status;
- u8 read_state;
- u8 write_state;
- u8 write_latch;
- u8 rw_mode;
- u8 mode;
- u8 bcd; /* not supported */
- u8 gate; /* timer start */
- ktime_t count_load_time;
-};
-
-struct kvm_kpit_state {
- /* All members before "struct mutex lock" are protected by the lock. */
- struct kvm_kpit_channel_state channels[3];
- u32 flags;
- bool is_periodic;
- s64 period; /* unit: ns */
- struct hrtimer timer;
- u32 speaker_data_on;
-
- struct mutex lock;
- atomic_t reinject;
- atomic_t pending; /* accumulated triggered timers */
- atomic_t irq_ack;
- struct kvm_irq_ack_notifier irq_ack_notifier;
-};
-
-struct kvm_pit {
- struct kvm_io_device dev;
- struct kvm_io_device speaker_dev;
- struct kvm *kvm;
- struct kvm_kpit_state pit_state;
- int irq_source_id;
- struct kvm_irq_mask_notifier mask_notifier;
- struct kthread_worker worker;
- struct task_struct *worker_task;
- struct kthread_work expired;
-};
-
-#define KVM_PIT_BASE_ADDRESS 0x40
-#define KVM_SPEAKER_BASE_ADDRESS 0x61
-#define KVM_PIT_MEM_LENGTH 4
-#define KVM_PIT_FREQ 1193181
-#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
-#define KVM_PIT_CHANNEL_MASK 0x3
-
-struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
-void kvm_free_pit(struct kvm *kvm);
-
-void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val,
- int hpet_legacy_start);
-void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject);
-
-#endif
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 7cc2360..c178239 100644..100755
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -4,6 +4,7 @@
* Copyright (c) 2003-2004 Fabrice Bellard
* Copyright (c) 2007 Intel Corporation
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -26,13 +27,11 @@
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
* Port from Qemu.
*/
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/bitops.h>
#include "irq.h"
-
#include <linux/kvm_host.h>
-#include "trace.h"
+
+#include <ntddk.h>
+#include <gvm_types.h>
#define pr_pic_unimpl(fmt, ...) \
pr_err_ratelimited("kvm: pic: " fmt, ## __VA_ARGS__)
@@ -40,13 +39,11 @@
static void pic_irq_request(struct kvm *kvm, int level);
static void pic_lock(struct kvm_pic *s)
- __acquires(&s->lock)
{
spin_lock(&s->lock);
}
static void pic_unlock(struct kvm_pic *s)
- __releases(&s->lock)
{
bool wakeup = s->wakeup_needed;
struct kvm_vcpu *vcpu, *found = NULL;
@@ -67,7 +64,7 @@ static void pic_unlock(struct kvm_pic *s)
if (!found)
return;
- kvm_make_request(KVM_REQ_EVENT, found);
+ kvm_make_request(GVM_REQ_EVENT, found);
kvm_vcpu_kick(found);
}
}
@@ -84,7 +81,7 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
* it should be safe since PIC state is already updated at this stage.
*/
pic_unlock(s->pics_state);
- kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
+ //kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
pic_lock(s->pics_state);
}
@@ -199,8 +196,6 @@ int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level)
irq_source_id, level);
ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level);
pic_update_irq(s);
- trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
- s->pics[irq >> 3].imr, ret == 0);
pic_unlock(s);
return ret;
@@ -620,16 +615,16 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops);
kvm_iodevice_init(&s->dev_eclr, &picdev_eclr_ops);
mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,
+ ret = kvm_io_bus_register_dev(kvm, GVM_PIO_BUS, 0x20, 2,
&s->dev_master);
if (ret < 0)
goto fail_unlock;
- ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
+ ret = kvm_io_bus_register_dev(kvm, GVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
if (ret < 0)
goto fail_unreg_2;
- ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
+ ret = kvm_io_bus_register_dev(kvm, GVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
if (ret < 0)
goto fail_unreg_1;
@@ -638,10 +633,10 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
return s;
fail_unreg_1:
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
+ kvm_io_bus_unregister_dev(kvm, GVM_PIO_BUS, &s->dev_slave);
fail_unreg_2:
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);
+ kvm_io_bus_unregister_dev(kvm, GVM_PIO_BUS, &s->dev_master);
fail_unlock:
mutex_unlock(&kvm->slots_lock);
@@ -653,8 +648,8 @@ fail_unlock:
void kvm_destroy_pic(struct kvm_pic *vpic)
{
- kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
- kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
- kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+ kvm_io_bus_unregister_dev(vpic->kvm, GVM_PIO_BUS, &vpic->dev_master);
+ kvm_io_bus_unregister_dev(vpic->kvm, GVM_PIO_BUS, &vpic->dev_slave);
+ kvm_io_bus_unregister_dev(vpic->kvm, GVM_PIO_BUS, &vpic->dev_eclr);
kfree(vpic);
}
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 6e219e5..4e2c62b 100644..100755
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -1,6 +1,7 @@
/*
* Copyright (C) 2001 MandrakeSoft S.A.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* MandrakeSoft S.A.
* 43, rue d'Aboukir
@@ -28,36 +29,26 @@
*/
#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/smp.h>
-#include <linux/hrtimer.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/current.h>
-#include <trace/events/kvm.h>
+#include <uapi/linux/kvm.h>
#include "ioapic.h"
#include "lapic.h"
#include "irq.h"
-#if 0
-#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
+#ifdef KVM_IOAPIC_DEBUG
+#define ioapic_debug DbgPrint
#else
-#define ioapic_debug(fmt, arg...)
+#define ioapic_debug(fmt,...)
#endif
+
static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
bool line_status);
-static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
- unsigned long addr,
- unsigned long length)
+static size_t ioapic_read_indirect(struct kvm_ioapic *ioapic,
+ size_t addr,
+ size_t length)
{
- unsigned long result = 0;
+ size_t result = 0;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
@@ -94,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
{
ioapic->rtc_status.pending_eoi = 0;
- bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+ bitmap_zero(ioapic->rtc_status.dest_map.map, GVM_MAX_VCPU_ID);
}
static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
@@ -148,9 +139,6 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
struct kvm_vcpu *vcpu;
int i;
- if (RTC_GSI >= IOAPIC_NUM_PINS)
- return;
-
rtc_irq_eoi_tracking_reset(ioapic);
kvm_for_each_vcpu(i, vcpu, ioapic->kvm)
__rtc_irq_eoi_tracking_restore_one(vcpu);
@@ -220,11 +208,10 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
ret = ioapic_service(ioapic, irq, line_status);
out:
- trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
return ret;
}
-static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, size_t irr)
{
u32 idx;
@@ -253,7 +240,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
for (index = 0; index < IOAPIC_NUM_PINS; index++) {
e = &ioapic->redirtbl[index];
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
- kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
+ //kvm_irq_has_notifier(ioapic->kvm, GVM_IRQCHIP_IOAPIC, index) ||
index == RTC_GSI) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
e->fields.dest_id, e->fields.dest_mode) ||
@@ -311,7 +298,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
}
mask_after = e->fields.mask;
if (mask_before != mask_after)
- kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
+ kvm_fire_mask_notifiers(ioapic->kvm, GVM_IRQCHIP_IOAPIC, index, mask_after);
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
&& ioapic->irr & (1 << index))
ioapic_service(ioapic, index, false);
@@ -389,31 +376,11 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
int i;
spin_lock(&ioapic->lock);
- for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+ for (i = 0; i < GVM_IOAPIC_NUM_PINS; i++)
__clear_bit(irq_source_id, &ioapic->irq_states[i]);
spin_unlock(&ioapic->lock);
}
-static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
-{
- int i;
- struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
- eoi_inject.work);
- spin_lock(&ioapic->lock);
- for (i = 0; i < IOAPIC_NUM_PINS; i++) {
- union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
-
- if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
- continue;
-
- if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
- ioapic_service(ioapic, i, false);
- }
- spin_unlock(&ioapic->lock);
-}
-
-#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
-
static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
struct kvm_ioapic *ioapic, int vector, int trigger_mode)
{
@@ -441,7 +408,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
* after ack notifier returns.
*/
spin_unlock(&ioapic->lock);
- kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
+ //kvm_notify_acked_irq(ioapic->kvm, GVM_IRQCHIP_IOAPIC, i);
spin_lock(&ioapic->lock);
if (trigger_mode != IOAPIC_LEVEL_TRIG ||
@@ -452,21 +419,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
ent->fields.remote_irr = 0;
if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
++ioapic->irq_eoi[i];
- if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
- /*
- * Real hardware does not deliver the interrupt
- * immediately during eoi broadcast, and this
- * lets a buggy guest make slow progress
- * even if it does not correctly handle a
- * level-triggered interrupt. Emulate this
- * behavior if we detect an interrupt storm.
- */
- schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
- ioapic->irq_eoi[i] = 0;
- trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
- } else {
- ioapic_service(ioapic, i, false);
- }
+ ioapic_service(ioapic, i, false);
} else {
ioapic->irq_eoi[i] = 0;
}
@@ -501,7 +454,7 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
- ioapic_debug("addr %lx\n", (unsigned long)addr);
+ ioapic_debug("addr %lx\n", (size_t)addr);
ASSERT(!(addr & 0xf)); /* check alignment */
addr &= 0xff;
@@ -586,7 +539,6 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
{
int i;
- cancel_delayed_work_sync(&ioapic->eoi_inject);
for (i = 0; i < IOAPIC_NUM_PINS; i++)
ioapic->redirtbl[i].fields.mask = 1;
ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
@@ -612,13 +564,12 @@ int kvm_ioapic_init(struct kvm *kvm)
if (!ioapic)
return -ENOMEM;
spin_lock_init(&ioapic->lock);
- INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
kvm->arch.vioapic = ioapic;
kvm_ioapic_reset(ioapic);
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
ioapic->kvm = kvm;
mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
+ ret = kvm_io_bus_register_dev(kvm, GVM_MMIO_BUS, ioapic->base_address,
IOAPIC_MEM_LENGTH, &ioapic->dev);
mutex_unlock(&kvm->slots_lock);
if (ret < 0) {
@@ -635,8 +586,7 @@ void kvm_ioapic_destroy(struct kvm *kvm)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
- cancel_delayed_work_sync(&ioapic->eoi_inject);
- kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ kvm_io_bus_unregister_dev(kvm, GVM_MMIO_BUS, &ioapic->dev);
kvm->arch.vioapic = NULL;
kfree(ioapic);
}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 1cc6e54..854f770 100644..100755
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -1,3 +1,7 @@
+/*
+ * Copyright 2019 Google LLC
+ */
+
#ifndef __KVM_IO_APIC_H
#define __KVM_IO_APIC_H
@@ -5,11 +9,13 @@
#include <kvm/iodev.h>
+#include <gvm_types.h>
+
struct kvm;
struct kvm_vcpu;
-#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
-#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
+#define IOAPIC_NUM_PINS GVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS GVM_MAX_IRQ_ROUTES
#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
#define IOAPIC_EDGE_TRIG 0
#define IOAPIC_LEVEL_TRIG 1
@@ -34,21 +40,17 @@ struct kvm_vcpu;
#define IOAPIC_INIT 0x5
#define IOAPIC_EXTINT 0x7
-#ifdef CONFIG_X86
#define RTC_GSI 8
-#else
-#define RTC_GSI -1U
-#endif
struct dest_map {
/* vcpu bitmap where IRQ has been sent */
- DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
+ DECLARE_BITMAP(map, GVM_MAX_VCPU_ID);
/*
* Vector sent to a given vcpu, only valid when
* the vcpu's bit in map is set
*/
- u8 vectors[KVM_MAX_VCPU_ID];
+ u8 vectors[GVM_MAX_VCPU_ID];
};
@@ -81,30 +83,16 @@ struct kvm_ioapic {
u32 irr;
u32 pad;
union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
- unsigned long irq_states[IOAPIC_NUM_PINS];
+ size_t irq_states[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
spinlock_t lock;
struct rtc_status rtc_status;
- struct delayed_work eoi_inject;
u32 irq_eoi[IOAPIC_NUM_PINS];
u32 irr_delivered;
};
-#ifdef DEBUG
-#define ASSERT(x) \
-do { \
- if (!(x)) { \
- printk(KERN_EMERG "assertion failed %s: %d: %s\n", \
- __FILE__, __LINE__, #x); \
- BUG(); \
- } \
-} while (0)
-#else
-#define ASSERT(x) do { } while (0)
-#endif
-
static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
{
return kvm->arch.vioapic;
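
Throughout this patch, bitmap word types move from unsigned long to size_t (irq_states above, dest_map here, the irq_sources_bitmap in irq_comm.c, and so on). A plausible reason, stated as an assumption rather than anything the diff says, is that the target toolchain is LLP64, where long stays 32-bit while pointers and size_t are 64-bit, so size_t keeps the bitmap word machine-sized. A quick standalone check:

/* Illustrative only: shows the type widths that make an LLP64 port swap
 * bitmap word types from unsigned long to size_t, as this diff does. */
#include <stdio.h>
#include <stddef.h>
#include <limits.h>

#define BITS_PER_WORD   (sizeof(size_t) * CHAR_BIT)
#define BITMAP_WORDS(n) (((n) + BITS_PER_WORD - 1) / BITS_PER_WORD)

int main(void)
{
    printf("unsigned long: %zu bytes, size_t: %zu bytes\n",
           sizeof(unsigned long), sizeof(size_t));
    /* the 24 IOAPIC pins fit in a single size_t word on a 64-bit target */
    printf("words needed for 24 pins: %zu\n", BITMAP_WORDS(24));
    return 0;
}
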
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
deleted file mode 100644
index b181426..0000000
--- a/arch/x86/kvm/iommu.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Copyright IBM Corporation, 2008
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * Author: Allen M. Kay <allen.m.kay@intel.com>
- * Author: Weidong Han <weidong.han@intel.com>
- * Author: Ben-Ami Yassour <benami@il.ibm.com>
- */
-
-#include <linux/list.h>
-#include <linux/kvm_host.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/stat.h>
-#include <linux/iommu.h>
-#include "assigned-dev.h"
-
-static bool allow_unsafe_assigned_interrupts;
-module_param_named(allow_unsafe_assigned_interrupts,
- allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
- "Enable device assignment on platforms without interrupt remapping support.");
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm);
-static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages);
-
-static kvm_pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
- unsigned long npages)
-{
- gfn_t end_gfn;
- kvm_pfn_t pfn;
-
- pfn = gfn_to_pfn_memslot(slot, gfn);
- end_gfn = gfn + npages;
- gfn += 1;
-
- if (is_error_noslot_pfn(pfn))
- return pfn;
-
- while (gfn < end_gfn)
- gfn_to_pfn_memslot(slot, gfn++);
-
- return pfn;
-}
-
-static void kvm_unpin_pages(struct kvm *kvm, kvm_pfn_t pfn,
- unsigned long npages)
-{
- unsigned long i;
-
- for (i = 0; i < npages; ++i)
- kvm_release_pfn_clean(pfn + i);
-}
-
-int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
- gfn_t gfn, end_gfn;
- kvm_pfn_t pfn;
- int r = 0;
- struct iommu_domain *domain = kvm->arch.iommu_domain;
- int flags;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- gfn = slot->base_gfn;
- end_gfn = gfn + slot->npages;
-
- flags = IOMMU_READ;
- if (!(slot->flags & KVM_MEM_READONLY))
- flags |= IOMMU_WRITE;
- if (!kvm->arch.iommu_noncoherent)
- flags |= IOMMU_CACHE;
-
-
- while (gfn < end_gfn) {
- unsigned long page_size;
-
- /* Check if already mapped */
- if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
- gfn += 1;
- continue;
- }
-
- /* Get the page size we could use to map */
- page_size = kvm_host_page_size(kvm, gfn);
-
- /* Make sure the page_size does not exceed the memslot */
- while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
- page_size >>= 1;
-
- /* Make sure gfn is aligned to the page size we want to map */
- while ((gfn << PAGE_SHIFT) & (page_size - 1))
- page_size >>= 1;
-
- /* Make sure hva is aligned to the page size we want to map */
- while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
- page_size >>= 1;
-
- /*
- * Pin all pages we are about to map in memory. This is
- * important because we unmap and unpin in 4kb steps later.
- */
- pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
- if (is_error_noslot_pfn(pfn)) {
- gfn += 1;
- continue;
- }
-
- /* Map into IO address space */
- r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
- page_size, flags);
- if (r) {
- printk(KERN_ERR "kvm_iommu_map_address:"
- "iommu failed to map pfn=%llx\n", pfn);
- kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
- goto unmap_pages;
- }
-
- gfn += page_size >> PAGE_SHIFT;
-
- cond_resched();
- }
-
- return 0;
-
-unmap_pages:
- kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
- return r;
-}
-
-static int kvm_iommu_map_memslots(struct kvm *kvm)
-{
- int idx, r = 0;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- if (kvm->arch.iommu_noncoherent)
- kvm_arch_register_noncoherent_dma(kvm);
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
-
- kvm_for_each_memslot(memslot, slots) {
- r = kvm_iommu_map_pages(kvm, memslot);
- if (r)
- break;
- }
- srcu_read_unlock(&kvm->srcu, idx);
-
- return r;
-}
-
-int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
- int r;
- bool noncoherent;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- if (pdev == NULL)
- return -ENODEV;
-
- r = iommu_attach_device(domain, &pdev->dev);
- if (r) {
- dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
- return r;
- }
-
- noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
-
- /* Check if need to update IOMMU page table for guest memory */
- if (noncoherent != kvm->arch.iommu_noncoherent) {
- kvm_iommu_unmap_memslots(kvm);
- kvm->arch.iommu_noncoherent = noncoherent;
- r = kvm_iommu_map_memslots(kvm);
- if (r)
- goto out_unmap;
- }
-
- kvm_arch_start_assignment(kvm);
- pci_set_dev_assigned(pdev);
-
- dev_info(&pdev->dev, "kvm assign device\n");
-
- return 0;
-out_unmap:
- kvm_iommu_unmap_memslots(kvm);
- return r;
-}
-
-int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- if (pdev == NULL)
- return -ENODEV;
-
- iommu_detach_device(domain, &pdev->dev);
-
- pci_clear_dev_assigned(pdev);
- kvm_arch_end_assignment(kvm);
-
- dev_info(&pdev->dev, "kvm deassign device\n");
-
- return 0;
-}
-
-int kvm_iommu_map_guest(struct kvm *kvm)
-{
- int r;
-
- if (!iommu_present(&pci_bus_type)) {
- printk(KERN_ERR "%s: iommu not found\n", __func__);
- return -ENODEV;
- }
-
- mutex_lock(&kvm->slots_lock);
-
- kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
- if (!kvm->arch.iommu_domain) {
- r = -ENOMEM;
- goto out_unlock;
- }
-
- if (!allow_unsafe_assigned_interrupts &&
- !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
- printk(KERN_WARNING "%s: No interrupt remapping support,"
- " disallowing device assignment."
- " Re-enable with \"allow_unsafe_assigned_interrupts=1\""
- " module option.\n", __func__);
- iommu_domain_free(kvm->arch.iommu_domain);
- kvm->arch.iommu_domain = NULL;
- r = -EPERM;
- goto out_unlock;
- }
-
- r = kvm_iommu_map_memslots(kvm);
- if (r)
- kvm_iommu_unmap_memslots(kvm);
-
-out_unlock:
- mutex_unlock(&kvm->slots_lock);
- return r;
-}
-
-static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages)
-{
- struct iommu_domain *domain;
- gfn_t end_gfn, gfn;
- kvm_pfn_t pfn;
- u64 phys;
-
- domain = kvm->arch.iommu_domain;
- end_gfn = base_gfn + npages;
- gfn = base_gfn;
-
- /* check if iommu exists and in use */
- if (!domain)
- return;
-
- while (gfn < end_gfn) {
- unsigned long unmap_pages;
- size_t size;
-
- /* Get physical address */
- phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
-
- if (!phys) {
- gfn++;
- continue;
- }
-
- pfn = phys >> PAGE_SHIFT;
-
- /* Unmap address from IO address space */
- size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
- unmap_pages = 1ULL << get_order(size);
-
- /* Unpin all pages we just unmapped to not leak any memory */
- kvm_unpin_pages(kvm, pfn, unmap_pages);
-
- gfn += unmap_pages;
-
- cond_resched();
- }
-}
-
-void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
- kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
-}
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm)
-{
- int idx;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
-
- kvm_for_each_memslot(memslot, slots)
- kvm_iommu_unmap_pages(kvm, memslot);
-
- srcu_read_unlock(&kvm->srcu, idx);
-
- if (kvm->arch.iommu_noncoherent)
- kvm_arch_unregister_noncoherent_dma(kvm);
-
- return 0;
-}
-
-int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- mutex_lock(&kvm->slots_lock);
- kvm_iommu_unmap_memslots(kvm);
- kvm->arch.iommu_domain = NULL;
- kvm->arch.iommu_noncoherent = false;
- mutex_unlock(&kvm->slots_lock);
-
- iommu_domain_free(domain);
- return 0;
-}
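
The deleted kvm_iommu_map_pages() above picks the largest mapping size that still fits inside the memslot and keeps both the guest-physical and host-virtual addresses aligned, by starting from the host page size and halving. A standalone sketch of that selection loop, with made-up example values:

/*
 * Standalone sketch of the mapping-size selection from the deleted
 * kvm_iommu_map_pages(): start from the host page size for a gfn and
 * halve it until it fits the memslot and both the guest-physical and
 * host-virtual addresses are aligned to it.
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

static uint64_t pick_map_size(uint64_t gfn, uint64_t end_gfn,
                              uint64_t hva, uint64_t host_page_size)
{
    uint64_t size = host_page_size;

    /* must not run past the end of the memslot */
    while (gfn + (size >> PAGE_SHIFT) > end_gfn)
        size >>= 1;
    /* guest-physical address must be size-aligned */
    while ((gfn << PAGE_SHIFT) & (size - 1))
        size >>= 1;
    /* host-virtual address must be size-aligned */
    while (hva & (size - 1))
        size >>= 1;
    return size;
}

int main(void)
{
    /* 2 MiB host page, but the hva is only 4 KiB aligned */
    uint64_t size = pick_map_size(0x200, 0x1000, 0x7f0000001000ULL, 2 << 20);
    printf("selected mapping size: %llu KiB\n",
           (unsigned long long)(size >> 10));
    return 0;
}
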
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 60d91c9..ba0db8f 100644..100755
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -2,6 +2,7 @@
* irq.c: API for in kernel interrupt controller
* Copyright (c) 2007, Intel Corporation.
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -20,11 +21,9 @@
*
*/
-#include <linux/export.h>
#include <linux/kvm_host.h>
#include "irq.h"
-#include "i8254.h"
#include "x86.h"
/*
@@ -38,7 +37,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return 0;
}
-EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
/*
* check if there is a pending userspace external interrupt
@@ -57,10 +55,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
u8 accept = kvm_apic_accept_pic_intr(v);
if (accept) {
- if (irqchip_split(v->kvm))
- return pending_userspace_extint(v);
- else
- return pic_irqchip(v->kvm)->output;
+ return pic_irqchip(v->kvm)->output;
} else
return 0;
}
@@ -99,7 +94,6 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
}
-EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
/*
* Read pending interrupt(from non-APIC source)
@@ -108,13 +102,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
static int kvm_cpu_get_extint(struct kvm_vcpu *v)
{
if (kvm_cpu_has_extint(v)) {
- if (irqchip_split(v->kvm)) {
- int vector = v->arch.pending_external_vector;
-
- v->arch.pending_external_vector = -1;
- return vector;
- } else
- return kvm_pic_read_irq(v->kvm); /* PIC */
+ return kvm_pic_read_irq(v->kvm); /* PIC */
} else
return -1;
}
@@ -136,17 +124,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
return kvm_get_apic_interrupt(v); /* APIC */
}
-EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
if (lapic_in_kernel(vcpu))
kvm_inject_apic_timer_irqs(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
-
-void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
- __kvm_migrate_apic_timer(vcpu);
- __kvm_migrate_pit_timer(vcpu);
-}
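
With the split-irqchip branches removed from irq.c, external (ExtINT) interrupts always come from the in-kernel PIC and are consulted before the local APIC. A minimal stand-in for that delivery order; the two helpers below are stubs, not the kernel functions:

/*
 * Minimal stand-in for the delivery order left in irq.c after the
 * split-irqchip paths are removed: a pending PIC (ExtINT) vector wins
 * before the local APIC is consulted.
 */
#include <stdio.h>

static int pic_vector(void)  { return -1; }   /* nothing latched in the PIC  */
static int apic_vector(void) { return 0x30; } /* LAPIC has vector 0x30 ready */

static int get_interrupt(void)
{
    int vector = pic_vector();                    /* ExtINT first ...    */
    return vector != -1 ? vector : apic_vector(); /* ... then the LAPIC  */
}

int main(void)
{
    printf("delivering vector 0x%x\n", get_interrupt());
    return 0;
}
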
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 035731e..b51da4d 100644..100755
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -1,6 +1,7 @@
/*
* irq.h: in kernel interrupt controller related definitions
* Copyright (c) 2007, Intel Corporation.
+ * Copyright 2019 Google LLC
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -22,10 +23,7 @@
#ifndef __IRQ_H
#define __IRQ_H
-#include <linux/mm_types.h>
-#include <linux/hrtimer.h>
#include <linux/kvm_host.h>
-#include <linux/spinlock.h>
#include <kvm/iodev.h>
#include "ioapic.h"
@@ -33,7 +31,7 @@
#define PIC_NUM_PINS 16
#define SELECT_PIC(irq) \
- ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
+ ((irq) < 8 ? GVM_IRQCHIP_PIC_MASTER : GVM_IRQCHIP_PIC_SLAVE)
struct kvm;
struct kvm_vcpu;
@@ -70,7 +68,7 @@ struct kvm_pic {
struct kvm_io_device dev_slave;
struct kvm_io_device dev_eclr;
void (*ack_notifier)(void *opaque, int irq);
- unsigned long irq_states[PIC_NUM_PINS];
+ size_t irq_states[PIC_NUM_PINS];
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
@@ -91,18 +89,12 @@ static inline int pic_in_kernel(struct kvm *kvm)
return ret;
}
-static inline int irqchip_split(struct kvm *kvm)
-{
- return kvm->arch.irqchip_split;
-}
-
static inline int irqchip_in_kernel(struct kvm *kvm)
{
struct kvm_pic *vpic = pic_irqchip(kvm);
bool ret;
ret = (vpic != NULL);
- ret |= irqchip_split(kvm);
/* Read vpic before kvm->irq_routing. */
smp_rmb();
@@ -114,9 +106,6 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
-void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
-void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
-void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
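
SELECT_PIC() above (together with PIC_ROUTING_ENTRY() in the irq_comm.c diff that follows) encodes the classic cascade layout: GSIs 0-7 go to the master PIC, 8-15 to the slave, with the pin number taken modulo 8. A standalone illustration:

/* Standalone illustration of the master/slave mapping encoded by
 * SELECT_PIC() and PIC_ROUTING_ENTRY(): GSIs 0-7 land on the master
 * PIC, 8-15 on the slave, pin = gsi % 8. */
#include <stdio.h>

int main(void)
{
    for (int gsi = 0; gsi < 16; gsi++)
        printf("gsi %2d -> %s pin %d\n", gsi,
               gsi < 8 ? "master" : "slave", gsi % 8);
    return 0;
}
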
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 6c01916..1fd7c73 100644..100755
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -1,6 +1,7 @@
/*
* irq_comm.c: Common API for in kernel interrupt controller
* Copyright (c) 2007, Intel Corporation.
+ * Copyright 2019 Google LLC
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -21,21 +22,15 @@
*/
#include <linux/kvm_host.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <trace/events/kvm.h>
-
#include <asm/msidef.h>
-
#include "irq.h"
#include "ioapic.h"
-
#include "lapic.h"
-
-#include "hyperv.h"
#include "x86.h"
+#include <gvm_types.h>
+
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
@@ -45,7 +40,7 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
/*
* XXX: rejecting pic routes when pic isn't in use would be better,
* but the default routing table is installed while kvm->arch.vpic is
- * NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE.
+ * NULL and GVM_CREATE_IRQCHIP can race with GVM_IRQ_LINE.
*/
if (!pic)
return -1;
@@ -71,7 +66,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
{
int i, r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
- unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ size_t dest_vcpu_bitmap[BITS_TO_LONGS(GVM_MAX_VCPUS)];
unsigned int dest_vcpus = 0;
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
@@ -112,7 +107,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
if (dest_vcpus != 0) {
int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
- dest_vcpu_bitmap, KVM_MAX_VCPUS);
+ dest_vcpu_bitmap, GVM_MAX_VCPUS);
lowest = kvm_get_vcpu(kvm, idx);
}
@@ -126,10 +121,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq)
{
- trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ?
- (u64)e->msi.address_hi << 32 : 0),
- e->msi.data);
-
irq->dest_id = (e->msi.address_lo &
MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
if (kvm->arch.x2apic_format)
@@ -144,7 +135,6 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
irq->level = 1;
irq->shorthand = 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
static inline bool kvm_msi_route_invalid(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e)
@@ -169,16 +159,6 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
}
-static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
- struct kvm *kvm, int irq_source_id, int level,
- bool line_status)
-{
- if (!level)
- return -1;
-
- return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
-}
-
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
@@ -187,11 +167,7 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
int r;
switch (e->type) {
- case KVM_IRQ_ROUTING_HV_SINT:
- return kvm_hv_set_sint(e, kvm, irq_source_id, level,
- line_status);
-
- case KVM_IRQ_ROUTING_MSI:
+ case GVM_IRQ_ROUTING_MSI:
if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
@@ -210,7 +186,7 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
int kvm_request_irq_source_id(struct kvm *kvm)
{
- unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
+ size_t *bitmap = &kvm->arch.irq_sources_bitmap;
int irq_source_id;
mutex_lock(&kvm->irq_lock);
@@ -222,8 +198,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
goto unlock;
}
- ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
- ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+ ASSERT(irq_source_id != GVM_USERSPACE_IRQ_SOURCE_ID);
set_bit(irq_source_id, bitmap);
unlock:
mutex_unlock(&kvm->irq_lock);
@@ -233,8 +208,7 @@ unlock:
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
{
- ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
- ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+ ASSERT(irq_source_id != GVM_USERSPACE_IRQ_SOURCE_ID);
mutex_lock(&kvm->irq_lock);
if (irq_source_id < 0 ||
@@ -257,7 +231,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
{
mutex_lock(&kvm->irq_lock);
kimn->irq = irq;
- hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list);
+ hlist_add_head(&kimn->link, &kvm->arch.mask_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
@@ -265,24 +239,25 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn)
{
mutex_lock(&kvm->irq_lock);
- hlist_del_rcu(&kimn->link);
+ hlist_del(&kimn->link);
mutex_unlock(&kvm->irq_lock);
- synchronize_srcu(&kvm->irq_srcu);
}
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask)
{
struct kvm_irq_mask_notifier *kimn;
- int idx, gsi;
+ int gsi;
- idx = srcu_read_lock(&kvm->irq_srcu);
+ mutex_lock(&kvm->irq_lock);
gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
if (gsi != -1)
- hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link)
+#define LIST_ENTRY_TYPE_INFO struct kvm_irq_mask_notifier
+ hlist_for_each_entry(kimn, &kvm->arch.mask_notifier_list, link)
if (kimn->irq == gsi)
kimn->func(kimn, mask);
- srcu_read_unlock(&kvm->irq_srcu, idx);
+#undef LIST_ENTRY_TYPE_INFO
+ mutex_unlock(&kvm->irq_lock);
}
int kvm_set_routing_entry(struct kvm *kvm,
@@ -294,20 +269,20 @@ int kvm_set_routing_entry(struct kvm *kvm,
unsigned max_pin;
switch (ue->type) {
- case KVM_IRQ_ROUTING_IRQCHIP:
+ case GVM_IRQ_ROUTING_IRQCHIP:
delta = 0;
switch (ue->u.irqchip.irqchip) {
- case KVM_IRQCHIP_PIC_MASTER:
+ case GVM_IRQCHIP_PIC_MASTER:
e->set = kvm_set_pic_irq;
max_pin = PIC_NUM_PINS;
break;
- case KVM_IRQCHIP_PIC_SLAVE:
+ case GVM_IRQCHIP_PIC_SLAVE:
e->set = kvm_set_pic_irq;
max_pin = PIC_NUM_PINS;
delta = 8;
break;
- case KVM_IRQCHIP_IOAPIC:
- max_pin = KVM_IOAPIC_NUM_PINS;
+ case GVM_IRQCHIP_IOAPIC:
+ max_pin = GVM_IOAPIC_NUM_PINS;
e->set = kvm_set_ioapic_irq;
break;
default:
@@ -318,7 +293,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
if (e->irqchip.pin >= max_pin)
goto out;
break;
- case KVM_IRQ_ROUTING_MSI:
+ case GVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
e->msi.address_lo = ue->u.msi.address_lo;
e->msi.address_hi = ue->u.msi.address_hi;
@@ -327,11 +302,6 @@ int kvm_set_routing_entry(struct kvm *kvm,
if (kvm_msi_route_invalid(kvm, e))
goto out;
break;
- case KVM_IRQ_ROUTING_HV_SINT:
- e->set = kvm_hv_set_sint;
- e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
- e->hv_sint.sint = ue->u.hv_sint.sint;
- break;
default:
goto out;
}
@@ -366,15 +336,14 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
return r == 1;
}
-EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
#define IOAPIC_ROUTING_ENTRY(irq) \
- { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
- .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
+ { .gsi = irq, .type = GVM_IRQ_ROUTING_IRQCHIP, \
+ .u.irqchip = { .irqchip = GVM_IRQCHIP_IOAPIC, .pin = (irq) } }
#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
#define PIC_ROUTING_ENTRY(irq) \
- { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
+ { .gsi = irq, .type = GVM_IRQ_ROUTING_IRQCHIP, \
.u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
#define ROUTING_ENTRY2(irq) \
IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
@@ -400,13 +369,6 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
ARRAY_SIZE(default_routing), 0);
}
-static const struct kvm_irq_routing_entry empty_routing[] = {};
-
-int kvm_setup_empty_irq_routing(struct kvm *kvm)
-{
- return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
-}
-
void kvm_arch_post_irq_routing_update(struct kvm *kvm)
{
if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
@@ -414,37 +376,3 @@ void kvm_arch_post_irq_routing_update(struct kvm *kvm)
kvm_make_scan_ioapic_request(kvm);
}
-void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
- ulong *ioapic_handled_vectors)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvm_kernel_irq_routing_entry *entry;
- struct kvm_irq_routing_table *table;
- u32 i, nr_ioapic_pins;
- int idx;
-
- idx = srcu_read_lock(&kvm->irq_srcu);
- table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
- nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
- kvm->arch.nr_reserved_ioapic_pins);
- for (i = 0; i < nr_ioapic_pins; ++i) {
- hlist_for_each_entry(entry, &table->map[i], link) {
- struct kvm_lapic_irq irq;
-
- if (entry->type != KVM_IRQ_ROUTING_MSI)
- continue;
-
- kvm_set_msi_irq(vcpu->kvm, entry, &irq);
-
- if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
- irq.dest_id, irq.dest_mode))
- __set_bit(irq.vector, ioapic_handled_vectors);
- }
- }
- srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-void kvm_arch_irq_routing_update(struct kvm *kvm)
-{
- kvm_hv_irq_routing_update(kvm);
-}
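
For lowest-priority delivery, kvm_irq_delivery_to_apic() above hashes the vector over the set of candidate vcpus via kvm_vector_to_index() (declared in the lapic.c diff below; its body is not shown in this excerpt). A standalone sketch of that selection, assuming the usual vector-modulo-count walk over the destination bitmap; the fixed 64-bit bitmap here is a simplification:

/*
 * Standalone sketch of the vector-hashed selection used for
 * lowest-priority delivery: the vector modulo the number of candidate
 * vcpus picks which set bit of the destination bitmap wins.
 */
#include <stdio.h>
#include <stdint.h>

static int vector_to_index(unsigned vector, unsigned dest_vcpus, uint64_t bitmap)
{
    unsigned target = vector % dest_vcpus;   /* which of the set bits to pick */
    for (int bit = 0; bit < 64; bit++) {
        if (!((bitmap >> bit) & 1))
            continue;
        if (target-- == 0)
            return bit;                      /* index of the chosen vcpu */
    }
    return -1;                               /* fewer set bits than promised */
}

int main(void)
{
    uint64_t candidates = 0x16;              /* vcpus 1, 2 and 4 can receive it */
    for (unsigned vec = 32; vec < 36; vec++)
        printf("vector %u -> vcpu %d\n", vec, vector_to_index(vec, 3, candidates));
    return 0;
}

Different vectors thus land on different candidates, a cheap stand-in for true lowest-priority arbitration.
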
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 762cdf2..2ca26a9 100644..100755
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -1,15 +1,21 @@
+/*
+ * Copyright 2019 Google LLC
+ */
+
#ifndef ASM_KVM_CACHE_REGS_H
#define ASM_KVM_CACHE_REGS_H
+#include <uapi/asm/processor-flags.h>
+
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
-static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
+static inline size_t kvm_register_read(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
- if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail))
+ if (!test_bit(reg, (size_t *)&vcpu->arch.regs_avail))
kvm_x86_ops->cache_reg(vcpu, reg);
return vcpu->arch.regs[reg];
@@ -17,19 +23,19 @@ static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
static inline void kvm_register_write(struct kvm_vcpu *vcpu,
enum kvm_reg reg,
- unsigned long val)
+ size_t val)
{
vcpu->arch.regs[reg] = val;
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ __set_bit(reg, (size_t *)&vcpu->arch.regs_dirty);
+ __set_bit(reg, (size_t *)&vcpu->arch.regs_avail);
}
-static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
+static inline size_t kvm_rip_read(struct kvm_vcpu *vcpu)
{
return kvm_register_read(vcpu, VCPU_REGS_RIP);
}
-static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
+static inline void kvm_rip_write(struct kvm_vcpu *vcpu, size_t val)
{
kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}
@@ -39,54 +45,49 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
might_sleep(); /* on svm */
if (!test_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail))
+ (size_t *)&vcpu->arch.regs_avail))
kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
return vcpu->arch.walk_mmu->pdptrs[index];
}
-static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
+static inline size_t kvm_read_cr0_bits(struct kvm_vcpu *vcpu, size_t mask)
{
- ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
+ size_t tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
if (tmask & vcpu->arch.cr0_guest_owned_bits)
kvm_x86_ops->decache_cr0_guest_bits(vcpu);
return vcpu->arch.cr0 & mask;
}
-static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
+static inline size_t kvm_read_cr0(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr0_bits(vcpu, ~0UL);
+ return kvm_read_cr0_bits(vcpu, ~(size_t)0);
}
-static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
+static inline size_t kvm_read_cr4_bits(struct kvm_vcpu *vcpu, size_t mask)
{
- ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
+ size_t tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
if (tmask & vcpu->arch.cr4_guest_owned_bits)
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
return vcpu->arch.cr4 & mask;
}
-static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
+static inline size_t kvm_read_cr3(struct kvm_vcpu *vcpu)
{
- if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+ if (!test_bit(VCPU_EXREG_CR3, (size_t *)&vcpu->arch.regs_avail))
kvm_x86_ops->decache_cr3(vcpu);
return vcpu->arch.cr3;
}
-static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
+static inline size_t kvm_read_cr4(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr4_bits(vcpu, ~0UL);
+ return kvm_read_cr4_bits(vcpu, ~(size_t)0);
}
static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
{
- return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
- | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
-}
-
-static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
-{
- return kvm_x86_ops->get_pkru(vcpu);
+ return (kvm_register_read(vcpu, VCPU_REGS_RAX) & (unsigned)-1)
+ | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & (unsigned)-1) << 32);
}
static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
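
kvm_cache_regs.h implements a lazy register cache: a read checks the regs_avail bitmap and only calls back into the vendor code (kvm_x86_ops->cache_reg) on a miss, while a write marks the register both available and dirty so it is flushed on the next entry. A standalone sketch of the same pattern with a stubbed backend:

/*
 * Standalone sketch of the lazy register cache pattern above. The
 * backend here is a stub standing in for the VMCS/VMCB accessors.
 */
#include <stdio.h>
#include <stdint.h>

enum { REG_RAX, REG_RIP, NR_REGS };

struct vcpu_regs {
    uint64_t regs[NR_REGS];
    uint32_t avail;          /* bit set: regs[i] reflects hardware */
    uint32_t dirty;          /* bit set: regs[i] must be written back */
};

static void backend_cache_reg(struct vcpu_regs *v, int reg)
{
    v->regs[reg] = 0x1000 + reg;   /* pretend we read it from hardware state */
    v->avail |= 1u << reg;
    printf("cache miss: fetched reg %d\n", reg);
}

static uint64_t reg_read(struct vcpu_regs *v, int reg)
{
    if (!(v->avail & (1u << reg)))
        backend_cache_reg(v, reg);
    return v->regs[reg];
}

static void reg_write(struct vcpu_regs *v, int reg, uint64_t val)
{
    v->regs[reg] = val;
    v->avail |= 1u << reg;
    v->dirty |= 1u << reg;
}

int main(void)
{
    struct vcpu_regs v = { {0}, 0, 0 };
    printf("rip = 0x%llx\n", (unsigned long long)reg_read(&v, REG_RIP));
    reg_write(&v, REG_RAX, 42);
    printf("rax = 0x%llx (no second fetch)\n",
           (unsigned long long)reg_read(&v, REG_RAX));
    return 0;
}
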
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6f69340..7a156d4 100644..100755
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -6,6 +6,7 @@
* Copyright (C) 2007 Novell
* Copyright (C) 2007 Intel
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Dor Laor <dor.laor@qumranet.com>
@@ -19,29 +20,15 @@
*/
#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/smp.h>
-#include <linux/hrtimer.h>
-#include <linux/io.h>
-#include <linux/export.h>
-#include <linux/math64.h>
-#include <linux/slab.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/page.h>
-#include <asm/current.h>
+#include <uapi/linux/kvm.h>
#include <asm/apicdef.h>
-#include <asm/delay.h>
-#include <linux/atomic.h>
-#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
-#include "trace.h"
#include "x86.h"
#include "cpuid.h"
-#include "hyperv.h"
+
+#include <gvm_types.h>
+
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -57,10 +44,10 @@
#define APIC_BUS_CYCLE_NS 1
/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...)
+#define apic_debug(fmt, arg,...)
/* 14 is the version for Xeon and Pentium 8.4.8*/
-#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
+#define APIC_VERSION (0x14UL | ((GVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH (1 << 12)
/* followed define is not in apicdef.h */
#define APIC_SHORT_MASK 0xc0000
@@ -72,9 +59,33 @@
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
+
+/**
+ * hweightN - returns the Hamming weight of an N-bit word
+ * @w: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+static unsigned int hweight32(unsigned int w)
+{
+ w -= (w >> 1) & 0x55555555;
+ w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
+ w = (w + (w >> 4)) & 0x0f0f0f0f;
+ return (w * 0x01010101) >> 24;
+}
+
+static unsigned int hweight16(unsigned int w)
+{
+ unsigned int res = w - ((w >> 1) & 0x5555);
+ res = (res & 0x3333) + ((res >> 2) & 0x3333);
+ res = (res + (res >> 4)) & 0x0F0F;
+ return (res + (res >> 8)) & 0x00FF;
+}
+
static inline int apic_test_vector(int vec, void *bitmap)
{
- return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+ return test_bit(VEC_POS(vec), (size_t *)((char *)(bitmap)+REG_POS(vec)));
}
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
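
The port open-codes hweight32()/hweight16() above (presumably because the kernel's bitops helpers are unavailable in this build environment; that is an assumption). A quick standalone check of the SWAR popcount against a naive bit loop:

/* Quick standalone check of the SWAR popcount helper the diff adds. */
#include <stdio.h>

static unsigned int hweight32(unsigned int w)
{
    w -= (w >> 1) & 0x55555555;
    w  = (w & 0x33333333) + ((w >> 2) & 0x33333333);
    w  = (w + (w >> 4)) & 0x0f0f0f0f;
    return (w * 0x01010101) >> 24;
}

static unsigned int naive(unsigned int w)
{
    unsigned int n = 0;
    while (w) { n += w & 1; w >>= 1; }
    return n;
}

int main(void)
{
    unsigned int samples[] = { 0, 1, 0xff, 0xdeadbeef, 0xffffffff };
    for (int i = 0; i < 5; i++)
        printf("0x%08x -> %u (naive %u)\n",
               samples[i], hweight32(samples[i]), naive(samples[i]));
    return 0;
}
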
@@ -87,22 +98,19 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
static inline void apic_clear_vector(int vec, void *bitmap)
{
- clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+ clear_bit(VEC_POS(vec), (size_t *)((u8 *)(bitmap) + REG_POS(vec)));
}
static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
- return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+ return __test_and_set_bit(VEC_POS(vec), (size_t *)((u8 *)(bitmap) + REG_POS(vec)));
}
static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
- return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+ return __test_and_clear_bit(VEC_POS(vec), (size_t *)((u8 *)(bitmap) + REG_POS(vec)));
}
-struct static_key_deferred apic_hw_disabled __read_mostly;
-struct static_key_deferred apic_sw_disabled __read_mostly;
-
static inline int apic_enabled(struct kvm_lapic *apic)
{
return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
@@ -118,7 +126,7 @@ static inline int apic_enabled(struct kvm_lapic *apic)
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
switch (map->mode) {
- case KVM_APIC_MODE_X2APIC: {
+ case GVM_APIC_MODE_X2APIC: {
u32 offset = (dest_id >> 16) * 16;
u32 max_apic_id = map->max_apic_id;
@@ -133,11 +141,11 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
return true;
}
- case KVM_APIC_MODE_XAPIC_FLAT:
+ case GVM_APIC_MODE_XAPIC_FLAT:
*cluster = map->xapic_flat_map;
*mask = dest_id & 0xff;
return true;
- case KVM_APIC_MODE_XAPIC_CLUSTER:
+ case GVM_APIC_MODE_XAPIC_CLUSTER:
*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
*mask = dest_id & 0xf;
return true;
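
kvm_apic_map_get_logical_dest() above splits a logical destination ID into a cluster plus a member mask, differently per addressing mode. A standalone illustration that only prints the decomposition (the x2APIC case is simplified: the real code also caps the mask at the cluster size):

/* Illustrative decomposition of a logical destination ID per APIC mode. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t dest = 0x34;            /* example logical destination ID */

    /* xAPIC flat: a single 8-wide cluster, dest itself is the member mask */
    printf("flat:    mask 0x%02x\n", dest & 0xffu);

    /* xAPIC cluster: high nibble picks the cluster, low nibble the members */
    printf("cluster: cluster %u, mask 0x%x\n", (dest >> 4) & 0xfu, dest & 0xfu);

    /* x2APIC: upper 16 bits pick a 16-wide cluster, low bits the members */
    printf("x2apic:  offset %u, mask 0x%04x\n",
           (unsigned)((dest >> 16) * 16), dest & 0xffffu);
    return 0;
}
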
@@ -147,13 +155,6 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
}
}
-static void kvm_apic_map_free(struct rcu_head *rcu)
-{
- struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
-
- kvfree(map);
-}
-
static void recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
@@ -191,13 +192,13 @@ static void recalculate_apic_map(struct kvm *kvm)
new->phys_map[aid] = apic;
if (apic_x2apic_mode(apic)) {
- new->mode |= KVM_APIC_MODE_X2APIC;
+ new->mode |= GVM_APIC_MODE_X2APIC;
} else if (ldr) {
ldr = GET_APIC_LOGICAL_ID(ldr);
if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
- new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
+ new->mode |= GVM_APIC_MODE_XAPIC_FLAT;
else
- new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
+ new->mode |= GVM_APIC_MODE_XAPIC_CLUSTER;
}
if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
@@ -207,13 +208,12 @@ static void recalculate_apic_map(struct kvm *kvm)
cluster[ffs(mask) - 1] = apic;
}
out:
- old = rcu_dereference_protected(kvm->arch.apic_map,
- lockdep_is_held(&kvm->arch.apic_map_lock));
- rcu_assign_pointer(kvm->arch.apic_map, new);
+ old = kvm->arch.apic_map;
+ kvm->arch.apic_map = new;
mutex_unlock(&kvm->arch.apic_map_lock);
if (old)
- call_rcu(&old->rcu, kvm_apic_map_free);
+ kvfree(old);
kvm_make_scan_ioapic_request(kvm);
}
@@ -227,10 +227,8 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
if (enabled != apic->sw_enabled) {
apic->sw_enabled = enabled;
if (enabled) {
- static_key_slow_dec_deferred(&apic_sw_disabled);
recalculate_apic_map(apic->vcpu->kvm);
- } else
- static_key_slow_inc(&apic_sw_disabled.key);
+ } //else
}
}
@@ -275,11 +273,6 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic)
return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
}
-static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
-{
- return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
-}
-
static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
@@ -288,7 +281,7 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- struct kvm_cpuid_entry2 *feat;
+ struct kvm_cpuid_entry *feat;
u32 v = APIC_VERSION;
if (!lapic_in_kernel(vcpu))
@@ -300,7 +293,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
kvm_lapic_set_reg(apic, APIC_LVR, v);
}
-static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
+static const unsigned int apic_lvt_mask[GVM_APIC_LVT_NUM] = {
LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
LVT_MASK | APIC_MODE_MASK, /* LVTPC */
@@ -315,7 +308,7 @@ static int find_highest_vector(void *bitmap)
for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
vec >= 0; vec -= APIC_VECTORS_PER_REG) {
- reg = bitmap + REG_POS(vec);
+ reg = (u32 *)((u8 *)bitmap + REG_POS(vec));
if (*reg)
return fls(*reg) - 1 + vec;
}
@@ -330,7 +323,7 @@ static u8 count_vectors(void *bitmap)
u8 count = 0;
for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
- reg = bitmap + REG_POS(vec);
+ reg = (u32 *)((u8 *)bitmap + REG_POS(vec));
count += hweight32(*reg);
}
@@ -344,10 +337,9 @@ void __kvm_apic_update_irr(u32 *pir, void *regs)
for (i = 0; i <= 7; i++) {
pir_val = xchg(&pir[i], 0);
if (pir_val)
- *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
+ *((u32 *)((u8 *)regs + APIC_IRR + i * 0x10)) |= pir_val;
}
}
-EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
{
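
__kvm_apic_update_irr() above merges a posted-interrupt request (PIR) descriptor into the virtual APIC's IRR: each of the eight 32-bit PIR words is atomically swapped to zero and OR-ed into the IRR slice at APIC_IRR + i * 0x10. A standalone model of that merge; plain assignment stands in for the atomic xchg():

/* Standalone model of the PIR -> IRR merge done by __kvm_apic_update_irr(). */
#include <stdio.h>
#include <stdint.h>

#define APIC_IRR 0x200

int main(void)
{
    uint32_t regs[0x100] = { 0 };     /* fake 1 KiB APIC register page */
    uint32_t pir[8]      = { 0 };

    pir[1] = 1u << 2;                 /* vector 34 was posted (32 * 1 + 2) */

    for (int i = 0; i < 8; i++) {
        uint32_t val = pir[i];        /* real code: val = xchg(&pir[i], 0) */
        pir[i] = 0;
        if (val)
            regs[(APIC_IRR + i * 0x10) / 4] |= val;   /* OR into IRR word i */
    }

    printf("IRR word 1 = 0x%x\n", regs[(APIC_IRR + 0x10) / 4]);
    return 0;
}
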
@@ -355,9 +347,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
__kvm_apic_update_irr(pir, apic->regs);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
static inline int apic_search_irr(struct kvm_lapic *apic)
{
@@ -375,8 +366,6 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
if (!apic->irr_pending)
return -1;
- if (apic->vcpu->arch.apicv_active)
- kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
result = apic_search_irr(apic);
ASSERT(result == -1 || result >= 16);
@@ -392,7 +381,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
if (unlikely(vcpu->arch.apicv_active)) {
/* try to update RVI */
apic_clear_vector(vec, apic->regs + APIC_IRR);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
} else {
apic->irr_pending = false;
apic_clear_vector(vec, apic->regs + APIC_IRR);
@@ -496,54 +485,6 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
irq->level, irq->trig_mode, dest_map);
}
-static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
-{
-
- return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
- sizeof(val));
-}
-
-static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
-{
-
- return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
- sizeof(*val));
-}
-
-static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
-}
-
-static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
-{
- u8 val;
- if (pv_eoi_get_user(vcpu, &val) < 0)
- apic_debug("Can't read EOI MSR value: 0x%llx\n",
- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
- return val & 0x1;
-}
-
-static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
-{
- if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
- apic_debug("Can't set EOI MSR value: 0x%llx\n",
- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
- return;
- }
- __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
-}
-
-static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
-{
- if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
- apic_debug("Can't clear EOI MSR value: 0x%llx\n",
- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
- return;
- }
- __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
-}
-
static void apic_update_ppr(struct kvm_lapic *apic)
{
u32 tpr, isrv, ppr, old_ppr;
@@ -565,7 +506,7 @@ static void apic_update_ppr(struct kvm_lapic *apic)
if (old_ppr != ppr) {
kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
if (ppr < old_ppr)
- kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+ kvm_make_request(GVM_REQ_EVENT, apic->vcpu);
}
}
@@ -623,7 +564,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
}
}
-/* The KVM local APIC implementation has two quirks:
+/* The kvm local APIC implementation has two quirks:
*
* - the xAPIC MDA stores the destination at bits 24-31, while this
* is not true of struct kvm_lapic_irq's dest_id field. This is
@@ -635,7 +576,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
* rewrites the destination of non-IPI messages from APIC_BROADCAST
* to X2APIC_BROADCAST.
*
- * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API. This is
+ * The broadcast quirk can be disabled with GVM_CAP_X2APIC_API. This is
* important when userspace wants to use x2APIC-format MSIs, because
* APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
*/
@@ -681,10 +622,9 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
return false;
}
}
-EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
- const unsigned long *bitmap, u32 bitmap_size)
+ const size_t *bitmap, u32 bitmap_size)
{
u32 mod;
int i, idx = -1;
@@ -713,7 +653,7 @@ static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
{
if (kvm->arch.x2apic_broadcast_quirk_disabled) {
if ((irq->dest_id == APIC_BROADCAST &&
- map->mode != KVM_APIC_MODE_X2APIC))
+ map->mode != GVM_APIC_MODE_X2APIC))
return true;
if (irq->dest_id == X2APIC_BROADCAST)
return true;
@@ -737,7 +677,7 @@ static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
struct kvm_lapic **src, struct kvm_lapic_irq *irq,
struct kvm_apic_map *map, struct kvm_lapic ***dst,
- unsigned long *bitmap)
+ size_t *bitmap)
{
int i, lowest;
@@ -803,7 +743,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
{
struct kvm_apic_map *map;
- unsigned long bitmap;
+ size_t bitmap;
struct kvm_lapic **dst = NULL;
int i;
bool ret;
@@ -850,7 +790,7 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu)
{
struct kvm_apic_map *map;
- unsigned long bitmap;
+ size_t bitmap;
struct kvm_lapic **dst = NULL;
bool ret = false;
@@ -862,7 +802,7 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
hweight16(bitmap) == 1) {
- unsigned long i = find_first_bit(&bitmap, 16);
+ size_t i = find_first_bit(&bitmap, 16);
if (dst[i]) {
*dest_vcpu = dst[i]->vcpu;
@@ -885,8 +825,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int result = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
- trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
- trig_mode, vector);
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
@@ -912,26 +850,20 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic_clear_vector(vector, apic->regs + APIC_TMR);
}
- if (vcpu->arch.apicv_active)
+ if (vcpu->arch.apicv_active &&
+ kvm_x86_ops->deliver_posted_interrupt)
kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
else {
kvm_lapic_set_irr(vector, apic);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
break;
- case APIC_DM_REMRD:
- result = 1;
- vcpu->arch.pv.pv_unhalted = 1;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_vcpu_kick(vcpu);
- break;
-
case APIC_DM_SMI:
result = 1;
- kvm_make_request(KVM_REQ_SMI, vcpu);
+ kvm_make_request(GVM_REQ_SMI, vcpu);
kvm_vcpu_kick(vcpu);
break;
@@ -944,12 +876,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_INIT:
if (!trig_mode || level) {
result = 1;
- /* assumes that there are only KVM_APIC_INIT/SIPI */
- apic->pending_events = (1UL << KVM_APIC_INIT);
+ /* assumes that there are only GVM_APIC_INIT/SIPI */
+ apic->pending_events = (1ULL << GVM_APIC_INIT);
/* make sure pending_events is visible before sending
* the request */
smp_wmb();
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
} else {
apic_debug("Ignoring de-assert INIT to vcpu %d\n",
@@ -964,8 +896,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->sipi_vector = vector;
/* make sure sipi_vector is visible for the receiver */
smp_wmb();
- set_bit(KVM_APIC_SIPI, &apic->pending_events);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ set_bit(GVM_APIC_SIPI, &apic->pending_events);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
break;
@@ -1003,13 +935,6 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
if (!kvm_ioapic_handles_vector(apic, vector))
return;
- /* Request a KVM exit to inform the userspace IOAPIC. */
- if (irqchip_split(apic->vcpu->kvm)) {
- apic->vcpu->arch.pending_ioapic_eoi = vector;
- kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
- return;
- }
-
if (apic_test_vector(vector, apic->regs + APIC_TMR))
trigger_mode = IOAPIC_LEVEL_TRIG;
else
@@ -1022,8 +947,6 @@ static int apic_set_eoi(struct kvm_lapic *apic)
{
int vector = apic_find_highest_isr(apic);
- trace_kvm_eoi(apic, vector);
-
/*
* Not every write EOI will has corresponding ISR,
* one example is when Kernel check timer on setup_IO_APIC
@@ -1034,11 +957,8 @@ static int apic_set_eoi(struct kvm_lapic *apic)
apic_clear_isr(vector, apic);
apic_update_ppr(apic);
- if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
- kvm_hv_synic_send_eoi(apic->vcpu, vector);
-
kvm_ioapic_send_eoi(apic, vector);
- kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+ kvm_make_request(GVM_REQ_EVENT, apic->vcpu);
return vector;
}
@@ -1050,12 +970,9 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- trace_kvm_eoi(apic, vector);
-
kvm_ioapic_send_eoi(apic, vector);
- kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+ kvm_make_request(GVM_REQ_EVENT, apic->vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
static void apic_send_ipi(struct kvm_lapic *apic)
{
@@ -1075,8 +992,6 @@ static void apic_send_ipi(struct kvm_lapic *apic)
else
irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
- trace_kvm_apic_ipi(icr_low, irq.dest_id);
-
apic_debug("icr_high 0x%x, icr_low 0x%x, "
"short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
"dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
@@ -1117,7 +1032,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write)
struct kvm_vcpu *vcpu = apic->vcpu;
struct kvm_run *run = vcpu->run;
- kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
+ kvm_make_request(GVM_REQ_REPORT_TPR_ACCESS, vcpu);
run->tpr_access.rip = kvm_rip_read(vcpu);
run->tpr_access.is_write = write;
}
@@ -1137,13 +1052,9 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
switch (offset) {
case APIC_ARBPRI:
- apic_debug("Access APIC ARBPRI register which is for P6\n");
+ //apic_debug("Access APIC ARBPRI register which is for P6\n");
break;
-
case APIC_TMCCT: /* Timer CCR */
- if (apic_lvtt_tscdeadline(apic))
- return 0;
-
val = apic_get_tmcct(apic);
break;
case APIC_PROCPRI:
@@ -1175,21 +1086,19 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
static const u64 rmask = 0x43ff01ffffffe70cULL;
if ((alignment + len) > 4) {
- apic_debug("KVM_APIC_READ: alignment error %x %d\n",
+ apic_debug("GVM_APIC_READ: alignment error %x %d\n",
offset, len);
return 1;
}
if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
- apic_debug("KVM_APIC_READ: read reserved register %x\n",
+ apic_debug("GVM_APIC_READ: read reserved register %x\n",
offset);
return 1;
}
result = __apic_read(apic, offset & ~0xf);
- trace_kvm_apic_read(offset, result);
-
switch (len) {
case 1:
case 2:
@@ -1203,7 +1112,6 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
@@ -1253,8 +1161,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
static void apic_timer_expired(struct kvm_lapic *apic)
{
struct kvm_vcpu *vcpu = apic->vcpu;
- struct swait_queue_head *q = &vcpu->wq;
- struct kvm_timer *ktimer = &apic->lapic_timer;
+ //struct swait_queue_head *q = &vcpu->wq;
if (atomic_read(&apic->lapic_timer.pending))
return;
@@ -1262,11 +1169,12 @@ static void apic_timer_expired(struct kvm_lapic *apic)
atomic_inc(&apic->lapic_timer.pending);
kvm_set_pending_timer(vcpu);
+ kvm_vcpu_kick(vcpu);
+
+#if 0
if (swait_active(q))
swake_up(q);
-
- if (apic_lvtt_tscdeadline(apic))
- ktimer->expired_tscdeadline = ktimer->tscdeadline;
+#endif
}
/*
@@ -1292,136 +1200,6 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
return false;
}
-void wait_lapic_expire(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
- u64 guest_tsc, tsc_deadline;
-
- if (!lapic_in_kernel(vcpu))
- return;
-
- if (apic->lapic_timer.expired_tscdeadline == 0)
- return;
-
- if (!lapic_timer_int_injected(vcpu))
- return;
-
- tsc_deadline = apic->lapic_timer.expired_tscdeadline;
- apic->lapic_timer.expired_tscdeadline = 0;
- guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
-
- /* __delay is delay_tsc whenever the hardware has TSC, thus always. */
- if (guest_tsc < tsc_deadline)
- __delay(min(tsc_deadline - guest_tsc,
- nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
-}
-
-static void start_sw_tscdeadline(struct kvm_lapic *apic)
-{
- u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
- u64 ns = 0;
- ktime_t expire;
- struct kvm_vcpu *vcpu = apic->vcpu;
- unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
- unsigned long flags;
- ktime_t now;
-
- if (unlikely(!tscdeadline || !this_tsc_khz))
- return;
-
- local_irq_save(flags);
-
- now = apic->lapic_timer.timer.base->get_time();
- guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- if (likely(tscdeadline > guest_tsc)) {
- ns = (tscdeadline - guest_tsc) * 1000000ULL;
- do_div(ns, this_tsc_khz);
- expire = ktime_add_ns(now, ns);
- expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
- hrtimer_start(&apic->lapic_timer.timer,
- expire, HRTIMER_MODE_ABS_PINNED);
- } else
- apic_timer_expired(apic);
-
- local_irq_restore(flags);
-}
-
-bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
-{
- if (!lapic_in_kernel(vcpu))
- return false;
-
- return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
-
-static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
-{
- kvm_x86_ops->cancel_hv_timer(apic->vcpu);
- apic->lapic_timer.hv_timer_in_use = false;
-}
-
-void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- WARN_ON(!apic->lapic_timer.hv_timer_in_use);
- WARN_ON(swait_active(&vcpu->wq));
- cancel_hv_tscdeadline(apic);
- apic_timer_expired(apic);
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
-
-static bool start_hv_tscdeadline(struct kvm_lapic *apic)
-{
- u64 tscdeadline = apic->lapic_timer.tscdeadline;
-
- if (atomic_read(&apic->lapic_timer.pending) ||
- kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
- if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_tscdeadline(apic);
- } else {
- apic->lapic_timer.hv_timer_in_use = true;
- hrtimer_cancel(&apic->lapic_timer.timer);
-
- /* In case the sw timer triggered in the window */
- if (atomic_read(&apic->lapic_timer.pending))
- cancel_hv_tscdeadline(apic);
- }
- trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
- apic->lapic_timer.hv_timer_in_use);
- return apic->lapic_timer.hv_timer_in_use;
-}
-
-void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- WARN_ON(apic->lapic_timer.hv_timer_in_use);
-
- if (apic_lvtt_tscdeadline(apic))
- start_hv_tscdeadline(apic);
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
-
-void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- /* Possibly the TSC deadline timer is not enabled yet */
- if (!apic->lapic_timer.hv_timer_in_use)
- return;
-
- cancel_hv_tscdeadline(apic);
-
- if (atomic_read(&apic->lapic_timer.pending))
- return;
-
- start_sw_tscdeadline(apic);
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
-
static void start_apic_timer(struct kvm_lapic *apic)
{
ktime_t now;
@@ -1467,9 +1245,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
apic->lapic_timer.period,
ktime_to_ns(ktime_add_ns(now,
apic->lapic_timer.period)));
- } else if (apic_lvtt_tscdeadline(apic)) {
- if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
- start_sw_tscdeadline(apic);
}
}
@@ -1492,8 +1267,6 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
int ret = 0;
- trace_kvm_apic_write(reg, val);
-
switch (reg) {
case APIC_ID: /* Local APIC ID */
if (!apic_x2apic_mode(apic))
@@ -1535,7 +1308,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
int i;
u32 lvt_val;
- for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
+ for (i = 0; i < GVM_APIC_LVT_NUM; i++) {
lvt_val = kvm_lapic_get_reg(apic,
APIC_LVTT + 0x10 * i);
kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
@@ -1583,9 +1356,6 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
case APIC_TMICT:
- if (apic_lvtt_tscdeadline(apic))
- break;
-
hrtimer_cancel(&apic->lapic_timer.timer);
kvm_lapic_set_reg(apic, APIC_TMICT, val);
start_apic_timer(apic);
@@ -1593,14 +1363,14 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_TDCR:
if (val & 4)
- apic_debug("KVM_WRITE:TDCR %x\n", val);
+ apic_debug("GVM_WRITE:TDCR %x\n", val);
kvm_lapic_set_reg(apic, APIC_TDCR, val);
update_divide_count(apic);
break;
case APIC_ESR:
if (apic_x2apic_mode(apic) && val != 0) {
- apic_debug("KVM_WRITE:ESR not zero %x\n", val);
+ apic_debug("GVM_WRITE:ESR not zero %x\n", val);
ret = 1;
}
break;
@@ -1619,7 +1389,6 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
apic_debug("Local APIC Write to read-only register %x\n", reg);
return ret;
}
-EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
gpa_t address, int len, const void *data)
@@ -1658,7 +1427,6 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
-EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
@@ -1673,7 +1441,6 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
/* TODO: optimize to just emulate side effect w/o one more write */
kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
}
-EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
@@ -1684,14 +1451,8 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
hrtimer_cancel(&apic->lapic_timer.timer);
- if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
- static_key_slow_dec_deferred(&apic_hw_disabled);
-
- if (!apic->sw_enabled)
- static_key_slow_dec_deferred(&apic_sw_disabled);
-
if (apic->regs)
- free_page((unsigned long)apic->regs);
+ free_page((size_t)apic->regs);
kfree(apic);
}
@@ -1702,31 +1463,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
*----------------------------------------------------------------------
*/
-u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
- apic_lvtt_period(apic))
- return 0;
-
- return apic->lapic_timer.tscdeadline;
-}
-
-void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
- apic_lvtt_period(apic))
- return;
-
- hrtimer_cancel(&apic->lapic_timer.timer);
- apic->lapic_timer.tscdeadline = data;
- start_apic_timer(apic);
-}
-
-void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, size_t cr8)
{
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1760,9 +1497,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
if (value & MSR_IA32_APICBASE_ENABLE) {
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
- static_key_slow_dec_deferred(&apic_hw_disabled);
} else {
- static_key_slow_inc(&apic_hw_disabled.key);
recalculate_apic_map(vcpu->kvm);
}
}
@@ -1780,7 +1515,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
if ((value & MSR_IA32_APICBASE_ENABLE) &&
apic->base_address != APIC_DEFAULT_PHYS_BASE)
- pr_warn_once("APIC base relocation is unsupported by KVM");
+ pr_warn_once("APIC base relocation is unsupported by kvm");
/* with FSB delivery interrupt, we can restart APIC functionality */
apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
@@ -1809,10 +1544,10 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
}
kvm_apic_set_version(apic->vcpu);
- for (i = 0; i < KVM_APIC_LVT_NUM; i++)
+ for (i = 0; i < GVM_APIC_LVT_NUM; i++)
kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
apic_update_lvtt(apic);
- if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
+ if (kvm_check_has_quirk(vcpu->kvm, GVM_X86_QUIRK_LINT0_REENABLED))
kvm_lapic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
@@ -1840,7 +1575,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
if (kvm_vcpu_is_bsp(vcpu))
kvm_lapic_set_base(vcpu,
vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
- vcpu->arch.pv_eoi.msr_val = 0;
apic_update_ppr(apic);
vcpu->arch.apic_arb_prio = 0;
@@ -1945,7 +1679,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
* thinking that APIC satet has changed.
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
- static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
kvm_lapic_reset(vcpu, false);
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
@@ -1991,8 +1724,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
if (atomic_read(&apic->lapic_timer.pending) > 0) {
kvm_apic_local_deliver(apic, APIC_LVTT);
- if (apic_lvtt_tscdeadline(apic))
- apic->lapic_timer.tscdeadline = 0;
atomic_set(&apic->lapic_timer.pending, 0);
}
}
@@ -2016,11 +1747,6 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
apic_update_ppr(apic);
apic_clear_irr(vector, apic);
- if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
- apic_clear_isr(vector, apic);
- apic_update_ppr(apic);
- }
-
return vector;
}
@@ -2086,7 +1812,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_x86_ops->hwapic_isr_update(vcpu,
apic_find_highest_isr(apic));
}
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
if (ioapic_in_kernel(vcpu->kvm))
kvm_rtc_eoi_tracking_restore_one(vcpu);
@@ -2095,63 +1821,11 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
return 0;
}
-void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
-{
- struct hrtimer *timer;
-
- if (!lapic_in_kernel(vcpu))
- return;
-
- timer = &vcpu->arch.apic->lapic_timer.timer;
- if (hrtimer_cancel(timer))
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
-}
-
-/*
- * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
- *
- * Detect whether guest triggered PV EOI since the
- * last entry. If yes, set EOI on guests's behalf.
- * Clear PV EOI in guest memory in any case.
- */
-static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
- struct kvm_lapic *apic)
-{
- bool pending;
- int vector;
- /*
- * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
- * and KVM_PV_EOI_ENABLED in guest memory as follows:
- *
- * KVM_APIC_PV_EOI_PENDING is unset:
- * -> host disabled PV EOI.
- * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
- * -> host enabled PV EOI, guest did not execute EOI yet.
- * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
- * -> host enabled PV EOI, guest executed EOI.
- */
- BUG_ON(!pv_eoi_enabled(vcpu));
- pending = pv_eoi_get_pending(vcpu);
- /*
- * Clear pending bit in any case: it will be set again on vmentry.
- * While this might not be ideal from performance point of view,
- * this makes sure pv eoi is only enabled when we know it's safe.
- */
- pv_eoi_clr_pending(vcpu);
- if (pending)
- return;
- vector = apic_set_eoi(apic);
- trace_kvm_pv_eoi(apic, vector);
-}
-
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
u32 data;
- if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
- apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
-
- if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
+ if (!test_bit(GVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
@@ -2161,41 +1835,13 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
-/*
- * apic_sync_pv_eoi_to_guest - called before vmentry
- *
- * Detect whether it's safe to enable PV EOI and
- * if yes do so.
- */
-static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
- struct kvm_lapic *apic)
-{
- if (!pv_eoi_enabled(vcpu) ||
- /* IRR set or many bits in ISR: could be nested. */
- apic->irr_pending ||
- /* Cache not set: could be safe but we don't bother. */
- apic->highest_isr_cache == -1 ||
- /* Need EOI to update ioapic. */
- kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
- /*
- * PV EOI was disabled by apic_sync_pv_eoi_from_guest
- * so we need not do anything here.
- */
- return;
- }
-
- pv_eoi_set_pending(apic->vcpu);
-}
-
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
u32 data, tpr;
int max_irr, max_isr;
struct kvm_lapic *apic = vcpu->arch.apic;
- apic_sync_pv_eoi_to_guest(vcpu, apic);
-
- if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
+ if (!test_bit(GVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
@@ -2218,9 +1864,9 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
&vcpu->arch.apic->vapic_cache,
vapic_addr, sizeof(u32)))
return -EINVAL;
- __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
+ __set_bit(GVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
} else {
- __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
+ __clear_bit(GVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
}
vcpu->arch.apic->vapic_addr = vapic_addr;
@@ -2253,7 +1899,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
return 1;
if (reg == APIC_DFR || reg == APIC_ICR2) {
- apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
+ apic_debug("GVM_APIC_READ: read x2apic reserved register %x\n",
reg);
return 1;
}
@@ -2268,95 +1914,48 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
return 0;
}
-int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (!lapic_in_kernel(vcpu))
- return 1;
-
- /* if this is ICR write vector before command */
- if (reg == APIC_ICR)
- kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
- return kvm_lapic_reg_write(apic, reg, (u32)data);
-}
-
-int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
- u32 low, high = 0;
-
- if (!lapic_in_kernel(vcpu))
- return 1;
-
- if (kvm_lapic_reg_read(apic, reg, 4, &low))
- return 1;
- if (reg == APIC_ICR)
- kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
-
- *data = (((u64)high) << 32) | low;
-
- return 0;
-}
-
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
-{
- u64 addr = data & ~KVM_MSR_ENABLED;
- if (!IS_ALIGNED(addr, 4))
- return 1;
-
- vcpu->arch.pv_eoi.msr_val = data;
- if (!pv_eoi_enabled(vcpu))
- return 0;
- return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
- addr, sizeof(u8));
-}
-
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u8 sipi_vector;
- unsigned long pe;
+ size_t pe;
if (!lapic_in_kernel(vcpu) || !apic->pending_events)
return;
/*
* INITs are latched while in SMM. Because an SMM CPU cannot
- * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
+ * be in GVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
* and delay processing of INIT until the next RSM.
*/
if (is_smm(vcpu)) {
- WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
- if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
- clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+ WARN_ON_ONCE(vcpu->arch.mp_state == GVM_MP_STATE_INIT_RECEIVED);
+ if (test_bit(GVM_APIC_SIPI, &apic->pending_events))
+ clear_bit(GVM_APIC_SIPI, &apic->pending_events);
return;
}
pe = xchg(&apic->pending_events, 0);
- if (test_bit(KVM_APIC_INIT, &pe)) {
+ if (test_bit(GVM_APIC_INIT, &pe)) {
kvm_lapic_reset(vcpu, true);
kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ vcpu->arch.mp_state = GVM_MP_STATE_RUNNABLE;
else
- vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+ vcpu->arch.mp_state = GVM_MP_STATE_INIT_RECEIVED;
}
- if (test_bit(KVM_APIC_SIPI, &pe) &&
- vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+ if (test_bit(GVM_APIC_SIPI, &pe) &&
+ vcpu->arch.mp_state == GVM_MP_STATE_INIT_RECEIVED) {
/* evaluate pending_events before reading the vector */
smp_rmb();
sipi_vector = apic->sipi_vector;
apic_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, sipi_vector);
kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ vcpu->arch.mp_state = GVM_MP_STATE_RUNNABLE;
}
}
void kvm_lapic_init(void)
{
- /* do not patch jump label more than once per second */
- jump_label_rate_limit(&apic_hw_disabled, HZ);
- jump_label_rate_limit(&apic_sw_disabled, HZ);
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index f60d01c..ffbed39 100644..100755
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -1,3 +1,7 @@
+/*
+ * Copyright 2019 Google LLC
+ */
+
#ifndef __KVM_X86_LAPIC_H
#define __KVM_X86_LAPIC_H
@@ -5,26 +9,31 @@
#include <linux/kvm_host.h>
-#define KVM_APIC_INIT 0
-#define KVM_APIC_SIPI 1
-#define KVM_APIC_LVT_NUM 6
+#include <ntkrutils.h>
+#include <asm/apicdef.h>
+#include <asm/msr-index.h>
+#include <gvm_types.h>
+#include <ntkrutils.h>
+
+#define GVM_APIC_INIT 0
+#define GVM_APIC_SIPI 1
+#define GVM_APIC_LVT_NUM 6
+
+#define GVM_APIC_SHORT_MASK 0xc0000
+#define GVM_APIC_DEST_MASK 0x800
-#define KVM_APIC_SHORT_MASK 0xc0000
-#define KVM_APIC_DEST_MASK 0x800
+#define u32 unsigned int
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
u32 timer_mode;
u32 timer_mode_mask;
- u64 tscdeadline;
- u64 expired_tscdeadline;
atomic_t pending; /* accumulated triggered timers */
- bool hv_timer_in_use;
};
struct kvm_lapic {
- unsigned long base_address;
+ size_t base_address;
struct kvm_io_device dev;
struct kvm_timer lapic_timer;
u32 divide_count;
@@ -41,10 +50,10 @@ struct kvm_lapic {
* the guest 1:1, because it is accessed by the vmx microcode.
* Note: Only one register, the TPR, is used by the microcode.
*/
- void *regs;
+ u8 *regs;
gpa_t vapic_addr;
struct gfn_to_hva_cache vapic_cache;
- unsigned long pending_events;
+ size_t pending_events;
unsigned int sipi_vector;
};
@@ -59,7 +68,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
void kvm_apic_accept_events(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
-void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, size_t cr8);
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
@@ -85,9 +94,6 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
-void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
-
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
@@ -98,15 +104,6 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
-int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-
-static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
-}
-
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
void kvm_lapic_init(void);
#define VEC_POS(v) ((v) & (32 - 1))
@@ -114,12 +111,12 @@ void kvm_lapic_init(void);
static inline void kvm_lapic_set_vector(int vec, void *bitmap)
{
- set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+ set_bit(VEC_POS(vec), (size_t *)((u8 *)(bitmap) + REG_POS(vec)));
}
static inline void kvm_lapic_set_irr(int vec, struct kvm_lapic *apic)
{
- kvm_lapic_set_vector(vec, apic->regs + APIC_IRR);
+ kvm_lapic_set_vector(vec, (unsigned char *)apic->regs + APIC_IRR);
/*
* irr_pending must be true if any interrupt is pending; set it after
* APIC_IRR to avoid race with apic_clear_irr
@@ -129,39 +126,27 @@ static inline void kvm_lapic_set_irr(int vec, struct kvm_lapic *apic)
static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off)
{
- return *((u32 *) (apic->regs + reg_off));
+ return *((u32 *) ((unsigned char *)apic->regs + reg_off));
}
static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
- *((u32 *) (apic->regs + reg_off)) = val;
+ *((u32 *) ((unsigned char *)apic->regs + reg_off)) = val;
}
-extern struct static_key kvm_no_apic_vcpu;
-
static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu)
{
- if (static_key_false(&kvm_no_apic_vcpu))
- return vcpu->arch.apic;
- return true;
+ return vcpu->arch.apic;
}
-extern struct static_key_deferred apic_hw_disabled;
-
static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
{
- if (static_key_false(&apic_hw_disabled.key))
- return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
- return MSR_IA32_APICBASE_ENABLE;
+ return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
}
-extern struct static_key_deferred apic_sw_disabled;
-
static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
{
- if (static_key_false(&apic_sw_disabled.key))
- return apic->sw_enabled;
- return true;
+ return apic->sw_enabled;
}
static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
@@ -197,7 +182,7 @@ static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
{
- return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+ return lapic_in_kernel(vcpu) && test_bit(GVM_APIC_INIT, &vcpu->arch.apic->pending_events);
}
static inline u32 kvm_apic_id(struct kvm_lapic *apic)
@@ -213,14 +198,8 @@ static inline u32 kvm_apic_id(struct kvm_lapic *apic)
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
-void wait_lapic_expire(struct kvm_vcpu *vcpu);
-
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
- const unsigned long *bitmap, u32 bitmap_size);
-void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu);
-void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
-void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
-bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
+ const size_t *bitmap, u32 bitmap_size);
#endif
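With the apic_hw_disabled/apic_sw_disabled deferred static keys removed from this header, the enable checks above reduce to plain field reads. A minimal standalone sketch of the resulting logic; the struct and constant below are trimmed stand-ins for illustration only, not the kernel definitions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MSR_IA32_APICBASE_ENABLE (1ULL << 11)

/* Trimmed stand-in for the real structures, just enough for the checks. */
struct toy_lapic { uint64_t apic_base; bool sw_enabled; };

static bool apic_hw_enabled(const struct toy_lapic *apic)
{
	/* No jump-label fast path: always consult the APIC base MSR shadow. */
	return apic->apic_base & MSR_IA32_APICBASE_ENABLE;
}

static bool apic_sw_enabled(const struct toy_lapic *apic)
{
	return apic->sw_enabled;
}

int main(void)
{
	struct toy_lapic apic = { MSR_IA32_APICBASE_ENABLE, true };
	printf("hw=%d sw=%d\n", apic_hw_enabled(&apic), apic_sw_enabled(&apic));
	return 0;
}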
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d9c7e98..e183d24 100644..100755
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -8,6 +8,7 @@
*
* Copyright (C) 2006 Qumranet, Inc.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -23,27 +24,12 @@
#include "x86.h"
#include "kvm_cache_regs.h"
#include "cpuid.h"
+#include <linux/list.h>
#include <linux/kvm_host.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/moduleparam.h>
-#include <linux/export.h>
-#include <linux/swap.h>
-#include <linux/hugetlb.h>
-#include <linux/compiler.h>
-#include <linux/srcu.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-#include <asm/page.h>
-#include <asm/cmpxchg.h>
-#include <asm/io.h>
-#include <asm/vmx.h>
#include <asm/kvm_page_track.h>
+#pragma warning(disable : 4221)
/*
* When setting this variable to true it enables Two-Dimensional-Paging
* where the hardware walks 2 page tables:
@@ -51,7 +37,7 @@
* 2. while doing 1. it walks guest-physical to host-physical
* If the hardware supports that we don't need to do shadow paging.
*/
-bool tdp_enabled = false;
+bool tdp_enabled = true;
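The default for tdp_enabled flips to true here; the comment above already describes the two-dimensional walk. As a purely illustrative standalone model (the toy tables, names, and values below are invented for the sketch), the composed translation looks like this:

#include <stdint.h>
#include <stdio.h>

/* Toy single-level "page tables": index -> frame base. Illustrative only. */
static uint64_t guest_table[4]  = { 0x1000, 0x2000, 0x3000, 0x4000 };   /* GVA page -> GPA */
static uint64_t nested_table[8] = { 0x9000, 0xa000, 0xb000, 0xc000,
                                    0xd000, 0xe000, 0xf000, 0x10000 };  /* GPA page -> HPA */

/* With TDP the hardware performs both lookups, guest-virtual -> guest-physical
 * and then guest-physical -> host-physical, with no shadow tables in software. */
static uint64_t translate(uint64_t gva)
{
	uint64_t gpa = guest_table[(gva >> 12) & 3] | (gva & 0xfff);
	return nested_table[(gpa >> 12) & 7] | (gpa & 0xfff);
}

int main(void)
{
	printf("gva 0x1234 -> hpa 0x%llx\n", (unsigned long long)translate(0x1234));
	return 0;
}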
enum {
AUDIT_PRE_PAGE_FAULT,
@@ -72,8 +58,8 @@ module_param(dbg, bool, 0644);
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
#define MMU_WARN_ON(x) WARN_ON(x)
#else
-#define pgprintk(x...) do { } while (0)
-#define rmap_printk(x...) do { } while (0)
+#define pgprintk(x,...) do { } while (0)
+#define rmap_printk(x,...) do { } while (0)
#define MMU_WARN_ON(x) do { } while (0)
#endif
@@ -129,11 +115,6 @@ module_param(dbg, bool, 0644);
#define ACC_USER_MASK PT_USER_MASK
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
-#include <trace/events/kvm.h>
-
-#define CREATE_TRACE_POINTS
-#include "mmutrace.h"
-
#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
@@ -162,13 +143,13 @@ struct kvm_shadow_walk_iterator {
#define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte) \
for (shadow_walk_init(&(_walker), _vcpu, _addr); \
- shadow_walk_okay(&(_walker)) && \
- ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \
+ shadow_walk_okay(&(_walker)); \
__shadow_walk_next(&(_walker), spte))
-static struct kmem_cache *pte_list_desc_cache;
-static struct kmem_cache *mmu_page_header_cache;
-static struct percpu_counter kvm_total_used_mmu_pages;
+// todo-001
+//static struct kmem_cache *pte_list_desc_cache;
+//static struct kmem_cache *mmu_page_header_cache;
+//static struct percpu_counter kvm_total_used_mmu_pages;
static u64 __read_mostly shadow_nx_mask;
static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
@@ -178,6 +159,60 @@ static u64 __read_mostly shadow_dirty_mask;
static u64 __read_mostly shadow_mmio_mask;
static u64 __read_mostly shadow_present_mask;
+#ifdef CONFIG_X86_64
+typedef u64 phys_addr_t;
+#define __PHYSICAL_MASK_SHIFT 46
+#endif
+/* PAGE_SHIFT determines the page size */
+#ifndef PAGE_SIZE
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+#endif
+
+#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
+#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
+
+#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+
+#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
+#define __VIRTUAL_MASK ((1ULL << __VIRTUAL_MASK_SHIFT) - 1)
+
+/* Cast *PAGE_MASK to a signed type so that it is sign-extended if
+virtual addresses are 32-bits but physical addresses are larger
+(ie, 32-bit PAE). */
+#define PHYSICAL_PAGE_MASK (((ssize_t)PAGE_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PMD_PAGE_MASK (((ssize_t)PMD_PAGE_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PUD_PAGE_MASK (((ssize_t)PUD_PAGE_MASK) & __PHYSICAL_MASK)
+
+/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */
+#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
+
+/*
+* Extracts the flags from a (pte|pmd|pud|pgd)val_t
+* This includes the protection key value.
+*/
+#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
+
+#define pte_val(pte) (pte.pte)
+
+static pteval_t pte_flags(pte_t pte)
+{
+ return pte_val(pte) & PTE_FLAGS_MASK;
+}
+
+static size_t pte_pfn(pte_t pte)
+{
+ return (pte_val(pte)& PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
+static int pte_write(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_RW;
+}
+
+
static void mmu_spte_set(u64 *sptep, u64 spte);
static void mmu_free_roots(struct kvm_vcpu *vcpu);
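The helpers added above (pte_flags/pte_pfn/pte_write) split a raw PTE value into a frame number and flag bits using the PHYSICAL/FLAGS masks. A small self-contained check of the same arithmetic; the mask values mirror the definitions above, and the sample PTE is made up:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PHYS_MASK_SHIFT 46
#define PTE_PFN_MASK    ((((uint64_t)1 << PHYS_MASK_SHIFT) - 1) & ~(((uint64_t)1 << PAGE_SHIFT) - 1))
#define PTE_FLAGS_MASK  (~PTE_PFN_MASK)
#define PAGE_RW         ((uint64_t)1 << 1)

int main(void)
{
	uint64_t pte   = 0x000000012345a067ULL;        /* arbitrary sample value */
	uint64_t pfn   = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
	uint64_t flags = pte & PTE_FLAGS_MASK;

	printf("pfn=0x%llx write=%d\n",
	       (unsigned long long)pfn, (flags & PAGE_RW) != 0);
	return 0;
}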
@@ -185,7 +220,6 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
{
shadow_mmio_mask = mmio_mask;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
/*
* the low bit of the generation number is always presumed to be zero.
@@ -240,7 +274,6 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
access &= ACC_WRITE_MASK | ACC_USER_MASK;
mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
- trace_mark_mmio_spte(sptep, gfn, access, gen);
mmu_spte_set(sptep, mask);
}
@@ -279,7 +312,6 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
kvm_gen = kvm_current_mmio_generation(vcpu);
spte_gen = get_mmio_spte_generation(spte);
- trace_check_mmio_spte(spte, kvm_gen, spte_gen);
return likely(kvm_gen == spte_gen);
}
@@ -293,7 +325,6 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
shadow_x_mask = x_mask;
shadow_present_mask = p_mask;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
static int is_cpuid_PSE36(void)
{
@@ -354,7 +385,9 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
static u64 __get_spte_lockless(u64 *sptep)
{
- return ACCESS_ONCE(*sptep);
+ u64 temp;
+ ACCESS_ONCE(*sptep, temp);
+ return temp;
}
#else
union split_spte {
@@ -561,12 +594,6 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
ret = true;
if (!shadow_accessed_mask) {
- /*
- * We don't set page dirty when dropping non-writable spte.
- * So do it now if the new spte is becoming non-writable.
- */
- if (ret)
- kvm_set_pfn_dirty(spte_to_pfn(old_spte));
return ret;
}
@@ -578,11 +605,6 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
shadow_accessed_mask | shadow_dirty_mask))
ret = true;
- if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask))
- kvm_set_pfn_accessed(spte_to_pfn(old_spte));
- if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask))
- kvm_set_pfn_dirty(spte_to_pfn(old_spte));
-
return ret;
}
@@ -607,17 +629,12 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
pfn = spte_to_pfn(old_spte);
/*
- * KVM does not hold the refcount of the page used by
+ * kvm does not hold the refcount of the page used by
* kvm mmu, before reclaiming the page, we should
* unmap it from mmu first.
*/
WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn)));
- if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
- kvm_set_pfn_accessed(pfn);
- if (old_spte & (shadow_dirty_mask ? shadow_dirty_mask :
- PT_WRITABLE_MASK))
- kvm_set_pfn_dirty(pfn);
return 1;
}
@@ -663,14 +680,14 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
- struct kmem_cache *base_cache, int min)
+ size_t cache_size, int min)
{
void *obj;
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
- obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
+ obj = kzalloc_fast(cache_size, GFP_KERNEL);
if (!obj)
return -ENOMEM;
cache->objects[cache->nobjs++] = obj;
@@ -683,11 +700,10 @@ static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache)
return cache->nobjs;
}
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
- struct kmem_cache *cache)
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
while (mc->nobjs)
- kmem_cache_free(cache, mc->objects[--mc->nobjs]);
+ kfree_fast(mc->objects[--mc->nobjs]);
}
static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
@@ -709,7 +725,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
{
while (mc->nobjs)
- free_page((unsigned long)mc->objects[--mc->nobjs]);
+ free_page((size_t)mc->objects[--mc->nobjs]);
}
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
@@ -717,25 +733,23 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
int r;
r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
- pte_list_desc_cache, 8 + PTE_PREFETCH_NUM);
+ sizeof(struct pte_list_desc), 8 + PTE_PREFETCH_NUM);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
- mmu_page_header_cache, 4);
+ sizeof(struct kvm_mmu_page), 4);
out:
return r;
}
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
- mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
- pte_list_desc_cache);
+ mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
- mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
- mmu_page_header_cache);
+ mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
}
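The topup/free pair above now takes an object size and uses kzalloc_fast/kfree_fast instead of per-type kmem_cache pools. A hedged standalone sketch of the same pre-allocation pattern, with calloc/free standing in for the fast allocators and a simplified "fill up to min" policy:

#include <stdlib.h>
#include <stdio.h>

#define CACHE_OBJS 40

struct obj_cache {
	int nobjs;
	void *objects[CACHE_OBJS];
};

/* Fill the cache to at least 'min' objects so later allocations taken
 * under a spinlock can be served without calling the allocator. */
static int cache_topup(struct obj_cache *c, size_t obj_size, int min)
{
	while (c->nobjs < min) {
		void *obj = calloc(1, obj_size);   /* stand-in for kzalloc_fast() */
		if (!obj)
			return -1;
		c->objects[c->nobjs++] = obj;
	}
	return 0;
}

static void *cache_alloc(struct obj_cache *c)
{
	return c->nobjs ? c->objects[--c->nobjs] : NULL;
}

static void cache_free_all(struct obj_cache *c)
{
	while (c->nobjs)
		free(c->objects[--c->nobjs]);      /* stand-in for kfree_fast() */
}

int main(void)
{
	struct obj_cache c = { 0 };
	if (cache_topup(&c, 64, 8) == 0) {
		void *obj = cache_alloc(&c);
		printf("cached %d objects after taking %p\n", c.nobjs, obj);
		free(obj);
	}
	cache_free_all(&c);
	return 0;
}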
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
@@ -754,7 +768,7 @@ static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu)
static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
{
- kmem_cache_free(pte_list_desc_cache, pte_list_desc);
+ kfree_fast(pte_list_desc);
}
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
@@ -773,43 +787,6 @@ static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
sp->gfns[index] = gfn;
}
-/*
- * Return the pointer to the large page information for a given gfn,
- * handling slots that are not large page aligned.
- */
-static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
- struct kvm_memory_slot *slot,
- int level)
-{
- unsigned long idx;
-
- idx = gfn_to_index(gfn, slot->base_gfn, level);
- return &slot->arch.lpage_info[level - 2][idx];
-}
-
-static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot,
- gfn_t gfn, int count)
-{
- struct kvm_lpage_info *linfo;
- int i;
-
- for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
- linfo = lpage_info_slot(gfn, slot, i);
- linfo->disallow_lpage += count;
- WARN_ON(linfo->disallow_lpage < 0);
- }
-}
-
-void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
-{
- update_gfn_disallow_lpage_count(slot, gfn, 1);
-}
-
-void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
-{
- update_gfn_disallow_lpage_count(slot, gfn, -1);
-}
-
static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
struct kvm_memslots *slots;
@@ -823,10 +800,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
/* the non-leaf shadow pages are keeping readonly. */
if (sp->role.level > PT_PAGE_TABLE_LEVEL)
- return kvm_slot_page_track_add_page(kvm, slot, gfn,
+ kvm_slot_page_track_add_page(kvm, slot, gfn,
KVM_PAGE_TRACK_WRITE);
-
- kvm_mmu_gfn_disallow_lpage(slot, gfn);
}
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -840,55 +815,20 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
if (sp->role.level > PT_PAGE_TABLE_LEVEL)
- return kvm_slot_page_track_remove_page(kvm, slot, gfn,
+ kvm_slot_page_track_remove_page(kvm, slot, gfn,
KVM_PAGE_TRACK_WRITE);
-
- kvm_mmu_gfn_allow_lpage(slot, gfn);
-}
-
-static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
- struct kvm_memory_slot *slot)
-{
- struct kvm_lpage_info *linfo;
-
- if (slot) {
- linfo = lpage_info_slot(gfn, slot, level);
- return !!linfo->disallow_lpage;
- }
-
- return true;
}
static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn,
int level)
{
- struct kvm_memory_slot *slot;
-
- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- return __mmu_gfn_lpage_is_disallowed(gfn, level, slot);
-}
-
-static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
-{
- unsigned long page_size;
- int i, ret = 0;
-
- page_size = kvm_host_page_size(kvm, gfn);
-
- for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
- if (page_size >= KVM_HPAGE_SIZE(i))
- ret = i;
- else
- break;
- }
-
- return ret;
+ return true;
}
static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot,
bool no_dirty_log)
{
- if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+ if (!slot || slot->flags & GVM_MEMSLOT_INVALID)
return false;
if (no_dirty_log && slot->dirty_bitmap)
return false;
@@ -912,29 +852,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
bool *force_pt_level)
{
- int host_level, level, max_level;
- struct kvm_memory_slot *slot;
-
- if (unlikely(*force_pt_level))
- return PT_PAGE_TABLE_LEVEL;
-
- slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn);
- *force_pt_level = !memslot_valid_for_gpte(slot, true);
- if (unlikely(*force_pt_level))
- return PT_PAGE_TABLE_LEVEL;
-
- host_level = host_mapping_level(vcpu->kvm, large_gfn);
-
- if (host_level == PT_PAGE_TABLE_LEVEL)
- return host_level;
-
- max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
-
- for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
- if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot))
- break;
-
- return level - 1;
+ return PT_PAGE_TABLE_LEVEL;
}
/*
@@ -956,17 +874,17 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
if (!rmap_head->val) {
rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte);
- rmap_head->val = (unsigned long)spte;
+ rmap_head->val = (size_t)spte;
} else if (!(rmap_head->val & 1)) {
rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte);
desc = mmu_alloc_pte_list_desc(vcpu);
desc->sptes[0] = (u64 *)rmap_head->val;
desc->sptes[1] = spte;
- rmap_head->val = (unsigned long)desc | 1;
+ rmap_head->val = (size_t)desc | 1;
++count;
} else {
rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ull);
while (desc->sptes[PTE_LIST_EXT-1] && desc->more) {
desc = desc->more;
count += PTE_LIST_EXT;
@@ -996,12 +914,12 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
if (j != 0)
return;
if (!prev_desc && !desc->more)
- rmap_head->val = (unsigned long)desc->sptes[0];
+ rmap_head->val = (size_t)desc->sptes[0];
else
if (prev_desc)
prev_desc->more = desc->more;
else
- rmap_head->val = (unsigned long)desc->more | 1;
+ rmap_head->val = (size_t)desc->more | 1;
mmu_free_pte_list_desc(desc);
}
@@ -1023,7 +941,7 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
rmap_head->val = 0;
} else {
rmap_printk("pte_list_remove: %p many->many\n", spte);
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ull);
prev_desc = NULL;
while (desc) {
for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
@@ -1041,13 +959,13 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
}
}
-static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
+static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn,
struct kvm_memory_slot *slot)
{
- unsigned long idx;
+ size_t idx;
- idx = gfn_to_index(gfn, slot->base_gfn, level);
- return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
+ idx = gfn - slot->base_gfn;
+ return &slot->arch.rmap[idx];
}
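With large-page levels dropped, __gfn_to_rmap above indexes one flat array by gfn - slot->base_gfn instead of per-level arrays. A minimal sketch of that lookup, plus the low-bit tagging that the ~1ull casts in pte_list_add/pte_list_remove rely on; the structures here are illustrative, not the kernel ones:

#include <stdint.h>
#include <stdio.h>

struct rmap_head { uintptr_t val; };          /* low bit set => points to a desc chain */

struct toy_memslot {
	uint64_t base_gfn;
	uint64_t npages;
	struct rmap_head rmap[16];            /* one head per 4K guest page */
};

static struct rmap_head *gfn_to_rmap(struct toy_memslot *slot, uint64_t gfn)
{
	return &slot->rmap[gfn - slot->base_gfn];   /* flat: no per-level indexing */
}

int main(void)
{
	struct toy_memslot slot = { 0x100, 16, { { 0 } } };
	uint64_t spte_storage = 0;
	struct rmap_head *head = gfn_to_rmap(&slot, 0x105);

	head->val = (uintptr_t)&spte_storage;       /* single spte stored directly, bit 0 clear */
	printf("head for gfn 0x105 holds %s\n",
	       (head->val & 1) ? "a desc list" : "one spte pointer");
	return 0;
}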
static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn,
@@ -1058,7 +976,7 @@ static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn,
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
- return __gfn_to_rmap(gfn, sp->role.level, slot);
+ return __gfn_to_rmap(gfn, slot);
}
static bool rmap_can_add(struct kvm_vcpu *vcpu)
@@ -1123,7 +1041,7 @@ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
goto out;
}
- iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ull);
iter->pos = 0;
sptep = iter->desc->sptes[iter->pos];
out:
@@ -1296,13 +1214,13 @@ static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
*/
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
- gfn_t gfn_offset, unsigned long mask)
+ gfn_t gfn_offset, size_t mask)
{
struct kvm_rmap_head *rmap_head;
while (mask) {
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
- PT_PAGE_TABLE_LEVEL, slot);
+ slot);
__rmap_write_protect(kvm, rmap_head, false);
/* clear the first set bit */
@@ -1321,20 +1239,19 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
*/
void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
- gfn_t gfn_offset, unsigned long mask)
+ gfn_t gfn_offset, size_t mask)
{
struct kvm_rmap_head *rmap_head;
while (mask) {
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
- PT_PAGE_TABLE_LEVEL, slot);
+ slot);
__rmap_clear_dirty(kvm, rmap_head);
/* clear the first set bit */
mask &= mask - 1;
}
}
-EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked);
/**
* kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
@@ -1348,7 +1265,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked);
*/
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
- gfn_t gfn_offset, unsigned long mask)
+ gfn_t gfn_offset, size_t mask)
{
if (kvm_x86_ops->enable_log_dirty_pt_masked)
kvm_x86_ops->enable_log_dirty_pt_masked(kvm, slot, gfn_offset,
@@ -1361,13 +1278,10 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn)
{
struct kvm_rmap_head *rmap_head;
- int i;
bool write_protected = false;
- for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
- rmap_head = __gfn_to_rmap(gfn, i, slot);
- write_protected |= __rmap_write_protect(kvm, rmap_head, true);
- }
+ rmap_head = __gfn_to_rmap(gfn, slot);
+ write_protected |= __rmap_write_protect(kvm, rmap_head, true);
return write_protected;
}
@@ -1386,11 +1300,13 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
struct rmap_iterator iter;
bool flush = false;
- while ((sptep = rmap_get_first(rmap_head, &iter))) {
+ sptep = rmap_get_first(rmap_head, &iter);
+ while (sptep) {
rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
drop_spte(kvm, sptep);
flush = true;
+ sptep = rmap_get_first(rmap_head, &iter);
}
return flush;
@@ -1398,14 +1314,14 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
- unsigned long data)
+ size_t data)
{
return kvm_zap_rmapp(kvm, rmap_head);
}
static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
- unsigned long data)
+ size_t data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1468,8 +1384,8 @@ rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level)
{
iterator->level = level;
iterator->gfn = iterator->start_gfn;
- iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot);
- iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level,
+ iterator->rmap = __gfn_to_rmap(iterator->gfn, iterator->slot);
+ iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn,
iterator->slot);
}
@@ -1495,7 +1411,7 @@ static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator)
static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
{
if (++iterator->rmap <= iterator->end_rmap) {
- iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level));
+ iterator->gfn += 1ULL;
return;
}
@@ -1515,15 +1431,15 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
slot_rmap_walk_next(_iter_))
static int kvm_handle_hva_range(struct kvm *kvm,
- unsigned long start,
- unsigned long end,
- unsigned long data,
+ size_t start,
+ size_t end,
+ size_t data,
int (*handler)(struct kvm *kvm,
struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot,
gfn_t gfn,
int level,
- unsigned long data))
+ size_t data))
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
@@ -1531,10 +1447,10 @@ static int kvm_handle_hva_range(struct kvm *kvm,
int ret = 0;
int i;
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(memslot, slots) {
- unsigned long hva_start, hva_end;
+ size_t hva_start, hva_end;
gfn_t gfn_start, gfn_end;
hva_start = max(start, memslot->userspace_addr);
@@ -1550,7 +1466,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL,
- PT_MAX_HUGEPAGE_LEVEL,
+ PT_PAGE_TABLE_LEVEL,
gfn_start, gfn_end - 1,
&iterator)
ret |= handler(kvm, iterator.rmap, memslot,
@@ -1561,38 +1477,38 @@ static int kvm_handle_hva_range(struct kvm *kvm,
return ret;
}
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- unsigned long data,
+static int kvm_handle_hva(struct kvm *kvm, size_t hva,
+ size_t data,
int (*handler)(struct kvm *kvm,
struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot,
gfn_t gfn, int level,
- unsigned long data))
+ size_t data))
{
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+int kvm_unmap_hva(struct kvm *kvm, size_t hva)
{
return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, size_t start, size_t end)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
}
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+void kvm_set_spte_hva(struct kvm *kvm, size_t hva, pte_t pte)
{
- kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
+ kvm_handle_hva(kvm, hva, (size_t)&pte, kvm_set_pte_rmapp);
}
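kvm_handle_hva_range keeps its callback shape but now takes size_t for host addresses and the opaque data word. A compact standalone model of the dispatch pattern; the slot layout and handler below are invented for the sketch and only roughly follow the kernel walker:

#include <stddef.h>
#include <stdio.h>

struct toy_slot { size_t hva_start; size_t npages; };

typedef int (*range_handler)(struct toy_slot *slot, size_t gfn_off, size_t data);

static int zap_one(struct toy_slot *slot, size_t gfn_off, size_t data)
{
	(void)slot; (void)data;
	printf("zap page offset %zu\n", gfn_off);
	return 1;                                   /* "flush needed" */
}

/* Clip [start, end) against each slot and hand every overlapping page
 * to the handler, OR-ing the results together. */
static int handle_hva_range(struct toy_slot *slots, int n,
			    size_t start, size_t end,
			    size_t data, range_handler fn)
{
	int ret = 0;
	for (int i = 0; i < n; i++) {
		size_t lo = start > slots[i].hva_start ? start : slots[i].hva_start;
		size_t hi = slots[i].hva_start + (slots[i].npages << 12);
		if (end < hi)
			hi = end;
		for (size_t hva = lo & ~(size_t)0xfff; hva < hi; hva += 0x1000)
			ret |= fn(&slots[i], (hva - slots[i].hva_start) >> 12, data);
	}
	return ret;
}

int main(void)
{
	struct toy_slot slots[1] = { { 0x10000, 4 } };
	return handle_hva_range(slots, 1, 0x10800, 0x12000, 0, zap_one) ? 0 : 1;
}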
static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
- unsigned long data)
+ size_t data)
{
u64 *sptep;
- struct rmap_iterator uninitialized_var(iter);
+ struct rmap_iterator iter;
int young = 0;
BUG_ON(!shadow_accessed_mask);
@@ -1601,17 +1517,16 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
if (*sptep & shadow_accessed_mask) {
young = 1;
clear_bit((ffs(shadow_accessed_mask) - 1),
- (unsigned long *)sptep);
+ (size_t *)sptep);
}
}
- trace_kvm_age_page(gfn, level, slot, young);
return young;
}
static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn,
- int level, unsigned long data)
+ int level, size_t data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1649,8 +1564,9 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
-
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+//todo-003
+#if 0
+int kvm_age_hva(struct kvm *kvm, size_t start, size_t end)
{
/*
* In case of absence of EPT Access and Dirty Bits supports,
@@ -1674,8 +1590,9 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
+#endif
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_test_age_hva(struct kvm *kvm, size_t hva)
{
return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
}
@@ -1705,7 +1622,7 @@ static int is_empty_shadow_page(u64 *spt)
static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
{
kvm->arch.n_used_mmu_pages += nr;
- percpu_counter_add(&kvm_total_used_mmu_pages, nr);
+ //percpu_counter_add(&kvm_total_used_mmu_pages, nr);
}
static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
@@ -1713,15 +1630,15 @@ static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
hlist_del(&sp->hash_link);
list_del(&sp->link);
- free_page((unsigned long)sp->spt);
+ free_page((size_t)sp->spt);
if (!sp->role.direct)
- free_page((unsigned long)sp->gfns);
- kmem_cache_free(mmu_page_header_cache, sp);
+ free_page((size_t)sp->gfns);
+ kfree_fast(sp);
}
static unsigned kvm_page_table_hashfn(gfn_t gfn)
{
- return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1);
+ return gfn & ((1 << GVM_MMU_HASH_SHIFT) - 1);
}
static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
@@ -1754,7 +1671,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
if (!direct)
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
- set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+ set_page_private(virt_to_page(sp->spt), (size_t)sp);
/*
* The active_mmu_pages list is the FIFO list, do not move the
@@ -1808,13 +1725,13 @@ static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
WARN_ON(1);
}
-#define KVM_PAGE_ARRAY_NR 16
+#define GVM_PAGE_ARRAY_NR 16
struct kvm_mmu_pages {
struct mmu_page_and_offset {
struct kvm_mmu_page *sp;
unsigned int idx;
- } page[KVM_PAGE_ARRAY_NR];
+ } page[GVM_PAGE_ARRAY_NR];
unsigned int nr;
};
@@ -1831,7 +1748,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
pvec->page[pvec->nr].sp = sp;
pvec->page[pvec->nr].idx = idx;
pvec->nr++;
- return (pvec->nr == KVM_PAGE_ARRAY_NR);
+ return (pvec->nr == GVM_PAGE_ARRAY_NR);
}
static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
@@ -1896,7 +1813,6 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
WARN_ON(!sp->unsync);
- trace_kvm_mmu_sync_page(sp);
sp->unsync = 0;
--kvm->stat.mmu_unsync;
}
@@ -1953,10 +1869,10 @@ static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
if (remote_flush)
kvm_flush_remote_tlbs(vcpu->kvm);
else if (local_flush)
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(GVM_REQ_TLB_FLUSH, vcpu);
}
-#ifdef CONFIG_KVM_MMU_AUDIT
+#ifdef CONFIG_GVM_MMU_AUDIT
#include "mmu_audit.c"
#else
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
@@ -1982,6 +1898,7 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
struct kvm_mmu_page *s;
bool ret = false;
+#define LIST_ENTRY_TYPE_INFO struct kvm_mmu_page
for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
if (!s->unsync)
continue;
@@ -1989,6 +1906,7 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
ret |= kvm_sync_page(vcpu, s, invalid_list);
}
+#undef LIST_ENTRY_TYPE_INFO
return ret;
}
@@ -1998,9 +1916,16 @@ struct mmu_page_path {
unsigned int idx[PT64_ROOT_LEVEL];
};
+static int __for_each_sp_end(struct kvm_mmu_page **sp, struct kvm_mmu_pages *pvec, int nr)
+{
+ *sp = pvec->page[nr].sp;
+
+ return 1;
+}
+
#define for_each_sp(pvec, sp, parents, i) \
for (i = mmu_pages_first(&pvec, &parents); \
- i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
+ i < pvec.nr && __for_each_sp_end(&sp, &pvec, i); \
i = mmu_pages_next(&pvec, &parents, i))
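for_each_sp above (like for_each_shadow_entry_lockless earlier in the file) loses the GCC-only ({ ... }) statement expression from its loop condition, presumably for MSVC's benefit; an always-true helper performs the assignment instead. The same trick shown in isolation, with a placeholder array and element type:

#include <stdio.h>

struct elem { int value; };

/* Returns 1 so it can sit in a loop condition and still perform the
 * per-iteration assignment a statement expression used to do. */
static int fetch_elem(struct elem **out, struct elem *vec, int i)
{
	*out = &vec[i];
	return 1;
}

#define for_each_elem(vec, n, e, i) \
	for ((i) = 0; (i) < (n) && fetch_elem(&(e), (vec), (i)); (i)++)

int main(void)
{
	struct elem v[3] = { {1}, {2}, {3} };
	struct elem *e;
	int i;

	for_each_elem(v, 3, e, i)
		printf("%d\n", e->value);
	return 0;
}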
static int mmu_pages_next(struct kvm_mmu_pages *pvec,
@@ -2090,9 +2015,10 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
flush |= kvm_sync_page(vcpu, sp, &invalid_list);
mmu_pages_clear_parents(&parents);
}
- if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) {
+ //if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock))
+ {
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
- cond_resched_lock(&vcpu->kvm->mmu_lock);
+ //cond_resched_lock(&vcpu->kvm->mmu_lock);
flush = false;
}
}
@@ -2138,6 +2064,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
+
+#define LIST_ENTRY_TYPE_INFO struct kvm_mmu_page
for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) {
if (!need_sync && sp->unsync)
need_sync = true;
@@ -2153,16 +2081,16 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
break;
WARN_ON(!list_empty(&invalid_list));
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(GVM_REQ_TLB_FLUSH, vcpu);
}
if (sp->unsync_children)
- kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+ kvm_make_request(GVM_REQ_MMU_SYNC, vcpu);
__clear_sp_write_flooding_count(sp);
- trace_kvm_mmu_get_page(sp, false);
return sp;
}
+#undef LIST_ENTRY_TYPE_INFO
++vcpu->kvm->stat.mmu_cache_miss;
@@ -2188,7 +2116,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
}
sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
clear_page(sp->spt);
- trace_kvm_mmu_get_page(sp, true);
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
return sp;
@@ -2240,7 +2167,7 @@ static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
{
- return __shadow_walk_next(iterator, *iterator->sptep);
+ __shadow_walk_next(iterator, *iterator->sptep);
}
static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
@@ -2248,8 +2175,6 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
{
u64 spte;
- BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
-
spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
@@ -2322,8 +2247,11 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
u64 *sptep;
struct rmap_iterator iter;
- while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
+ sptep = rmap_get_first(&sp->parent_ptes, &iter);
+ while (sptep) {
drop_parent_pte(sp, sptep);
+ sptep = rmap_get_first(&sp->parent_ptes, &iter);
+ }
}
static int mmu_zap_unsync_children(struct kvm *kvm,
@@ -2355,7 +2283,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
{
int ret;
- trace_kvm_mmu_prepare_zap_page(sp);
++kvm->stat.mmu_shadow_zapped;
ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
kvm_mmu_page_unlink_children(kvm, sp);
@@ -2405,10 +2332,12 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
*/
kvm_flush_remote_tlbs(kvm);
+#define LIST_ENTRY_TYPE_INFO struct kvm_mmu_page
list_for_each_entry_safe(sp, nsp, invalid_list, link) {
WARN_ON(!sp->role.invalid || sp->root_count);
kvm_mmu_free_page(sp);
}
+#undef LIST_ENTRY_TYPE_INFO
}
static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
@@ -2460,22 +2389,22 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
spin_lock(&kvm->mmu_lock);
+#define LIST_ENTRY_TYPE_INFO struct kvm_mmu_page
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
+#undef LIST_ENTRY_TYPE_INFO
kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);
return r;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
- trace_kvm_mmu_unsync_page(sp);
++vcpu->kvm->stat.mmu_unsync;
sp->unsync = 1;
@@ -2487,9 +2416,12 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
{
struct kvm_mmu_page *sp;
- if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
+#if 0
+ if (kvm_page_track_is_active(vcpu, gfn, GVM_PAGE_TRACK_WRITE))
return true;
+#endif
+#define LIST_ENTRY_TYPE_INFO struct kvm_mmu_page
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (!can_unsync)
return true;
@@ -2500,16 +2432,15 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL);
kvm_unsync_page(vcpu, sp);
}
+#undef LIST_ENTRY_TYPE_INFO
return false;
}
static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
{
- if (pfn_valid(pfn))
- return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
-
- return true;
+ /* Without IOMMU, we won't assign real MMIO resource */
+ return false;
}
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
@@ -2635,7 +2566,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
true, host_writable)) {
if (write_fault)
emulate = true;
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(GVM_REQ_TLB_FLUSH, vcpu);
}
if (unlikely(is_mmio_spte(*sptep)))
@@ -2657,8 +2588,6 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
}
}
- kvm_release_pfn_clean(pfn);
-
return emulate;
}
@@ -2669,7 +2598,7 @@ static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
if (!slot)
- return KVM_PFN_ERR_FAULT;
+ return GVM_PFN_ERR_FAULT;
return gfn_to_pfn_memslot_atomic(slot, gfn);
}
@@ -2678,7 +2607,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
u64 *start, u64 *end)
{
- struct page *pages[PTE_PREFETCH_NUM];
+ pfn_t pfn[PTE_PREFETCH_NUM];
struct kvm_memory_slot *slot;
unsigned access = sp->role.access;
int i, ret;
@@ -2689,13 +2618,13 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
if (!slot)
return -1;
- ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start);
+ ret = gfn_to_pfn_many_atomic(slot, gfn, pfn, end - start);
if (ret <= 0)
return -1;
for (i = 0; i < ret; i++, gfn++, start++)
mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
- page_to_pfn(pages[i]), true, true);
+ pfn[i], true, true);
return 0;
}
@@ -2744,7 +2673,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
}
static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
- int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
+ int level, gfn_t gfn, kvm_pfn_t pfn)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
@@ -2757,7 +2686,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
- write, level, gfn, pfn, prefault,
+ write, level, gfn, pfn, false,
map_writable);
direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed;
@@ -2779,19 +2708,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
return emulate;
}
-static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
-{
- siginfo_t info;
-
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_MCEERR_AR;
- info.si_addr = (void __user *)address;
- info.si_addr_lsb = PAGE_SHIFT;
-
- send_sig_info(SIGBUS, &info, tsk);
-}
-
static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
{
/*
@@ -2800,59 +2716,12 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
* caused mmio page fault and treat it as mmio access.
* Return 1 to tell kvm to emulate it.
*/
- if (pfn == KVM_PFN_ERR_RO_FAULT)
+ if (pfn == GVM_PFN_ERR_RO_FAULT)
return 1;
- if (pfn == KVM_PFN_ERR_HWPOISON) {
- kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current);
- return 0;
- }
-
return -EFAULT;
}
-static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
- gfn_t *gfnp, kvm_pfn_t *pfnp,
- int *levelp)
-{
- kvm_pfn_t pfn = *pfnp;
- gfn_t gfn = *gfnp;
- int level = *levelp;
-
- /*
- * Check if it's a transparent hugepage. If this would be an
- * hugetlbfs page, level wouldn't be set to
- * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
- * here.
- */
- if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
- level == PT_PAGE_TABLE_LEVEL &&
- PageTransCompoundMap(pfn_to_page(pfn)) &&
- !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
- unsigned long mask;
- /*
- * mmu_notifier_retry was successful and we hold the
- * mmu_lock here, so the pmd can't become splitting
- * from under us, and in turn
- * __split_huge_page_refcount() can't run from under
- * us and we can safely transfer the refcount from
- * PG_tail to PG_head as we switch the pfn to tail to
- * head.
- */
- *levelp = level = PT_DIRECTORY_LEVEL;
- mask = KVM_PAGES_PER_HPAGE(level) - 1;
- VM_BUG_ON((gfn & mask) != (pfn & mask));
- if (pfn & mask) {
- gfn &= ~mask;
- *gfnp = gfn;
- kvm_release_pfn_clean(pfn);
- pfn &= ~mask;
- kvm_get_pfn(pfn);
- *pfnp = pfn;
- }
- }
-}
-
static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
kvm_pfn_t pfn, unsigned access, int *ret_val)
{
@@ -2941,9 +2810,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
return false;
walk_shadow_page_lockless_begin(vcpu);
- for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
+ for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) {
+ spte = mmu_spte_get_lockless(iterator.sptep);
if (!is_shadow_present_pte(spte) || iterator.level < level)
break;
+ }
/*
* If the mapping has been changed, let the vcpu fault on the
@@ -2996,67 +2867,42 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
*/
ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte);
exit:
- trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
- spte, ret);
walk_shadow_page_lockless_end(vcpu);
return ret;
}
-static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+static void get_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
- gfn_t gfn, bool prefault)
+ gfn_t gfn)
{
int r;
int level;
bool force_pt_level = false;
kvm_pfn_t pfn;
- unsigned long mmu_seq;
bool map_writable, write = error_code & PFERR_WRITE_MASK;
level = mapping_level(vcpu, gfn, &force_pt_level);
- if (likely(!force_pt_level)) {
- /*
- * This path builds a PAE pagetable - so we can map
- * 2mb pages at maximum. Therefore check if the level
- * is larger than that.
- */
- if (level > PT_DIRECTORY_LEVEL)
- level = PT_DIRECTORY_LEVEL;
-
- gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
- }
if (fast_page_fault(vcpu, v, level, error_code))
return 0;
- mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
- return 0;
+ get_pfn(vcpu, gfn, v, &pfn, write, &map_writable);
if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
return r;
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
- goto out_unlock;
make_mmu_pages_available(vcpu);
- if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+ r = __direct_map(vcpu, write, map_writable, level, gfn, pfn);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
-
-out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(pfn);
- return 0;
}
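nonpaging_map here (and tdp_page_fault below) drops the async page fault, mmu_notifier retry, and huge-page adjustment, leaving a synchronous resolve-then-map sequence. A hedged outline of that reduced flow, using stand-in types and stub steps rather than the real KVM calls:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;
typedef uint64_t pfn_t;

/* Stub backing store: pretend every gfn is backed by pfn = gfn + 0x1000. */
static pfn_t get_pfn(gfn_t gfn, bool write, bool *writable)
{
	*writable = write;                 /* no async path: always resolved here */
	return gfn + 0x1000;
}

static int map_fault(gfn_t gfn, bool write)
{
	bool writable;
	pfn_t pfn = get_pfn(gfn, write, &writable);

	/* In the kernel: lock mmu_lock, make_mmu_pages_available(), __direct_map();
	 * no mmu_notifier_retry / release-and-retry branch remains. */
	printf("map gfn 0x%llx -> pfn 0x%llx writable=%d\n",
	       (unsigned long long)gfn, (unsigned long long)pfn, writable);
	return 0;
}

int main(void)
{
	return map_fault(0x200, true);
}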
@@ -3110,7 +2956,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
int ret = 0;
if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_make_request(GVM_REQ_TRIPLE_FAULT, vcpu);
ret = 1;
}
@@ -3291,7 +3137,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
mmu_sync_roots(vcpu);
spin_unlock(&vcpu->kvm->mmu_lock);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
u32 access, struct x86_exception *exception)
@@ -3405,7 +3250,6 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
if (direct)
addr = 0;
- trace_handle_mmio_page_fault(addr, gfn, access);
vcpu_cache_mmio_info(vcpu, addr, gfn, access);
return RET_MMIO_PF_EMULATE;
}
@@ -3416,7 +3260,6 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
*/
return RET_MMIO_PF_RETRY;
}
-EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
u32 error_code, gfn_t gfn)
@@ -3428,12 +3271,14 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
!(error_code & PFERR_WRITE_MASK))
return false;
+#if 0
/*
* guest is writing the page which is write tracked which can
* not be fixed by page fault handler.
*/
- if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
+ if (kvm_page_track_is_active(vcpu, gfn, GVM_PAGE_TRACK_WRITE))
return true;
+#endif
return false;
}
@@ -3448,6 +3293,7 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_begin(vcpu);
for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
+ spte = mmu_spte_get_lockless(iterator.sptep);
clear_sp_write_flooding_count(iterator.sptep);
if (!is_shadow_present_pte(spte))
break;
@@ -3456,7 +3302,7 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
}
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
- u32 error_code, bool prefault)
+ u32 error_code)
{
gfn_t gfn = gva >> PAGE_SHIFT;
int r;
@@ -3473,76 +3319,24 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
- return nonpaging_map(vcpu, gva & PAGE_MASK,
- error_code, gfn, prefault);
+ return nonpaging_map(vcpu, gva & PAGE_MASK, error_code, gfn);
}
-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
-{
- struct kvm_arch_async_pf arch;
-
- arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
- arch.gfn = gfn;
- arch.direct_map = vcpu->arch.mmu.direct_map;
- arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
-
- return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
-}
-
-static bool can_do_async_pf(struct kvm_vcpu *vcpu)
-{
- if (unlikely(!lapic_in_kernel(vcpu) ||
- kvm_event_needs_reinjection(vcpu)))
- return false;
-
- return kvm_x86_ops->interrupt_allowed(vcpu);
-}
-
-static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+static void get_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
{
struct kvm_memory_slot *slot;
- bool async;
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- async = false;
- *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
- if (!async)
- return false; /* *pfn has correct page already */
-
- if (!prefault && can_do_async_pf(vcpu)) {
- trace_kvm_try_async_get_page(gva, gfn);
- if (kvm_find_async_pf_gfn(vcpu, gfn)) {
- trace_kvm_async_pf_doublefault(gva, gfn);
- kvm_make_request(KVM_REQ_APF_HALT, vcpu);
- return true;
- } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
- return true;
- }
-
*pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable);
- return false;
-}
-
-static bool
-check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
-{
- int page_num = KVM_PAGES_PER_HPAGE(level);
-
- gfn &= ~(page_num - 1);
-
- return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num);
}
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
- bool prefault)
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code)
{
kvm_pfn_t pfn;
int r;
int level;
- bool force_pt_level;
gfn_t gfn = gpa >> PAGE_SHIFT;
- unsigned long mmu_seq;
int write = error_code & PFERR_WRITE_MASK;
bool map_writable;
@@ -3555,43 +3349,24 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (r)
return r;
- force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
- PT_DIRECTORY_LEVEL);
- level = mapping_level(vcpu, gfn, &force_pt_level);
- if (likely(!force_pt_level)) {
- if (level > PT_DIRECTORY_LEVEL &&
- !check_hugepage_cache_consistency(vcpu, gfn, level))
- level = PT_DIRECTORY_LEVEL;
- gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
- }
+ level = mapping_level(vcpu, gfn, NULL);
if (fast_page_fault(vcpu, gpa, level, error_code))
return 0;
- mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
- return 0;
+ get_pfn(vcpu, gfn, gpa, &pfn, write, &map_writable);
if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
return r;
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
- goto out_unlock;
make_mmu_pages_available(vcpu);
- if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+ r = __direct_map(vcpu, write, map_writable, level, gfn, pfn);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
-
-out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(pfn);
- return 0;
}
static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@@ -3614,7 +3389,7 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
mmu_free_roots(vcpu);
}
-static unsigned long get_cr3(struct kvm_vcpu *vcpu)
+static size_t get_cr3(struct kvm_vcpu *vcpu)
{
return kvm_read_cr3(vcpu);
}
@@ -3662,10 +3437,12 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu,
return gpte & PT_PAGE_SIZE_MASK;
}
+#if 0
#define PTTYPE_EPT 18 /* arbitrary */
#define PTTYPE PTTYPE_EPT
#include "paging_tmpl.h"
#undef PTTYPE
+#endif
#define PTTYPE 64
#include "paging_tmpl.h"
@@ -3820,7 +3597,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
/*
* Passing "true" to the last argument is okay; it adds a check
- * on bit 8 of the SPTEs which KVM doesn't use anyway.
+ * on bit 8 of the SPTEs which kvm doesn't use anyway.
*/
__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
boot_cpu_data.x86_phys_bits,
@@ -3828,7 +3605,6 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
true);
}
-EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
static inline bool boot_cpu_is_amd(void)
{
@@ -3932,81 +3708,6 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
}
}
-/*
-* PKU is an additional mechanism by which the paging controls access to
-* user-mode addresses based on the value in the PKRU register. Protection
-* key violations are reported through a bit in the page fault error code.
-* Unlike other bits of the error code, the PK bit is not known at the
-* call site of e.g. gva_to_gpa; it must be computed directly in
-* permission_fault based on two bits of PKRU, on some machine state (CR4,
-* CR0, EFER, CPL), and on other bits of the error code and the page tables.
-*
-* In particular the following conditions come from the error code, the
-* page tables and the machine state:
-* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1
-* - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch)
-* - PK is always zero if U=0 in the page tables
-* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access.
-*
-* The PKRU bitmask caches the result of these four conditions. The error
-* code (minus the P bit) and the page table's U bit form an index into the
-* PKRU bitmask. Two bits of the PKRU bitmask are then extracted and ANDed
-* with the two bits of the PKRU register corresponding to the protection key.
-* For the first three conditions above the bits will be 00, thus masking
-* away both AD and WD. For all reads or if the last condition holds, WD
-* only will be masked away.
-*/
-static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- bool ept)
-{
- unsigned bit;
- bool wp;
-
- if (ept) {
- mmu->pkru_mask = 0;
- return;
- }
-
- /* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) {
- mmu->pkru_mask = 0;
- return;
- }
-
- wp = is_write_protection(vcpu);
-
- for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) {
- unsigned pfec, pkey_bits;
- bool check_pkey, check_write, ff, uf, wf, pte_user;
-
- pfec = bit << 1;
- ff = pfec & PFERR_FETCH_MASK;
- uf = pfec & PFERR_USER_MASK;
- wf = pfec & PFERR_WRITE_MASK;
-
- /* PFEC.RSVD is replaced by ACC_USER_MASK. */
- pte_user = pfec & PFERR_RSVD_MASK;
-
- /*
- * Only need to check the access which is not an
- * instruction fetch and is to a user page.
- */
- check_pkey = (!ff && pte_user);
- /*
- * write access is controlled by PKRU if it is a
- * user access or CR0.WP = 1.
- */
- check_write = check_pkey && wf && (uf || wp);
-
- /* PKRU.AD stops both read and write access. */
- pkey_bits = !!check_pkey;
- /* PKRU.WD stops write access. */
- pkey_bits |= (!!check_write) << 1;
-
- mmu->pkru_mask |= (pkey_bits & 3) << pfec;
- }
-}
-
static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
{
unsigned root_level = mmu->root_level;
@@ -4025,7 +3726,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
- update_pkru_bitmask(vcpu, context, false);
update_last_nonleaf_level(vcpu, context);
MMU_WARN_ON(!is_pae(vcpu));
@@ -4053,7 +3753,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
- update_pkru_bitmask(vcpu, context, false);
update_last_nonleaf_level(vcpu, context);
context->page_fault = paging32_page_fault;
@@ -4112,7 +3811,6 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, context, false);
- update_pkru_bitmask(vcpu, context, false);
update_last_nonleaf_level(vcpu, context);
reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
@@ -4144,10 +3842,10 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
context->base_role.smm = is_smm(vcpu);
reset_shadow_zero_bits_mask(vcpu, context);
}
-EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
{
+#if 0
struct kvm_mmu *context = &vcpu->arch.mmu;
MMU_WARN_ON(VALID_PAGE(context->root_hpa));
@@ -4165,11 +3863,10 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
context->direct_map = false;
update_permission_bitmask(vcpu, context, true);
- update_pkru_bitmask(vcpu, context, true);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
+#endif
}
-EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
@@ -4220,7 +3917,6 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, g_context, false);
- update_pkru_bitmask(vcpu, g_context, false);
update_last_nonleaf_level(vcpu, g_context);
}
@@ -4239,7 +3935,6 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
init_kvm_mmu(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
@@ -4257,14 +3952,12 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
out:
return r;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_load);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
mmu_free_roots(vcpu);
WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
}
-EXPORT_SYMBOL_GPL(kvm_mmu_unload);
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
@@ -4413,7 +4106,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
u64 entry, gentry, *spte;
int npte;
bool remote_flush, local_flush;
- union kvm_mmu_page_role mask = { };
+ union kvm_mmu_page_role mask = { 0 };
mask.cr0_wp = 1;
mask.cr4_pae = 1;
@@ -4426,7 +4119,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
* If we don't have indirect shadow pages, it means no page is
* write-protected, so we can exit simply.
*/
- if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+ unsigned int temp;
+ ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages, temp);
+ if (!temp)
return;
remote_flush = local_flush = false;
@@ -4446,6 +4141,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+#define LIST_ENTRY_TYPE_INFO struct kvm_mmu_page
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) {
@@ -4471,6 +4167,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++spte;
}
}
+#undef LIST_ENTRY_TYPE_INFO
kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush);
kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -4490,16 +4187,15 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
return r;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
{
LIST_HEAD(invalid_list);
- if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
+ if (likely(kvm_mmu_available_pages(vcpu->kvm) >= GVM_MIN_FREE_MMU_PAGES))
return;
- while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
+ while (kvm_mmu_available_pages(vcpu->kvm) < GVM_REFILL_PAGES) {
if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
break;
@@ -4527,7 +4223,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
return r;
}
- r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
+ r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
if (r < 0)
return r;
if (!r)
@@ -4550,38 +4246,33 @@ emulate:
BUG();
}
}
-EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
vcpu->arch.mmu.invlpg(vcpu, gva);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(GVM_REQ_TLB_FLUSH, vcpu);
++vcpu->stat.invlpg;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
void kvm_enable_tdp(void)
{
tdp_enabled = true;
}
-EXPORT_SYMBOL_GPL(kvm_enable_tdp);
void kvm_disable_tdp(void)
{
tdp_enabled = false;
}
-EXPORT_SYMBOL_GPL(kvm_disable_tdp);
static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
- free_page((unsigned long)vcpu->arch.mmu.pae_root);
+ MmFreeContiguousMemory(vcpu->arch.mmu.pae_root);
if (vcpu->arch.mmu.lm_root != NULL)
- free_page((unsigned long)vcpu->arch.mmu.lm_root);
+ free_page((size_t)vcpu->arch.mmu.lm_root);
}
static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
{
- struct page *page;
int i;
/*
@@ -4589,11 +4280,14 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
* Therefore we need to allocate shadow page tables in the first
* 4GB of memory, which happens to fit the DMA32 zone.
*/
- page = alloc_page(GFP_KERNEL | __GFP_DMA32);
- if (!page)
+ PHYSICAL_ADDRESS addr_4g;
+ addr_4g.QuadPart = 0xFFFFFFFF;
+
+ vcpu->arch.mmu.pae_root =
+ MmAllocateContiguousMemory(PAGE_SIZE, addr_4g);
+ if (!vcpu->arch.mmu.pae_root)
return -ENOMEM;
- vcpu->arch.mmu.pae_root = page_address(page);
for (i = 0; i < 4; ++i)
vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
@@ -4649,6 +4343,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (iterator.rmap)
flush |= fn(kvm, iterator.rmap);
+#if 0
if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
if (flush && lock_flush_tlb) {
kvm_flush_remote_tlbs(kvm);
@@ -4656,6 +4351,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
}
cond_resched_lock(&kvm->mmu_lock);
}
+#endif
}
if (flush && lock_flush_tlb) {
@@ -4682,15 +4378,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
- PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
-}
-
-static bool
-slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
- slot_level_handler fn, bool lock_flush_tlb)
-{
- return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1,
- PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+ PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
}
static bool
@@ -4708,7 +4396,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
int i;
spin_lock(&kvm->mmu_lock);
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(memslot, slots) {
gfn_t start, end;
@@ -4719,7 +4407,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
continue;
slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
- PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL,
+ PT_PAGE_TABLE_LEVEL, PT_PAGE_TABLE_LEVEL,
start, end - 1, true);
}
}
@@ -4748,7 +4436,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
* which do tlb flush out of mmu-lock should be serialized by
* kvm->slots_lock otherwise tlb flush would be missed.
*/
- lockdep_assert_held(&kvm->slots_lock);
+ //lockdep_assert_held(&kvm->slots_lock);
/*
* We can flush all the TLBs out of the mmu lock without TLB
@@ -4786,9 +4474,8 @@ restart:
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
- if (sp->role.direct &&
- !kvm_is_reserved_pfn(pfn) &&
- PageTransCompoundMap(pfn_to_page(pfn))) {
+ if (sp->role.direct //&&
+ /*PageTransCompoundMap(pfn_to_page(pfn))*/) {
drop_spte(kvm, sptep);
need_tlb_flush = 1;
goto restart;
@@ -4817,7 +4504,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
spin_unlock(&kvm->mmu_lock);
- lockdep_assert_held(&kvm->slots_lock);
+ //lockdep_assert_held(&kvm->slots_lock);
/*
* It's also safe to flush TLBs out of mmu lock here as currently this
@@ -4828,25 +4515,6 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
if (flush)
kvm_flush_remote_tlbs(kvm);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);
-
-void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
-{
- bool flush;
-
- spin_lock(&kvm->mmu_lock);
- flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
- false);
- spin_unlock(&kvm->mmu_lock);
-
- /* see kvm_mmu_slot_remove_write_access */
- lockdep_assert_held(&kvm->slots_lock);
-
- if (flush)
- kvm_flush_remote_tlbs(kvm);
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access);
void kvm_mmu_slot_set_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot)
@@ -4857,13 +4525,12 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
spin_unlock(&kvm->mmu_lock);
- lockdep_assert_held(&kvm->slots_lock);
+ //lockdep_assert_held(&kvm->slots_lock);
/* see kvm_mmu_slot_leaf_clear_dirty */
if (flush)
kvm_flush_remote_tlbs(kvm);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
#define BATCH_ZAP_PAGES 10
static void kvm_zap_obsolete_pages(struct kvm *kvm)
@@ -4872,6 +4539,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
int batch = 0;
restart:
+#define LIST_ENTRY_TYPE_INFO struct kvm_mmu_page
list_for_each_entry_safe_reverse(sp, node,
&kvm->arch.active_mmu_pages, link) {
int ret;
@@ -4895,8 +4563,8 @@ restart:
* Need not flush tlb since we only zap the sp with invalid
* generation number.
*/
- if (batch >= BATCH_ZAP_PAGES &&
- cond_resched_lock(&kvm->mmu_lock)) {
+ if (batch >= BATCH_ZAP_PAGES) {// &&
+ //cond_resched_lock(&kvm->mmu_lock)) {
batch = 0;
goto restart;
}
@@ -4908,6 +4576,7 @@ restart:
if (ret)
goto restart;
}
+#undef LIST_ENTRY_TYPE_INFO
/*
* Should flush tlb before free page tables since lockless-walking
@@ -4921,14 +4590,13 @@ restart:
* to zap obsolete pages.
*
* It's required when memslot is being deleted or VM is being
- * destroyed, in these cases, we should ensure that KVM MMU does
+ * destroyed, in these cases, we should ensure that kvm MMU does
* not use any resource of the being-deleted slot or all slots
* after calling the function.
*/
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
{
spin_lock(&kvm->mmu_lock);
- trace_kvm_mmu_invalidate_zap_all_pages(kvm);
kvm->arch.mmu_valid_gen++;
/*
@@ -4963,12 +4631,14 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
}
}
-static unsigned long
+// todo-002
+#if 0
+static size_t
mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
struct kvm *kvm;
int nr_to_scan = sc->nr_to_scan;
- unsigned long freed = 0;
+ size_t freed = 0;
spin_lock(&kvm_lock);
@@ -5024,7 +4694,7 @@ unlock:
return freed;
}
-static unsigned long
+static size_t
mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
@@ -5035,39 +4705,16 @@ static struct shrinker mmu_shrinker = {
.scan_objects = mmu_shrink_scan,
.seeks = DEFAULT_SEEKS * 10,
};
+#endif
static void mmu_destroy_caches(void)
{
+#if 0
if (pte_list_desc_cache)
kmem_cache_destroy(pte_list_desc_cache);
if (mmu_page_header_cache)
kmem_cache_destroy(mmu_page_header_cache);
-}
-
-int kvm_mmu_module_init(void)
-{
- pte_list_desc_cache = kmem_cache_create("pte_list_desc",
- sizeof(struct pte_list_desc),
- 0, 0, NULL);
- if (!pte_list_desc_cache)
- goto nomem;
-
- mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
- sizeof(struct kvm_mmu_page),
- 0, 0, NULL);
- if (!mmu_page_header_cache)
- goto nomem;
-
- if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
- goto nomem;
-
- register_shrinker(&mmu_shrinker);
-
- return 0;
-
-nomem:
- mmu_destroy_caches();
- return -ENOMEM;
+#endif
}
/*
@@ -5081,16 +4728,16 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
struct kvm_memory_slot *memslot;
int i;
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(memslot, slots)
nr_pages += memslot->npages;
}
- nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
+ nr_mmu_pages = nr_pages * GVM_PERMILLE_MMU_PAGES / 1000;
nr_mmu_pages = max(nr_mmu_pages,
- (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+ (unsigned int) GVM_MIN_ALLOC_MMU_PAGES);
return nr_mmu_pages;
}
@@ -5104,8 +4751,11 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
void kvm_mmu_module_exit(void)
{
+ // todo-001
+#if 0
mmu_destroy_caches();
percpu_counter_destroy(&kvm_total_used_mmu_pages);
unregister_shrinker(&mmu_shrinker);
mmu_audit_disable();
+#endif
}
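The alloc_mmu_pages()/free_mmu_pages() hunks above trade alloc_page(GFP_KERNEL | __GFP_DMA32) for MmAllocateContiguousMemory so the PAE root still ends up below 4GB on a Windows host. A minimal sketch of that allocation pattern, assuming only the stock wdm.h prototypes for the two Mm* routines (the wrapper names are illustrative, not from the patch):

#include <wdm.h>

/* Grab one page of physically contiguous memory below the 4GB line,
 * the same constraint the DMA32 zone provided on Linux. */
static void *alloc_page_below_4g(void)
{
    PHYSICAL_ADDRESS highest;

    highest.QuadPart = 0xFFFFFFFF;   /* highest acceptable physical address */
    return MmAllocateContiguousMemory(PAGE_SIZE, highest);
}

static void free_page_below_4g(void *va)
{
    if (va)
        MmFreeContiguousMemory(va);  /* counterpart used in free_mmu_pages() */
}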
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index ddc56e9..cf39e5a 100644..100755
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -1,3 +1,7 @@
+/*
+ * Copyright 2019 Google LLC
+ */
+
#ifndef __KVM_X86_MMU_H
#define __KVM_X86_MMU_H
@@ -44,7 +48,7 @@
#define PT_PDPE_LEVEL 3
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
-#define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1)
+#define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + GVM_NR_PAGE_SIZES - 1)
static inline u64 rsvd_bits(int s, int e)
{
@@ -96,7 +100,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
/*
* Currently, we have two sorts of write-protection, a) the first one
* write-protects guest page to sync the guest modification, b) another one is
- * used to sync dirty bitmap when we do KVM_GET_DIRTY_LOG. The differences
+ * used to sync dirty bitmap when we do GVM_GET_DIRTY_LOG. The differences
* between these two sorts are:
* 1) the first case clears SPTE_MMU_WRITEABLE bit.
* 2) the first case requires flushing tlb immediately avoiding corrupting
@@ -126,7 +130,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
*
* TODO: introduce APIs to split these two cases.
*/
-static inline int is_writable_pte(unsigned long pte)
+static inline int is_writable_pte(size_t pte)
{
return pte & PT_WRITABLE_MASK;
}
@@ -149,7 +153,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
unsigned pfec)
{
int cpl = kvm_x86_ops->get_cpl(vcpu);
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+ size_t rflags = kvm_x86_ops->get_rflags(vcpu);
/*
* If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
@@ -164,41 +168,20 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
* but it will be one in index if SMAP checks are being overridden.
* It is important to keep this branchless.
*/
- unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+ size_t smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
bool fault = (mmu->permissions[index] >> pte_access) & 1;
u32 errcode = PFERR_PRESENT_MASK;
WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
- if (unlikely(mmu->pkru_mask)) {
- u32 pkru_bits, offset;
-
- /*
- * PKRU defines 32 bits, there are 16 domains and 2
- * attribute bits per domain in pkru. pte_pkey is the
- * index of the protection domain, so pte_pkey * 2 is
- * is the index of the first bit for the domain.
- */
- pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
-
- /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
- offset = (pfec & ~1) +
- ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
-
- pkru_bits &= mmu->pkru_mask >> offset;
- errcode |= -pkru_bits & PFERR_PK_MASK;
- fault |= (pkru_bits != 0);
- }
-
- return -(u32)fault & errcode;
+
+ return -(s32)fault & errcode;
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
-void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
-void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
#endif
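The permission_fault() hunk drops the PKRU handling but keeps the branchless return: negating a 0/1 flag produces an all-ones or all-zero mask, so the error code is reported only when the permission bitmap flags a fault (the switch from -(u32) to -(s32) is behavior-preserving for a 0/1 value). A small standalone illustration of the idiom, with names invented for the sketch:

#include <assert.h>
#include <stdint.h>

/* Return err when fault is non-zero and 0 otherwise, without a branch. */
static uint32_t mask_errcode(int fault, uint32_t err)
{
    return -(uint32_t)!!fault & err;
}

int main(void)
{
    assert(mask_errcode(1, 0x5) == 0x5);  /* faulting access keeps the code */
    assert(mask_errcode(0, 0x5) == 0);    /* clean access reports nothing */
    return 0;
}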
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index dcce533..76050b1 100644..100755
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -5,6 +5,7 @@
*
* Copyright (C) 2006 Qumranet, Inc.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -17,6 +18,7 @@
*
*/
+#if 0
#include <linux/ratelimit.h>
char const *audit_point_name[] = {
@@ -278,7 +280,7 @@ static void mmu_audit_disable(void)
static int mmu_audit_set(const char *val, const struct kernel_param *kp)
{
int ret;
- unsigned long enable;
+ size_t enable;
ret = kstrtoul(val, 10, &enable);
if (ret < 0)
@@ -304,3 +306,4 @@ static const struct kernel_param_ops audit_param_ops = {
};
arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
+#endif
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
deleted file mode 100644
index 5a24b84..0000000
--- a/arch/x86/kvm/mmutrace.h
+++ /dev/null
@@ -1,333 +0,0 @@
-#if !defined(_TRACE_KVMMMU_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_KVMMMU_H
-
-#include <linux/tracepoint.h>
-#include <linux/trace_events.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kvmmmu
-
-#define KVM_MMU_PAGE_FIELDS \
- __field(unsigned long, mmu_valid_gen) \
- __field(__u64, gfn) \
- __field(__u32, role) \
- __field(__u32, root_count) \
- __field(bool, unsync)
-
-#define KVM_MMU_PAGE_ASSIGN(sp) \
- __entry->mmu_valid_gen = sp->mmu_valid_gen; \
- __entry->gfn = sp->gfn; \
- __entry->role = sp->role.word; \
- __entry->root_count = sp->root_count; \
- __entry->unsync = sp->unsync;
-
-#define KVM_MMU_PAGE_PRINTK() ({ \
- const char *saved_ptr = trace_seq_buffer_ptr(p); \
- static const char *access_str[] = { \
- "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \
- }; \
- union kvm_mmu_page_role role; \
- \
- role.word = __entry->role; \
- \
- trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s" \
- " %snxe root %u %s%c", __entry->mmu_valid_gen, \
- __entry->gfn, role.level, \
- role.cr4_pae ? " pae" : "", \
- role.quadrant, \
- role.direct ? " direct" : "", \
- access_str[role.access], \
- role.invalid ? " invalid" : "", \
- role.nxe ? "" : "!", \
- __entry->root_count, \
- __entry->unsync ? "unsync" : "sync", 0); \
- saved_ptr; \
- })
-
-#define kvm_mmu_trace_pferr_flags \
- { PFERR_PRESENT_MASK, "P" }, \
- { PFERR_WRITE_MASK, "W" }, \
- { PFERR_USER_MASK, "U" }, \
- { PFERR_RSVD_MASK, "RSVD" }, \
- { PFERR_FETCH_MASK, "F" }
-
-/*
- * A pagetable walk has started
- */
-TRACE_EVENT(
- kvm_mmu_pagetable_walk,
- TP_PROTO(u64 addr, u32 pferr),
- TP_ARGS(addr, pferr),
-
- TP_STRUCT__entry(
- __field(__u64, addr)
- __field(__u32, pferr)
- ),
-
- TP_fast_assign(
- __entry->addr = addr;
- __entry->pferr = pferr;
- ),
-
- TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr,
- __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
-);
-
-
-/* We just walked a paging element */
-TRACE_EVENT(
- kvm_mmu_paging_element,
- TP_PROTO(u64 pte, int level),
- TP_ARGS(pte, level),
-
- TP_STRUCT__entry(
- __field(__u64, pte)
- __field(__u32, level)
- ),
-
- TP_fast_assign(
- __entry->pte = pte;
- __entry->level = level;
- ),
-
- TP_printk("pte %llx level %u", __entry->pte, __entry->level)
-);
-
-DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class,
-
- TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
-
- TP_ARGS(table_gfn, index, size),
-
- TP_STRUCT__entry(
- __field(__u64, gpa)
- ),
-
- TP_fast_assign(
- __entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
- + index * size;
- ),
-
- TP_printk("gpa %llx", __entry->gpa)
-);
-
-/* We set a pte accessed bit */
-DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit,
-
- TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
-
- TP_ARGS(table_gfn, index, size)
-);
-
-/* We set a pte dirty bit */
-DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit,
-
- TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
-
- TP_ARGS(table_gfn, index, size)
-);
-
-TRACE_EVENT(
- kvm_mmu_walker_error,
- TP_PROTO(u32 pferr),
- TP_ARGS(pferr),
-
- TP_STRUCT__entry(
- __field(__u32, pferr)
- ),
-
- TP_fast_assign(
- __entry->pferr = pferr;
- ),
-
- TP_printk("pferr %x %s", __entry->pferr,
- __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
-);
-
-TRACE_EVENT(
- kvm_mmu_get_page,
- TP_PROTO(struct kvm_mmu_page *sp, bool created),
- TP_ARGS(sp, created),
-
- TP_STRUCT__entry(
- KVM_MMU_PAGE_FIELDS
- __field(bool, created)
- ),
-
- TP_fast_assign(
- KVM_MMU_PAGE_ASSIGN(sp)
- __entry->created = created;
- ),
-
- TP_printk("%s %s", KVM_MMU_PAGE_PRINTK(),
- __entry->created ? "new" : "existing")
-);
-
-DECLARE_EVENT_CLASS(kvm_mmu_page_class,
-
- TP_PROTO(struct kvm_mmu_page *sp),
- TP_ARGS(sp),
-
- TP_STRUCT__entry(
- KVM_MMU_PAGE_FIELDS
- ),
-
- TP_fast_assign(
- KVM_MMU_PAGE_ASSIGN(sp)
- ),
-
- TP_printk("%s", KVM_MMU_PAGE_PRINTK())
-);
-
-DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page,
- TP_PROTO(struct kvm_mmu_page *sp),
-
- TP_ARGS(sp)
-);
-
-DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page,
- TP_PROTO(struct kvm_mmu_page *sp),
-
- TP_ARGS(sp)
-);
-
-DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
- TP_PROTO(struct kvm_mmu_page *sp),
-
- TP_ARGS(sp)
-);
-
-TRACE_EVENT(
- mark_mmio_spte,
- TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access, unsigned int gen),
- TP_ARGS(sptep, gfn, access, gen),
-
- TP_STRUCT__entry(
- __field(void *, sptep)
- __field(gfn_t, gfn)
- __field(unsigned, access)
- __field(unsigned int, gen)
- ),
-
- TP_fast_assign(
- __entry->sptep = sptep;
- __entry->gfn = gfn;
- __entry->access = access;
- __entry->gen = gen;
- ),
-
- TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep,
- __entry->gfn, __entry->access, __entry->gen)
-);
-
-TRACE_EVENT(
- handle_mmio_page_fault,
- TP_PROTO(u64 addr, gfn_t gfn, unsigned access),
- TP_ARGS(addr, gfn, access),
-
- TP_STRUCT__entry(
- __field(u64, addr)
- __field(gfn_t, gfn)
- __field(unsigned, access)
- ),
-
- TP_fast_assign(
- __entry->addr = addr;
- __entry->gfn = gfn;
- __entry->access = access;
- ),
-
- TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn,
- __entry->access)
-);
-
-#define __spte_satisfied(__spte) \
- (__entry->retry && is_writable_pte(__entry->__spte))
-
-TRACE_EVENT(
- fast_page_fault,
- TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
- u64 *sptep, u64 old_spte, bool retry),
- TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(gva_t, gva)
- __field(u32, error_code)
- __field(u64 *, sptep)
- __field(u64, old_spte)
- __field(u64, new_spte)
- __field(bool, retry)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu->vcpu_id;
- __entry->gva = gva;
- __entry->error_code = error_code;
- __entry->sptep = sptep;
- __entry->old_spte = old_spte;
- __entry->new_spte = *sptep;
- __entry->retry = retry;
- ),
-
- TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx"
- " new %llx spurious %d fixed %d", __entry->vcpu_id,
- __entry->gva, __print_flags(__entry->error_code, "|",
- kvm_mmu_trace_pferr_flags), __entry->sptep,
- __entry->old_spte, __entry->new_spte,
- __spte_satisfied(old_spte), __spte_satisfied(new_spte)
- )
-);
-
-TRACE_EVENT(
- kvm_mmu_invalidate_zap_all_pages,
- TP_PROTO(struct kvm *kvm),
- TP_ARGS(kvm),
-
- TP_STRUCT__entry(
- __field(unsigned long, mmu_valid_gen)
- __field(unsigned int, mmu_used_pages)
- ),
-
- TP_fast_assign(
- __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
- __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
- ),
-
- TP_printk("kvm-mmu-valid-gen %lx used_pages %x",
- __entry->mmu_valid_gen, __entry->mmu_used_pages
- )
-);
-
-
-TRACE_EVENT(
- check_mmio_spte,
- TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen),
- TP_ARGS(spte, kvm_gen, spte_gen),
-
- TP_STRUCT__entry(
- __field(unsigned int, kvm_gen)
- __field(unsigned int, spte_gen)
- __field(u64, spte)
- ),
-
- TP_fast_assign(
- __entry->kvm_gen = kvm_gen;
- __entry->spte_gen = spte_gen;
- __entry->spte = spte;
- ),
-
- TP_printk("spte %llx kvm_gen %x spte-gen %x valid %d", __entry->spte,
- __entry->kvm_gen, __entry->spte_gen,
- __entry->kvm_gen == __entry->spte_gen
- )
-);
-#endif /* _TRACE_KVMMMU_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE mmutrace
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 0149ac5..6f3c042 100644..100755
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -4,6 +4,7 @@
* Copyright (C) 2006 Qumranet, Inc.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
* Copyright(C) 2015 Intel Corporation.
+ * Copyright 2019 Google LLC
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -17,7 +18,6 @@
*/
#include <linux/kvm_host.h>
-#include <asm/mtrr.h>
#include "cpuid.h"
#include "mmu.h"
@@ -26,10 +26,19 @@
#define IA32_MTRR_DEF_TYPE_FE (1ULL << 10)
#define IA32_MTRR_DEF_TYPE_TYPE_MASK (0xff)
+/* MTRR memory types, as defined in the SDM */
+#define MTRR_TYPE_UNCACHABLE 0
+#define MTRR_TYPE_WRCOMB 1
+/*#define MTRR_TYPE_ 2*/
+/*#define MTRR_TYPE_ 3*/
+#define MTRR_TYPE_WRTHROUGH 4
+#define MTRR_TYPE_WRPROT 5
+#define MTRR_TYPE_WRBACK 6
+#define MTRR_NUM_TYPES 7
+
static bool msr_mtrr_valid(unsigned msr)
{
switch (msr) {
- case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
case MSR_MTRRfix64K_00000:
case MSR_MTRRfix16K_80000:
case MSR_MTRRfix16K_A0000:
@@ -44,6 +53,9 @@ static bool msr_mtrr_valid(unsigned msr)
case MSR_MTRRdefType:
case MSR_IA32_CR_PAT:
return true;
+ default:
+ if (msr >= 0x200 && msr < 0x210)
+ return true;
}
return false;
}
@@ -83,7 +95,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
}
/* variable MTRRs */
- WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
+ WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * kvm_NR_VAR_MTRR));
mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
if ((msr & 1) == 0) {
@@ -101,7 +113,6 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return true;
}
-EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
@@ -200,11 +211,19 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
*seg = 0;
*unit = 0;
break;
- case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
+ case MSR_MTRRfix16K_80000:
+ case MSR_MTRRfix16K_A0000:
*seg = 1;
*unit = msr - MSR_MTRRfix16K_80000;
break;
- case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
+ case MSR_MTRRfix4K_C0000:
+ case MSR_MTRRfix4K_C8000:
+ case MSR_MTRRfix4K_D0000:
+ case MSR_MTRRfix4K_D8000:
+ case MSR_MTRRfix4K_E0000:
+ case MSR_MTRRfix4K_E8000:
+ case MSR_MTRRfix4K_F0000:
+ case MSR_MTRRfix4K_F8000:
*seg = 2;
*unit = msr - MSR_MTRRfix4K_C0000;
break;
@@ -319,8 +338,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
gfn_t start, end;
int index;
- if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
- !kvm_arch_has_noncoherent_dma(vcpu->kvm))
+ if (msr == MSR_IA32_CR_PAT || !tdp_enabled)
return;
if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
@@ -372,10 +390,12 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
/* add it to the list if it's enabled. */
if (var_mtrr_range_is_valid(cur)) {
+#define LIST_ENTRY_TYPE_INFO struct kvm_mtrr_range
list_for_each_entry(tmp, &mtrr_state->head, node)
if (cur->base >= tmp->base)
break;
list_add_tail(&cur->node, &tmp->node);
+#undef LIST_ENTRY_TYPE_INFO
}
}
@@ -410,9 +430,9 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
* SMRR = 0
* WC = 1
* FIX = 1
- * VCNT = KVM_NR_VAR_MTRR
+ * VCNT = kvm_NR_VAR_MTRR
*/
- *pdata = 0x500 | KVM_NR_VAR_MTRR;
+ *pdata = 0x500 | 8;
return 0;
}
@@ -525,9 +545,11 @@ static void __mtrr_lookup_var_next(struct mtrr_iter *iter)
{
struct kvm_mtrr *mtrr_state = iter->mtrr_state;
+#define LIST_ENTRY_TYPE_INFO struct kvm_mtrr_range
list_for_each_entry_continue(iter->range, &mtrr_state->head, node)
if (match_var_range(iter, iter->range))
return;
+#undef LIST_ENTRY_TYPE_INFO
iter->range = NULL;
iter->partial_map |= iter->start_max < iter->end;
@@ -540,7 +562,9 @@ static void mtrr_lookup_var_start(struct mtrr_iter *iter)
iter->fixed = false;
iter->start_max = iter->start;
iter->range = NULL;
+#define LIST_ENTRY_TYPE_INFO struct kvm_mtrr_range
iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);
+#undef LIST_ENTRY_TYPE_INFO
__mtrr_lookup_var_next(iter);
}
@@ -557,9 +581,10 @@ static void mtrr_lookup_fixed_next(struct mtrr_iter *iter)
iter->index++;
/* have looked up for all fixed MTRRs. */
- if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges))
- return mtrr_lookup_var_start(iter);
-
+ if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges)) {
+ mtrr_lookup_var_start(iter);
+ return;
+ }
/* switch to next segment. */
if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg))
iter->seg++;
@@ -696,7 +721,6 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
return type;
}
-EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num)
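The msr_mtrr_valid() hunk replaces the GCC-only case range 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1 with a plain comparison, since MSVC has no case ranges. With the eight variable MTRRs the file assumes elsewhere (the hard-coded 0x500 | 8 VCNT), both forms accept exactly MSRs 0x200 through 0x20F. A quick self-check of that arithmetic, with the constant local to the sketch:

#include <assert.h>

#define NR_VAR_MTRR 8   /* matches the VCNT value hard-coded by the patch */

int main(void)
{
    unsigned msr;

    /* Upper bound of the old case range: 0x200 + 2 * 8 - 1 == 0x20F. */
    assert(0x200 + 2 * NR_VAR_MTRR - 1 == 0x20F);

    /* The explicit check used by the patch covers the same set. */
    for (msr = 0x1f0; msr < 0x220; msr++) {
        int in_case_range = msr >= 0x200 && msr <= 0x200 + 2 * NR_VAR_MTRR - 1;
        int in_if_range   = msr >= 0x200 && msr < 0x210;
        assert(in_case_range == in_if_range);
    }
    return 0;
}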
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index b431539..2d6d87d 100644..100755
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -5,6 +5,7 @@
* write access is tracked.
*
* Copyright(C) 2015 Intel Corporation.
+ * Copyright 2019 Google LLC
*
* Author:
* Xiao Guangrong <guangrong.xiao@linux.intel.com>
@@ -33,7 +34,7 @@ void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
}
int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
- unsigned long npages)
+ size_t npages)
{
int i;
@@ -64,7 +65,7 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
{
int index, val;
- index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
+ index = gfn - slot->base_gfn;
val = slot->arch.gfn_track[mode][index];
@@ -96,12 +97,6 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
update_gfn_track(slot, gfn, mode, 1);
- /*
- * new track stops large page mapping for the
- * tracked page.
- */
- kvm_mmu_gfn_disallow_lpage(slot, gfn);
-
if (mode == KVM_PAGE_TRACK_WRITE)
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
kvm_flush_remote_tlbs(kvm);
@@ -128,12 +123,6 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
return;
update_gfn_track(slot, gfn, mode, -1);
-
- /*
- * allow large page mapping for the tracked page
- * after the tracker is gone.
- */
- kvm_mmu_gfn_allow_lpage(slot, gfn);
}
/*
@@ -144,6 +133,7 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
{
struct kvm_memory_slot *slot;
int index;
+ unsigned short temp;
if (WARN_ON(!page_track_mode_is_valid(mode)))
return false;
@@ -152,8 +142,9 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
if (!slot)
return false;
- index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
- return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]);
+ index = gfn - slot->base_gfn;
+ ACCESS_ONCE(slot->arch.gfn_track[mode][index], temp);
+ return !!temp;
}
void kvm_page_track_init(struct kvm *kvm)
@@ -165,6 +156,14 @@ void kvm_page_track_init(struct kvm *kvm)
INIT_HLIST_HEAD(&head->track_notifier_list);
}
+void kvm_page_track_destroy(struct kvm *kvm)
+{
+ struct kvm_page_track_notifier_head *head;
+
+ head = &kvm->arch.track_notifier_head;
+ cleanup_srcu_struct(&head->track_srcu);
+}
+
/*
* register the notifier so that event interception for the tracked guest
* pages can be received.
@@ -220,8 +219,10 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
return;
idx = srcu_read_lock(&head->track_srcu);
+#define LIST_ENTRY_TYPE_INFO struct kvm_page_track_notifier_node
hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
if (n->track_write)
n->track_write(vcpu, gpa, new, bytes);
+#undef LIST_ENTRY_TYPE_INFO
srcu_read_unlock(&head->track_srcu, idx);
}
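The update_gfn_track()/kvm_page_track_is_active() hunks replace gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL) with a plain gfn - slot->base_gfn, which is the same value once tracking is kept at 4K granularity only: at the page-table level the per-level shift is zero. A sketch of that equivalence, assuming gfn_to_index keeps its usual upstream shape (9 bits per level above 4K):

#include <assert.h>
#include <stdint.h>

typedef uint64_t gfn_t;

/* Upstream-style helper: each level above 4K covers 9 more gfn bits. */
#define HPAGE_GFN_SHIFT(level)  (((level) - 1) * 9)

static gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
    return (gfn >> HPAGE_GFN_SHIFT(level)) -
           (base_gfn >> HPAGE_GFN_SHIFT(level));
}

int main(void)
{
    gfn_t base = 0x1000, gfn = 0x1234;

    /* Level 1 (4K) shifts by zero, so the helper collapses to the
     * subtraction the patch now open-codes. */
    assert(gfn_to_index(gfn, base, 1) == gfn - base);
    return 0;
}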
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a011054..0d5fd47 100644..100755
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -8,6 +8,7 @@
*
* Copyright (C) 2006 Qumranet, Inc.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -27,9 +28,13 @@
* This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro
* uses for EPT without A/D paging type.
*/
+#if 0
extern u64 __pure __using_nonexistent_pte_bit(void)
__compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT");
+#endif
+#pragma warning(disable : 4127)
+#pragma warning(disable : 4310)
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
@@ -65,6 +70,7 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
#define CMPXCHG cmpxchg
+#if 0
#elif PTTYPE == PTTYPE_EPT
#define pt_element_t u64
#define guest_walker guest_walkerEPT
@@ -80,6 +86,7 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit()
#define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 4
+#endif
#else
#error Invalid PTTYPE value
#endif
@@ -119,8 +126,6 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
if (!PT_GUEST_DIRTY_MASK)
return;
- BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
-
mask = (unsigned)~ACC_WRITE_MASK;
/* Allow write access to dirty gptes */
mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) &
@@ -128,7 +133,7 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
*access &= mask;
}
-static inline int FNAME(is_present_gpte)(unsigned long pte)
+static inline int FNAME(is_present_gpte)(size_t pte)
{
#if PTTYPE != PTTYPE_EPT
return pte & PT_PRESENT_MASK;
@@ -144,18 +149,20 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
int npages;
pt_element_t ret;
pt_element_t *table;
- struct page *page;
+ PMDL kmap_mdl;
- npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
+ npages = get_user_pages_fast((size_t)ptep_user, 1, 1, &kmap_mdl);
/* Check if the user is doing something meaningless. */
if (unlikely(npages != 1))
return -EFAULT;
- table = kmap_atomic(page);
+ table = kmap_atomic(kmap_mdl);
+ if (!table)
+ return -EFAULT;
ret = CMPXCHG(&table[index], orig_pte, new_pte);
- kunmap_atomic(table);
+ kunmap_atomic(kmap_mdl);
- kvm_release_page_dirty(page);
+ kvm_release_page(kmap_mdl);
return (ret != orig_pte);
}
@@ -195,8 +202,6 @@ static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
#else
- BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
- BUILD_BUG_ON(ACC_EXEC_MASK != 1);
access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
/* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */
access ^= (gpte >> PT64_NX_SHIFT);
@@ -226,12 +231,10 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
ptep_user = walker->ptep_user[level - 1];
index = offset_in_page(ptep_user) / sizeof(pt_element_t);
if (!(pte & PT_GUEST_ACCESSED_MASK)) {
- trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
pte |= PT_GUEST_ACCESSED_MASK;
}
if (level == walker->level && write_fault &&
!(pte & PT_GUEST_DIRTY_MASK)) {
- trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
pte |= PT_GUEST_DIRTY_MASK;
}
if (pte == orig_pte)
@@ -266,11 +269,13 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
{
unsigned pkeys = 0;
+#if 0
#if PTTYPE == 64
pte_t pte = {.pte = gpte};
pkeys = pte_flags_pkey(pte_flags(pte));
#endif
+#endif
return pkeys;
}
@@ -283,7 +288,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
{
int ret;
pt_element_t pte;
- pt_element_t __user *uninitialized_var(ptep_user);
+ pt_element_t __user *ptep_user;
gfn_t table_gfn;
unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
gpa_t pte_gpa;
@@ -295,7 +300,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
gpa_t real_gpa;
gfn_t gfn;
- trace_kvm_mmu_pagetable_walk(addr, access);
retry_walk:
walker->level = mmu->root_level;
pte = mmu->get_cr3(vcpu);
@@ -303,7 +307,6 @@ retry_walk:
#if PTTYPE == 64
if (walker->level == PT32E_ROOT_LEVEL) {
pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
- trace_kvm_mmu_paging_element(pte, walker->level);
if (!FNAME(is_present_gpte)(pte))
goto error;
--walker->level;
@@ -318,7 +321,7 @@ retry_walk:
do {
gfn_t real_gfn;
- unsigned long host_addr;
+ size_t host_addr;
pt_access &= pte_access;
--walker->level;
@@ -355,13 +358,11 @@ retry_walk:
if (unlikely(kvm_is_error_hva(host_addr)))
goto error;
- ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
+ ptep_user = (pt_element_t __user *)((char *)host_addr + offset);
if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
goto error;
walker->ptep_user[walker->level - 1] = ptep_user;
- trace_kvm_mmu_paging_element(pte, walker->level);
-
if (unlikely(!FNAME(is_present_gpte)(pte)))
goto error;
@@ -449,7 +450,6 @@ error:
walker->fault.address = addr;
walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
- trace_kvm_mmu_walker_error(walker->fault.error_code);
return 0;
}
@@ -546,7 +546,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
return;
if (sp->role.direct)
- return __direct_pte_prefetch(vcpu, sp, sptep);
+ __direct_pte_prefetch(vcpu, sp, sptep);
i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
spte = sp->spt + i;
@@ -571,7 +571,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
int write_fault, int hlevel,
- kvm_pfn_t pfn, bool map_writable, bool prefault)
+ kvm_pfn_t pfn, bool map_writable)
{
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
@@ -634,7 +634,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (is_shadow_present_pte(*it.sptep))
continue;
- direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ direct_gfn = gw->gfn;
sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
true, direct_access);
@@ -643,13 +643,12 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
clear_sp_write_flooding_count(it.sptep);
emulate = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
- it.level, gw->gfn, pfn, prefault, map_writable);
+ it.level, gw->gfn, pfn, false, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
return emulate;
out_gpte_changed:
- kvm_release_pfn_clean(pfn);
return 0;
}
@@ -676,7 +675,6 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
bool *write_fault_to_shadow_pgtable)
{
int level;
- gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
bool self_changed = false;
if (!(walker->pte_access & ACC_WRITE_MASK ||
@@ -686,7 +684,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
for (level = walker->level; level <= walker->max_level; level++) {
gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
- self_changed |= !(gfn & mask);
+ self_changed |= !gfn;
*write_fault_to_shadow_pgtable |= !gfn;
}
@@ -707,8 +705,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
* Returns: 1 if we need to emulate the instruction, 0 otherwise, or
* a negative value on error.
*/
-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
- bool prefault)
+static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code)
{
int write_fault = error_code & PFERR_WRITE_MASK;
int user_fault = error_code & PFERR_USER_MASK;
@@ -716,8 +713,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
int r;
kvm_pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
- bool force_pt_level = false;
- unsigned long mmu_seq;
bool map_writable, is_self_change_mapping;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -742,8 +737,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
*/
if (!r) {
pgprintk("%s: guest page fault\n", __func__);
- if (!prefault)
- inject_page_fault(vcpu, &walker.fault);
+ inject_page_fault(vcpu, &walker.fault);
return 0;
}
@@ -759,20 +753,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
&walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) {
- level = mapping_level(vcpu, walker.gfn, &force_pt_level);
- if (likely(!force_pt_level)) {
- level = min(walker.level, level);
- walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
- }
- } else
- force_pt_level = true;
+ level = mapping_level(vcpu, walker.gfn, NULL);
+ }
- mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ //mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
- &map_writable))
- return 0;
+ get_pfn(vcpu, walker.gfn, addr, &pfn, write_fault, &map_writable);
if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
walker.gfn, pfn, walker.pte_access, &r))
@@ -799,25 +786,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
}
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
- goto out_unlock;
-
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
make_mmu_pages_available(vcpu);
- if (!force_pt_level)
- transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
- level, pfn, map_writable, prefault);
+ level, pfn, map_writable);
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
-
-out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(pfn);
- return 0;
}
static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 06ce377..d3937d2 100644..100755
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -2,6 +2,7 @@
* Kernel-based Virtual Machine -- Performance Monitoring Unit support
*
* Copyright 2015 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Avi Kivity <avi@redhat.com>
@@ -13,6 +14,7 @@
*
*/
+#if 0
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
@@ -63,9 +65,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
if (!test_and_set_bit(pmc->idx,
- (unsigned long *)&pmu->reprogram_pmi)) {
- __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
- kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+ (size_t *)&pmu->reprogram_pmi)) {
+ __set_bit(pmc->idx, (size_t *)&pmu->global_status);
+ kvm_make_request(GVM_REQ_PMU, pmc->vcpu);
}
}
@@ -77,9 +79,9 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
if (!test_and_set_bit(pmc->idx,
- (unsigned long *)&pmu->reprogram_pmi)) {
- __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
- kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+ (size_t *)&pmu->reprogram_pmi)) {
+ __set_bit(pmc->idx, (size_t *)&pmu->global_status);
+ kvm_make_request(GVM_REQ_PMU, pmc->vcpu);
/*
* Inject PMI. If vcpu was in a guest mode during NMI PMI
@@ -92,7 +94,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
if (!kvm_is_in_guest())
irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
else
- kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
+ kvm_make_request(GVM_REQ_PMI, pmc->vcpu);
}
}
@@ -130,7 +132,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
}
pmc->perf_event = event;
- clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi);
+ clear_bit(pmc->idx, (size_t*)&pmc_to_pmu(pmc)->reprogram_pmi);
}
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
@@ -173,7 +175,6 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
(eventsel & HSW_IN_TX),
(eventsel & HSW_IN_TX_CHECKPOINTED));
}
-EXPORT_SYMBOL_GPL(reprogram_gp_counter);
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
@@ -191,7 +192,6 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
!(en_field & 0x1), /* exclude kernel */
pmi, false, false);
}
-EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
@@ -209,7 +209,6 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
reprogram_fixed_counter(pmc, ctrl, idx);
}
}
-EXPORT_SYMBOL_GPL(reprogram_counter);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
@@ -219,11 +218,11 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
bitmask = pmu->reprogram_pmi;
- for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
+ for_each_set_bit(bit, (size_t *)&bitmask, X86_PMC_IDX_MAX) {
struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit);
if (unlikely(!pmc || !pmc->perf_event)) {
- clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
+ clear_bit(bit, (size_t *)&pmu->reprogram_pmi);
continue;
}
@@ -307,3 +306,4 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
kvm_pmu_reset(vcpu);
}
+#endif
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index f96e1f9..1025403 100644..100755
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -1,6 +1,8 @@
#ifndef __KVM_X86_PMU_H
#define __KVM_X86_PMU_H
+#if 0
+
#define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
#define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu))
#define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu)
@@ -115,4 +117,6 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
extern struct kvm_pmu_ops intel_pmu_ops;
extern struct kvm_pmu_ops amd_pmu_ops;
+#endif
+
#endif /* __KVM_X86_PMU_H */
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index cd94443..5db57c6 100644..100755
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -11,6 +11,7 @@
*
* Implementation is based on pmu_intel.c file
*/
+#if 0
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
@@ -203,3 +204,4 @@ struct kvm_pmu_ops amd_pmu_ops = {
.init = amd_pmu_init,
.reset = amd_pmu_reset,
};
+#endif
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 9d4a850..4fb5c5f 100644..100755
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -2,6 +2,7 @@
* KVM PMU support for Intel CPUs
*
* Copyright 2011 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Avi Kivity <avi@redhat.com>
@@ -11,6 +12,7 @@
* the COPYING file in the top-level directory.
*
*/
+#if 0
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
@@ -63,7 +65,7 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
pmu->global_ctrl = data;
- for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+ for_each_set_bit(bit, (size_t *)&diff, X86_PMC_IDX_MAX)
reprogram_counter(pmu, bit);
}
@@ -98,7 +100,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+ return test_bit(pmc->idx, (size_t *)&pmu->global_ctrl);
}
static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
@@ -356,3 +358,4 @@ struct kvm_pmu_ops intel_pmu_ops = {
.init = intel_pmu_init,
.reset = intel_pmu_reset,
};
+#endif
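The bitmap casts in the PMU hunks (and throughout the port) switch from unsigned long * to size_t *, most likely because Windows x64 is LLP64: long stays 32 bits there, while size_t is 64 bits on both hosts and therefore still spans a full 64-bit bitmap word. A one-liner that makes the difference visible:

#include <stdio.h>

int main(void)
{
    /* LP64 Linux x86-64 prints 8 8; LLP64 Windows x64 prints 4 8,
     * which is why a 64-bit bitmap word can no longer be addressed
     * through unsigned long * on the Windows build. */
    printf("sizeof(long)=%zu sizeof(size_t)=%zu\n",
           sizeof(long), sizeof(size_t));
    return 0;
}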
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8ca1eca..31fc896 100644..100755
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5,6 +5,7 @@
*
* Copyright (C) 2006 Qumranet, Inc.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -26,199 +27,12 @@
#include "cpuid.h"
#include "pmu.h"
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
-#include <linux/kernel.h>
-#include <linux/vmalloc.h>
-#include <linux/highmem.h>
-#include <linux/sched.h>
-#include <linux/trace_events.h>
-#include <linux/slab.h>
-#include <linux/amd-iommu.h>
-#include <linux/hashtable.h>
-
-#include <asm/apic.h>
-#include <asm/perf_event.h>
-#include <asm/tlbflush.h>
-#include <asm/desc.h>
-#include <asm/debugreg.h>
-#include <asm/kvm_para.h>
-#include <asm/irq_remapping.h>
-
-#include <asm/virtext.h>
-#include "trace.h"
-
-#define __ex(x) __kvm_handle_fault_on_reboot(x)
-
-MODULE_AUTHOR("Qumranet");
-MODULE_LICENSE("GPL");
-
-static const struct x86_cpu_id svm_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_SVM),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
-
-#define IOPM_ALLOC_ORDER 2
-#define MSRPM_ALLOC_ORDER 1
-
-#define SEG_TYPE_LDT 2
-#define SEG_TYPE_BUSY_TSS16 3
-
-#define SVM_FEATURE_NPT (1 << 0)
-#define SVM_FEATURE_LBRV (1 << 1)
-#define SVM_FEATURE_SVML (1 << 2)
-#define SVM_FEATURE_NRIP (1 << 3)
-#define SVM_FEATURE_TSC_RATE (1 << 4)
-#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
-#define SVM_FEATURE_FLUSH_ASID (1 << 6)
-#define SVM_FEATURE_DECODE_ASSIST (1 << 7)
-#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
-
-#define SVM_AVIC_DOORBELL 0xc001011b
-
-#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
-#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
-#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
-
-#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-
-#define TSC_RATIO_RSVD 0xffffff0000000000ULL
-#define TSC_RATIO_MIN 0x0000000000000001ULL
-#define TSC_RATIO_MAX 0x000000ffffffffffULL
-
-#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
-
-/*
- * 0xff is broadcast, so the max index allowed for physical APIC ID
- * table is 0xfe. APIC IDs above 0xff are reserved.
- */
-#define AVIC_MAX_PHYSICAL_ID_COUNT 255
-
-#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
-#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
-#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
-
-/* AVIC GATAG is encoded using VM and VCPU IDs */
-#define AVIC_VCPU_ID_BITS 8
-#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
-
-#define AVIC_VM_ID_BITS 24
-#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS)
-#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1)
-
-#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
- (y & AVIC_VCPU_ID_MASK))
-#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
-#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
-
-static bool erratum_383_found __read_mostly;
-
-static const u32 host_save_user_msrs[] = {
-#ifdef CONFIG_X86_64
- MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
- MSR_FS_BASE,
-#endif
- MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
- MSR_TSC_AUX,
-};
-
-#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-
-struct kvm_vcpu;
-
-struct nested_state {
- struct vmcb *hsave;
- u64 hsave_msr;
- u64 vm_cr_msr;
- u64 vmcb;
-
- /* These are the merged vectors */
- u32 *msrpm;
-
- /* gpa pointers to the real vectors */
- u64 vmcb_msrpm;
- u64 vmcb_iopm;
-
- /* A VMEXIT is required but not yet emulated */
- bool exit_required;
-
- /* cache for intercepts of the guest */
- u32 intercept_cr;
- u32 intercept_dr;
- u32 intercept_exceptions;
- u64 intercept;
-
- /* Nested Paging related state */
- u64 nested_cr3;
-};
-
-#define MSRPM_OFFSETS 16
-static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
-
-/*
- * Set osvw_len to higher value when updated Revision Guides
- * are published and we know what the new status bits are
- */
-static uint64_t osvw_len = 4, osvw_status;
-
-struct vcpu_svm {
- struct kvm_vcpu vcpu;
- struct vmcb *vmcb;
- unsigned long vmcb_pa;
- struct svm_cpu_data *svm_data;
- uint64_t asid_generation;
- uint64_t sysenter_esp;
- uint64_t sysenter_eip;
- uint64_t tsc_aux;
-
- u64 next_rip;
-
- u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
- struct {
- u16 fs;
- u16 gs;
- u16 ldt;
- u64 gs_base;
- } host;
+#include <asm/svm.h>
+#include <asm/vmx.h>
- u32 *msrpm;
-
- ulong nmi_iret_rip;
-
- struct nested_state nested;
-
- bool nmi_singlestep;
-
- unsigned int3_injected;
- unsigned long int3_rip;
- u32 apf_reason;
-
- /* cached guest cpuid flags for faster access */
- bool nrips_enabled : 1;
-
- u32 ldr_reg;
- struct page *avic_backing_page;
- u64 *avic_physical_id_cache;
- bool avic_is_running;
-
- /*
- * Per-vcpu list of struct amd_svm_iommu_ir:
- * This is used mainly to store interrupt remapping information used
- * when update the vcpu affinity. This avoids the need to scan for
- * IRTE and try to match ga_tag in the IOMMU driver.
- */
- struct list_head ir_list;
- spinlock_t ir_list_lock;
-};
-
-/*
- * This is a wrapper of struct amd_iommu_ir_data.
- */
-struct amd_svm_iommu_ir {
- struct list_head node; /* Used by SVM for per-vcpu ir_list */
- void *data; /* Storing pointer to struct amd_ir_data */
-};
+#include <__asm.h>
+// separate definitions moved to svm_def.h for asmgen
+#include "svm_def.h"
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
@@ -228,9 +42,6 @@ struct amd_svm_iommu_ir {
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
-static DEFINE_PER_CPU(u64, current_tsc_ratio);
-#define TSC_RATIO_DEFAULT 0x0100000000ULL
-
#define MSR_INVALID 0xffffffffU
static const struct svm_direct_access_msrs {
@@ -263,23 +74,18 @@ static bool npt_enabled;
/* allow nested paging (virtualized MMU) for all guests */
static int npt = true;
-module_param(npt, int, S_IRUGO);
-/* allow nested virtualization in KVM/SVM */
-static int nested = true;
-module_param(nested, int, S_IRUGO);
+/* allow nested virtualization in kvm/SVM */
+static int nested = false;
/* enable / disable AVIC */
static int avic;
-#ifdef CONFIG_X86_LOCAL_APIC
-module_param(avic, int, S_IRUGO);
-#endif
/* AVIC VM ID bit masks and lock */
static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
static DEFINE_SPINLOCK(avic_vm_id_lock);
-static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
+static void svm_set_cr0(struct kvm_vcpu *vcpu, size_t cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -345,11 +151,13 @@ static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 *entry = svm->avic_physical_id_cache;
+ u64 temp;
if (!entry)
return false;
- return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+ READ_ONCE(*entry, temp);
+ return temp & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
}
static void recalc_intercepts(struct vcpu_svm *svm)
@@ -489,8 +297,10 @@ static inline bool gif_set(struct vcpu_svm *svm)
return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}
-static unsigned long iopm_base;
+static size_t iopm_base;
+static void *iopm_va;
+#pragma pack(push, 1)
struct kvm_ldttss_desc {
u16 limit0;
u16 base0;
@@ -498,7 +308,8 @@ struct kvm_ldttss_desc {
unsigned limit1:4, zero0:3, g:1, base2:8;
u32 base3;
u32 zero1;
-} __attribute__((packed));
+};
+#pragma pack(pop)
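The #pragma pack(push, 1) / #pragma pack(pop) pair above is the MSVC-portable spelling of GCC's __attribute__((packed)); the descriptor must keep its architectural 16-byte layout, so the attribute cannot simply be dropped. A minimal sketch of the idiom (assuming a compiler with C11 _Static_assert):

    #pragma pack(push, 1)
    struct packed_example {
            u8  a;
            u32 b;          /* sits at offset 1, not 4: padding is suppressed */
    };
    #pragma pack(pop)
    _Static_assert(sizeof(struct packed_example) == 5, "packing is in effect");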
struct svm_cpu_data {
int cpu;
@@ -549,17 +360,17 @@ static u32 svm_msrpm_offset(u32 msr)
static inline void clgi(void)
{
- asm volatile (__ex(SVM_CLGI));
+ __svm_clgi();
}
static inline void stgi(void)
{
- asm volatile (__ex(SVM_STGI));
+ __svm_stgi();
}
-static inline void invlpga(unsigned long addr, u32 asid)
+static inline void invlpga(size_t addr, u32 asid)
{
- asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
+ __svm_invlpga((void *)addr, asid);
}
static int get_npt_level(void)
@@ -593,7 +404,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
u32 ret = 0;
if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
- ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
+ ret = GVM_X86_SHADOW_INT_STI | GVM_X86_SHADOW_INT_MOV_SS;
return ret;
}
@@ -646,7 +457,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
return;
if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
- unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
+ size_t rip, old_rip = kvm_rip_read(&svm->vcpu);
/*
* For guest debugging where we have to reinject #BP if some
@@ -668,15 +479,28 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
svm->vmcb->control.event_inj_err = error_code;
}
+/**
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))
+
+/**
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((u32)(n))
+
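A quick usage sketch of the two helpers just defined (values purely illustrative):

    u64 val = 0x0123456789abcdefULL;
    u32 hi  = upper_32_bits(val);   /* 0x01234567 */
    u32 lo  = lower_32_bits(val);   /* 0x89abcdef */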
static void svm_init_erratum_383(void)
{
u32 low, high;
int err;
u64 val;
- if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
- return;
-
/* Use _safe variants to not break nested virtualization */
val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
if (err)
@@ -715,25 +539,23 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu)
static int has_svm(void)
{
- const char *msg;
+ return static_cpu_has(X86_FEATURE_SVM);
+}
- if (!cpu_has_svm(&msg)) {
- printk(KERN_INFO "has_svm: %s\n", msg);
- return 0;
- }
+static inline void cpu_svm_disable(void)
+{
+ uint64_t efer;
- return 1;
+ wrmsrl(MSR_VM_HSAVE_PA, 0);
+ rdmsrl(MSR_EFER, efer);
+ wrmsrl(MSR_EFER, efer & ~EFER_SVME);
}
static void svm_hardware_disable(void)
{
- /* Make sure we clean up behind us */
- if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
- wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
-
cpu_svm_disable();
- amd_pmu_disable_virt();
+ //amd_pmu_disable_virt();
}
static int svm_hardware_enable(void)
@@ -771,11 +593,6 @@ static int svm_hardware_enable(void)
wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
- if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
- wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
- __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
- }
-
/*
* Get OSVW bits.
@@ -808,7 +625,7 @@ static int svm_hardware_enable(void)
svm_init_erratum_383();
- amd_pmu_enable_virt();
+ //amd_pmu_enable_virt();
return 0;
}
@@ -864,7 +681,7 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
int read, int write)
{
u8 bit_read, bit_write;
- unsigned long tmp;
+ size_t tmp;
u32 offset;
/*
@@ -965,70 +782,17 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
-/* Note:
- * This hash table is used to map VM_ID to a struct kvm_arch,
- * when handling AMD IOMMU GALOG notification to schedule in
- * a particular vCPU.
- */
-#define SVM_VM_DATA_HASH_BITS 8
-DECLARE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
-static spinlock_t svm_vm_data_hash_lock;
-
-/* Note:
- * This function is called from IOMMU driver to notify
- * SVM to schedule in a particular vCPU of a particular VM.
- */
-static int avic_ga_log_notifier(u32 ga_tag)
-{
- unsigned long flags;
- struct kvm_arch *ka = NULL;
- struct kvm_vcpu *vcpu = NULL;
- u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
- u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
-
- pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
-
- spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
- hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
- struct kvm *kvm = container_of(ka, struct kvm, arch);
- struct kvm_arch *vm_data = &kvm->arch;
-
- if (vm_data->avic_vm_id != vm_id)
- continue;
- vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
- break;
- }
- spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
-
- if (!vcpu)
- return 0;
-
- /* Note:
- * At this point, the IOMMU should have already set the pending
- * bit in the vAPIC backing page. So, we just need to schedule
- * in the vcpu.
- */
- if (vcpu->mode == OUTSIDE_GUEST_MODE)
- kvm_vcpu_wake_up(vcpu);
-
- return 0;
-}
-
-static __init int svm_hardware_setup(void)
+static int svm_hardware_setup(void)
{
int cpu;
- struct page *iopm_pages;
- void *iopm_va;
int r;
+ PHYSICAL_ADDRESS max_phys = { .QuadPart = MAXULONG64 };
- iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
-
- if (!iopm_pages)
- return -ENOMEM;
-
- iopm_va = page_address(iopm_pages);
+ iopm_va = MmAllocateContiguousMemory(PAGE_SIZE * (1 << IOPM_ALLOC_ORDER), max_phys);
+ if (!iopm_va)
+		return -ENOMEM;
memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
- iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
+ iopm_base = MmGetPhysicalAddress(iopm_va).QuadPart;
init_msrpm_offsets();
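For readers coming from the Linux side, the WDK calls that replace alloc_pages()/page_to_pfn() here work as follows: MmAllocateContiguousMemory() returns a kernel virtual address for physically contiguous memory below the given ceiling, MmGetPhysicalAddress() yields the physical address the hardware needs for the IOPM base, and MmFreeContiguousMemory() releases the region. A minimal sketch of that lifecycle (error handling elided, size illustrative):

    PHYSICAL_ADDRESS max_phys = { .QuadPart = MAXULONG64 };
    void *va = MmAllocateContiguousMemory(2 * PAGE_SIZE, max_phys);
    if (va) {
            u64 pa = MmGetPhysicalAddress(va).QuadPart; /* what the VMCB is given */
            memset(va, 0xff, 2 * PAGE_SIZE);            /* intercept every I/O port */
            /* ... program pa as the IOPM base ... */
            MmFreeContiguousMemory(va);
    }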
@@ -1038,12 +802,6 @@ static __init int svm_hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
kvm_enable_efer_bits(EFER_FFXSR);
- if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
- kvm_has_tsc_control = true;
- kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
- kvm_tsc_scaling_ratio_frac_bits = 32;
- }
-
if (nested) {
printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
@@ -1071,34 +829,31 @@ static __init int svm_hardware_setup(void)
if (avic) {
if (!npt_enabled ||
- !boot_cpu_has(X86_FEATURE_AVIC) ||
- !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
+ !boot_cpu_has(X86_FEATURE_AVIC)) {
avic = false;
} else {
pr_info("AVIC enabled\n");
-
- hash_init(svm_vm_data_hash);
- spin_lock_init(&svm_vm_data_hash_lock);
- amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
}
}
return 0;
err:
- __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
+ MmFreeContiguousMemory(iopm_va);
+ iopm_va = NULL;
iopm_base = 0;
return r;
}
-static __exit void svm_hardware_unsetup(void)
+static void svm_hardware_unsetup(void)
{
int cpu;
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
- __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
+ MmFreeContiguousMemory(iopm_va);
+ iopm_va = NULL;
iopm_base = 0;
}
@@ -1128,10 +883,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
g_tsc_offset = svm->vmcb->control.tsc_offset -
svm->nested.hsave->control.tsc_offset;
svm->nested.hsave->control.tsc_offset = offset;
- } else
- trace_kvm_write_tsc_offset(vcpu->vcpu_id,
- svm->vmcb->control.tsc_offset,
- offset);
+ }
svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
@@ -1159,7 +911,6 @@ static void init_vmcb(struct vcpu_svm *svm)
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
- svm->vcpu.fpu_active = 1;
svm->vcpu.arch.hflags = 0;
set_cr_intercept(svm, INTERCEPT_CR0_READ);
@@ -1260,11 +1011,6 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->nested.vmcb = 0;
svm->vcpu.arch.hflags = 0;
- if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
- control->pause_filter_count = 3000;
- set_intercept(svm, INTERCEPT_PAUSE);
- }
-
if (avic)
avic_init_vmcb(svm);
@@ -1338,7 +1084,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
if (!entry)
return -EINVAL;
- new_entry = READ_ONCE(*entry);
+ READ_ONCE(*entry, new_entry);
new_entry = (page_to_phys(svm->avic_backing_page) &
AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
@@ -1379,7 +1125,6 @@ static inline int avic_free_vm_id(int id)
static void avic_vm_destroy(struct kvm *kvm)
{
- unsigned long flags;
struct kvm_arch *vm_data = &kvm->arch;
avic_free_vm_id(vm_data->avic_vm_id);
@@ -1388,15 +1133,10 @@ static void avic_vm_destroy(struct kvm *kvm)
__free_page(vm_data->avic_logical_id_table_page);
if (vm_data->avic_physical_id_table_page)
__free_page(vm_data->avic_physical_id_table_page);
-
- spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
- hash_del(&vm_data->hnode);
- spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}
static int avic_vm_init(struct kvm *kvm)
{
- unsigned long flags;
int vm_id, err = -ENOMEM;
struct kvm_arch *vm_data = &kvm->arch;
struct page *p_page;
@@ -1426,10 +1166,6 @@ static int avic_vm_init(struct kvm *kvm)
vm_data->avic_logical_id_table_page = l_page;
clear_page(page_address(l_page));
- spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
- hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
- spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
-
return 0;
free_avic:
@@ -1437,36 +1173,6 @@ free_avic:
return err;
}
-static inline int
-avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
-{
- int ret = 0;
- unsigned long flags;
- struct amd_svm_iommu_ir *ir;
- struct vcpu_svm *svm = to_svm(vcpu);
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm))
- return 0;
-
- /*
- * Here, we go through the per-vcpu ir_list to update all existing
- * interrupt remapping table entry targeting this vcpu.
- */
- spin_lock_irqsave(&svm->ir_list_lock, flags);
-
- if (list_empty(&svm->ir_list))
- goto out;
-
- list_for_each_entry(ir, &svm->ir_list, node) {
- ret = amd_iommu_update_ga(cpu, r, ir->data);
- if (ret)
- break;
- }
-out:
- spin_unlock_irqrestore(&svm->ir_list_lock, flags);
- return ret;
-}
-
static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
@@ -1480,7 +1186,7 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
return;
- entry = READ_ONCE(*(svm->avic_physical_id_cache));
+ READ_ONCE(*(svm->avic_physical_id_cache), entry);
WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
@@ -1491,8 +1197,6 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
- avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
- svm->avic_is_running);
}
static void avic_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1503,10 +1207,7 @@ static void avic_vcpu_put(struct kvm_vcpu *vcpu)
if (!kvm_vcpu_apicv_active(vcpu))
return;
- entry = READ_ONCE(*(svm->avic_physical_id_cache));
- if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
- avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
-
+ READ_ONCE(*(svm->avic_physical_id_cache), entry);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
@@ -1550,12 +1251,13 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
{
struct vcpu_svm *svm;
struct page *page;
- struct page *msrpm_pages;
struct page *hsave_page;
- struct page *nested_msrpm_pages;
+ void *msrpm_va;
+ void *nested_msrpm_va;
int err;
+ PHYSICAL_ADDRESS max_phys = { .QuadPart = MAXULONG64 };
- svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ svm = kzalloc_fast(sizeof(struct vcpu_svm), GFP_KERNEL);
if (!svm) {
err = -ENOMEM;
goto out;
@@ -1570,12 +1272,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (!page)
goto uninit;
- msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
- if (!msrpm_pages)
+ msrpm_va = MmAllocateContiguousMemory(PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER), max_phys);
+ if (!msrpm_va)
goto free_page1;
- nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
- if (!nested_msrpm_pages)
+ nested_msrpm_va = MmAllocateContiguousMemory(PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER), max_phys);
+ if (!nested_msrpm_va)
goto free_page2;
hsave_page = alloc_page(GFP_KERNEL);
@@ -1586,9 +1288,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
err = avic_init_backing_page(&svm->vcpu);
if (err)
goto free_page4;
-
- INIT_LIST_HEAD(&svm->ir_list);
- spin_lock_init(&svm->ir_list_lock);
}
/* We initialize this flag to true to make sure that the is_running
@@ -1598,10 +1297,10 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->nested.hsave = page_address(hsave_page);
- svm->msrpm = page_address(msrpm_pages);
+ svm->msrpm = msrpm_va;
svm_vcpu_init_msrpm(svm->msrpm);
- svm->nested.msrpm = page_address(nested_msrpm_pages);
+ svm->nested.msrpm = nested_msrpm_va;
svm_vcpu_init_msrpm(svm->nested.msrpm);
svm->vmcb = page_address(page);
@@ -1617,15 +1316,15 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
free_page4:
__free_page(hsave_page);
free_page3:
- __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
+ MmFreeContiguousMemory(nested_msrpm_va);
free_page2:
- __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
+ MmFreeContiguousMemory(msrpm_va);
free_page1:
__free_page(page);
uninit:
kvm_vcpu_uninit(&svm->vcpu);
free_svm:
- kmem_cache_free(kvm_vcpu_cache, svm);
+ kfree(svm);
out:
return ERR_PTR(err);
}
@@ -1635,16 +1334,27 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
- __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+ MmFreeContiguousMemory(svm->msrpm);
__free_page(virt_to_page(svm->nested.hsave));
- __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
+ MmFreeContiguousMemory(svm->nested.msrpm);
kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, svm);
+ kfree(svm);
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ avic_vcpu_load(vcpu, cpu);
+}
+
+static void svm_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ avic_vcpu_put(vcpu);
+}
+
+static void svm_save_host_state(struct kvm_vcpu *vcpu)
+{
struct vcpu_svm *svm = to_svm(vcpu);
+ int cpu = smp_processor_id();
int i;
if (unlikely(cpu != vcpu->cpu)) {
@@ -1661,38 +1371,17 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
-
- if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
- u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
- if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
- __this_cpu_write(current_tsc_ratio, tsc_ratio);
- wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
- }
- }
- /* This assumes that the kernel never uses MSR_TSC_AUX */
- if (static_cpu_has(X86_FEATURE_RDTSCP))
- wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
-
- avic_vcpu_load(vcpu, cpu);
}
-static void svm_vcpu_put(struct kvm_vcpu *vcpu)
+static void svm_load_host_state(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
int i;
- avic_vcpu_put(vcpu);
-
- ++vcpu->stat.host_state_reload;
kvm_load_ldt(svm->host.ldt);
#ifdef CONFIG_X86_64
loadsegment(fs, svm->host.fs);
- wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
load_gs_index(svm->host.gs);
-#else
-#ifdef CONFIG_X86_32_LAZY_GS
- loadsegment(gs, svm->host.gs);
-#endif
#endif
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
@@ -1708,12 +1397,12 @@ static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
avic_set_running(vcpu, true);
}
-static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
+static size_t svm_get_rflags(struct kvm_vcpu *vcpu)
{
return to_svm(vcpu)->vmcb->save.rflags;
}
-static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+static void svm_set_rflags(struct kvm_vcpu *vcpu, size_t rflags)
{
/*
* Any change of EFLAGS.VM is accompanied by a reload of SS
@@ -1723,11 +1412,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
-static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
switch (reg) {
@@ -1765,7 +1449,6 @@ static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
case VCPU_SREG_LDTR: return &save->ldtr;
}
BUG();
- return NULL;
}
static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
@@ -1796,7 +1479,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
* However, the SVM spec states that the G bit is not observed by the
* CPU, and some VMware virtual CPUs drop the G bit for all segments.
* So let's synthesize a legal G bit for all segments, this helps
- * running KVM nested. It also helps cross-vendor migration, because
+ * running kvm nested. It also helps cross-vendor migration, because
* Intel's vmentry has a check on the 'G' bit.
*/
var->g = s->limit > 0xfffff;
@@ -1901,15 +1584,12 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
ulong gcr0 = svm->vcpu.arch.cr0;
u64 *hcr0 = &svm->vmcb->save.cr0;
- if (!svm->vcpu.fpu_active)
- *hcr0 |= SVM_CR0_SELECTIVE_MASK;
- else
- *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
- | (gcr0 & SVM_CR0_SELECTIVE_MASK);
+ *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
+ | (gcr0 & SVM_CR0_SELECTIVE_MASK);
mark_dirty(svm->vmcb, VMCB_CR);
- if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
+ if (gcr0 == *hcr0) {
clr_cr_intercept(svm, INTERCEPT_CR0_READ);
clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
} else {
@@ -1918,7 +1598,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
}
}
-static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+static void svm_set_cr0(struct kvm_vcpu *vcpu, size_t cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1940,24 +1620,22 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (!npt_enabled)
cr0 |= X86_CR0_PG | X86_CR0_WP;
- if (!vcpu->fpu_active)
- cr0 |= X86_CR0_TS;
/*
* re-enable caching here because the QEMU bios
* does not do it - this results in some delay at
* reboot
*/
- if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+ if (kvm_check_has_quirk(vcpu->kvm, GVM_X86_QUIRK_CD_NW_CLEARED))
cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
mark_dirty(svm->vmcb, VMCB_CR);
update_cr0_intercept(svm);
}
-static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static int svm_set_cr4(struct kvm_vcpu *vcpu, size_t cr4)
{
- unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
- unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
+ size_t host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
+ size_t old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
if (cr4 & X86_CR4_VMXE)
return 1;
@@ -2014,8 +1692,8 @@ static void update_bp_intercept(struct kvm_vcpu *vcpu)
clr_exception_intercept(svm, BP_VECTOR);
- if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ if (vcpu->guest_debug & GVM_GUESTDBG_ENABLE) {
+ if (vcpu->guest_debug & GVM_GUESTDBG_USE_SW_BP)
set_exception_intercept(svm, BP_VECTOR);
} else
vcpu->guest_debug = 0;
@@ -2040,7 +1718,7 @@ static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
return to_svm(vcpu)->vmcb->save.dr6;
}
-static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
+static void svm_set_dr6(struct kvm_vcpu *vcpu, size_t value)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2059,11 +1737,11 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
vcpu->arch.dr6 = svm_get_dr6(vcpu);
vcpu->arch.dr7 = svm->vmcb->save.dr7;
- vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+ vcpu->arch.switch_db_regs &= ~GVM_DEBUGREG_WONT_EXIT;
set_dr_intercepts(svm);
}
-static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
+static void svm_set_dr7(struct kvm_vcpu *vcpu, size_t value)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2077,30 +1755,14 @@ static int pf_interception(struct vcpu_svm *svm)
u32 error_code;
int r = 1;
- switch (svm->apf_reason) {
- default:
- error_code = svm->vmcb->control.exit_info_1;
-
- trace_kvm_page_fault(fault_address, error_code);
- if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
- kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
- r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
- svm->vmcb->control.insn_bytes,
- svm->vmcb->control.insn_len);
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
- svm->apf_reason = 0;
- local_irq_disable();
- kvm_async_pf_task_wait(fault_address);
- local_irq_enable();
- break;
- case KVM_PV_REASON_PAGE_READY:
- svm->apf_reason = 0;
- local_irq_disable();
- kvm_async_pf_task_wake(fault_address);
- local_irq_enable();
- break;
- }
+ error_code = svm->vmcb->control.exit_info_1;
+
+ if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
+ kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
+ r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
+ svm->vmcb->control.insn_bytes,
+ svm->vmcb->control.insn_len);
+
return r;
}
@@ -2109,7 +1771,7 @@ static int db_interception(struct vcpu_svm *svm)
struct kvm_run *kvm_run = svm->vcpu.run;
if (!(svm->vcpu.guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
+ (GVM_GUESTDBG_SINGLESTEP | GVM_GUESTDBG_USE_HW_BP)) &&
!svm->nmi_singlestep) {
kvm_queue_exception(&svm->vcpu, DB_VECTOR);
return 1;
@@ -2117,14 +1779,14 @@ static int db_interception(struct vcpu_svm *svm)
if (svm->nmi_singlestep) {
svm->nmi_singlestep = false;
- if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
+ if (!(svm->vcpu.guest_debug & GVM_GUESTDBG_SINGLESTEP))
svm->vmcb->save.rflags &=
~(X86_EFLAGS_TF | X86_EFLAGS_RF);
}
if (svm->vcpu.guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ (GVM_GUESTDBG_SINGLESTEP | GVM_GUESTDBG_USE_HW_BP)) {
+ kvm_run->exit_reason = GVM_EXIT_DEBUG;
kvm_run->debug.arch.pc =
svm->vmcb->save.cs.base + svm->vmcb->save.rip;
kvm_run->debug.arch.exception = DB_VECTOR;
@@ -2138,7 +1800,7 @@ static int bp_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->exit_reason = GVM_EXIT_DEBUG;
kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
kvm_run->debug.arch.exception = BP_VECTOR;
return 0;
@@ -2160,22 +1822,6 @@ static int ac_interception(struct vcpu_svm *svm)
return 1;
}
-static void svm_fpu_activate(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- clr_exception_intercept(svm, NM_VECTOR);
-
- svm->vcpu.fpu_active = 1;
- update_cr0_intercept(svm);
-}
-
-static int nm_interception(struct vcpu_svm *svm)
-{
- svm_fpu_activate(&svm->vcpu);
- return 1;
-}
-
static bool is_erratum_383(void)
{
int err, i;
@@ -2210,7 +1856,7 @@ static bool is_erratum_383(void)
}
/* Flush tlb to evict multi-match entries */
- __flush_tlb_all();
+ //__flush_tlb_all();
return true;
}
@@ -2222,9 +1868,9 @@ static void svm_handle_mce(struct vcpu_svm *svm)
* Erratum 383 triggered. Guest state is corrupt so kill the
* guest.
*/
- pr_err("KVM: Guest triggered AMD Erratum 383\n");
+ pr_err("kvm: Guest triggered AMD Erratum 383\n");
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
+ kvm_make_request(GVM_REQ_TRIPLE_FAULT, &svm->vcpu);
return;
}
@@ -2233,8 +1879,7 @@ static void svm_handle_mce(struct vcpu_svm *svm)
* On an #MC intercept the MCE handler is not called automatically in
* the host. So do it by hand here.
*/
- asm volatile (
- "int $0x12\n");
+ __int12();
/* not sure if we ever come back to this point */
return;
@@ -2256,7 +1901,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
clear_page(svm->vmcb);
init_vmcb(svm);
- kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+ kvm_run->exit_reason = GVM_EXIT_SHUTDOWN;
return 0;
}
@@ -2303,13 +1948,7 @@ static int halt_interception(struct vcpu_svm *svm)
return kvm_emulate_halt(&svm->vcpu);
}
-static int vmmcall_interception(struct vcpu_svm *svm)
-{
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- return kvm_emulate_hypercall(&svm->vcpu);
-}
-
-static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
+static size_t nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2331,7 +1970,7 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
}
static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
- unsigned long root)
+ size_t root)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2455,7 +2094,6 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
* the #vmexit here.
*/
svm->nested.exit_required = true;
- trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
return false;
}
@@ -2477,30 +2115,44 @@ static inline bool nested_svm_nmi(struct vcpu_svm *svm)
return false;
}
-static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
+static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, PMDL *_mdl)
{
- struct page *page;
+ size_t hva;
+ PMDL mdl;
+ void *ret;
might_sleep();
- page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
- if (is_error_page(page))
+ hva = gfn_to_hva(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+ if (kvm_is_error_hva(hva))
goto error;
- *_page = page;
+ mdl = IoAllocateMdl((void *)hva, PAGE_SIZE, FALSE, FALSE, NULL);
+ if (!mdl)
+ goto error;
+
+ MmProbeAndLockPages(mdl, KernelMode, IoWriteAccess);
- return kmap(page);
+ ret = kmap(mdl);
+ if (!ret)
+ goto error1;
+ *_mdl = mdl;
+ return ret;
+
+error1:
+ MmUnlockPages(mdl);
+ IoFreeMdl(mdl);
error:
kvm_inject_gp(&svm->vcpu, 0);
return NULL;
}
-static void nested_svm_unmap(struct page *page)
+static void nested_svm_unmap(PMDL mdl)
{
- kunmap(page);
- kvm_release_page_dirty(page);
+ kunmap(mdl);
+ kvm_release_page(mdl);
}
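The mapping path above now pins the guest page through a Windows MDL instead of kvm_vcpu_gfn_to_page()/kmap() on a struct page; kmap() and kvm_release_page() are assumed here to be this project's wrappers around the map/unmap half of the standard MDL sequence. For orientation, the documented WDK lifecycle is sketched below (user_va standing in for the host virtual address from gfn_to_hva(); real code must wrap MmProbeAndLockPages() in __try/__except because it raises an exception on failure):

    PMDL mdl = IoAllocateMdl(user_va, PAGE_SIZE, FALSE, FALSE, NULL);
    if (mdl) {
            MmProbeAndLockPages(mdl, KernelMode, IoWriteAccess);    /* pin the page */
            void *kva = MmMapLockedPagesSpecifyCache(mdl, KernelMode, MmCached,
                                                     NULL, FALSE, NormalPagePriority);
            if (kva) {
                    /* ... access the guest VMCB through kva ... */
                    MmUnmapLockedPages(kva, mdl);
            }
            MmUnlockPages(mdl);
            IoFreeMdl(mdl);
    }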
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
@@ -2569,12 +2221,9 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
break;
case SVM_EXIT_EXCP_BASE + PF_VECTOR:
/* When we're shadowing, trap PFs, but not async PF */
- if (!npt_enabled && svm->apf_reason == 0)
+ if (!npt_enabled)
return NESTED_EXIT_HOST;
break;
- case SVM_EXIT_EXCP_BASE + NM_VECTOR:
- nm_interception(svm);
- break;
default:
break;
}
@@ -2597,26 +2246,77 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
case SVM_EXIT_IOIO:
vmexit = nested_svm_intercept_ioio(svm);
break;
- case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
+ case SVM_EXIT_READ_CR0:
+ case SVM_EXIT_READ_CR2:
+ case SVM_EXIT_READ_CR3:
+ case SVM_EXIT_READ_CR4:
+ case SVM_EXIT_READ_CR8:
+ case SVM_EXIT_WRITE_CR0:
+ case SVM_EXIT_WRITE_CR2:
+ case SVM_EXIT_WRITE_CR3:
+ case SVM_EXIT_WRITE_CR4:
+ case SVM_EXIT_WRITE_CR8: {
u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
if (svm->nested.intercept_cr & bit)
vmexit = NESTED_EXIT_DONE;
break;
}
- case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
+ case SVM_EXIT_READ_DR0:
+ case SVM_EXIT_READ_DR1:
+ case SVM_EXIT_READ_DR2:
+ case SVM_EXIT_READ_DR3:
+ case SVM_EXIT_READ_DR4:
+ case SVM_EXIT_READ_DR5:
+ case SVM_EXIT_READ_DR6:
+ case SVM_EXIT_READ_DR7:
+ case SVM_EXIT_WRITE_DR0:
+ case SVM_EXIT_WRITE_DR1:
+ case SVM_EXIT_WRITE_DR2:
+ case SVM_EXIT_WRITE_DR3:
+ case SVM_EXIT_WRITE_DR4:
+ case SVM_EXIT_WRITE_DR5:
+ case SVM_EXIT_WRITE_DR6:
+ case SVM_EXIT_WRITE_DR7: {
u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
if (svm->nested.intercept_dr & bit)
vmexit = NESTED_EXIT_DONE;
break;
}
- case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+ case SVM_EXIT_EXCP_BASE:
+ case SVM_EXIT_EXCP_BASE + 0x1:
+ case SVM_EXIT_EXCP_BASE + 0x2:
+ case SVM_EXIT_EXCP_BASE + 0x3:
+ case SVM_EXIT_EXCP_BASE + 0x4:
+ case SVM_EXIT_EXCP_BASE + 0x5:
+ case SVM_EXIT_EXCP_BASE + 0x6:
+ case SVM_EXIT_EXCP_BASE + 0x7:
+ case SVM_EXIT_EXCP_BASE + 0x8:
+ case SVM_EXIT_EXCP_BASE + 0x9:
+ case SVM_EXIT_EXCP_BASE + 0xa:
+ case SVM_EXIT_EXCP_BASE + 0xb:
+ case SVM_EXIT_EXCP_BASE + 0xc:
+ case SVM_EXIT_EXCP_BASE + 0xd:
+ case SVM_EXIT_EXCP_BASE + 0xe:
+ case SVM_EXIT_EXCP_BASE + 0xf:
+ case SVM_EXIT_EXCP_BASE + 0x10:
+ case SVM_EXIT_EXCP_BASE + 0x11:
+ case SVM_EXIT_EXCP_BASE + 0x12:
+ case SVM_EXIT_EXCP_BASE + 0x13:
+ case SVM_EXIT_EXCP_BASE + 0x14:
+ case SVM_EXIT_EXCP_BASE + 0x15:
+ case SVM_EXIT_EXCP_BASE + 0x16:
+ case SVM_EXIT_EXCP_BASE + 0x17:
+ case SVM_EXIT_EXCP_BASE + 0x18:
+ case SVM_EXIT_EXCP_BASE + 0x19:
+ case SVM_EXIT_EXCP_BASE + 0x1a:
+ case SVM_EXIT_EXCP_BASE + 0x1b:
+ case SVM_EXIT_EXCP_BASE + 0x1c:
+ case SVM_EXIT_EXCP_BASE + 0x1d:
+ case SVM_EXIT_EXCP_BASE + 0x1e:
+ case SVM_EXIT_EXCP_BASE + 0x1f: {
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
if (svm->nested.intercept_exceptions & excp_bits)
vmexit = NESTED_EXIT_DONE;
- /* async page fault always cause vmexit */
- else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
- svm->apf_reason != 0)
- vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_ERR: {
@@ -2680,16 +2380,9 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
- struct page *page;
+ PMDL kmap_mdl;
- trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
- vmcb->control.exit_info_1,
- vmcb->control.exit_info_2,
- vmcb->control.exit_int_info,
- vmcb->control.exit_int_info_err,
- KVM_ISA_SVM);
-
- nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
+ nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &kmap_mdl);
if (!nested_vmcb)
return 1;
@@ -2789,7 +2482,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
mark_all_dirty(svm->vmcb);
- nested_svm_unmap(page);
+ nested_svm_unmap(kmap_mdl);
nested_svm_uninit_mmu_context(&svm->vcpu);
kvm_mmu_reset_context(&svm->vcpu);
@@ -2850,12 +2543,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
- struct page *page;
+ PMDL kmap_mdl;
u64 vmcb_gpa;
vmcb_gpa = svm->vmcb->save.rax;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
+ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &kmap_mdl);
if (!nested_vmcb)
return false;
@@ -2865,22 +2558,11 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb->control.exit_info_1 = 0;
nested_vmcb->control.exit_info_2 = 0;
- nested_svm_unmap(page);
+ nested_svm_unmap(kmap_mdl);
return false;
}
- trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
- nested_vmcb->save.rip,
- nested_vmcb->control.int_ctl,
- nested_vmcb->control.event_inj,
- nested_vmcb->control.nested_ctl);
-
- trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
- nested_vmcb->control.intercept_cr >> 16,
- nested_vmcb->control.intercept_exceptions,
- nested_vmcb->control.intercept);
-
/* Clear internal status */
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
@@ -2985,7 +2667,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
- nested_svm_unmap(page);
+ nested_svm_unmap(kmap_mdl);
/* Enter Guest-Mode */
enter_guest_mode(&svm->vcpu);
@@ -3024,12 +2706,12 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
static int vmload_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
- struct page *page;
+ PMDL kmap_mdl;
if (nested_svm_check_permissions(svm))
return 1;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
+ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &kmap_mdl);
if (!nested_vmcb)
return 1;
@@ -3037,7 +2719,7 @@ static int vmload_interception(struct vcpu_svm *svm)
skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
- nested_svm_unmap(page);
+ nested_svm_unmap(kmap_mdl);
return 1;
}
@@ -3045,12 +2727,12 @@ static int vmload_interception(struct vcpu_svm *svm)
static int vmsave_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
- struct page *page;
+ PMDL kmap_mdl;
if (nested_svm_check_permissions(svm))
return 1;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
+ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &kmap_mdl);
if (!nested_vmcb)
return 1;
@@ -3058,7 +2740,7 @@ static int vmsave_interception(struct vcpu_svm *svm)
skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
- nested_svm_unmap(page);
+ nested_svm_unmap(kmap_mdl);
return 1;
}
@@ -3098,7 +2780,7 @@ static int stgi_interception(struct vcpu_svm *svm)
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ kvm_make_request(GVM_REQ_EVENT, &svm->vcpu);
enable_gif(svm);
@@ -3129,9 +2811,6 @@ static int invlpga_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
- kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
-
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
@@ -3142,8 +2821,6 @@ static int invlpga_interception(struct vcpu_svm *svm)
static int skinit_interception(struct vcpu_svm *svm)
{
- trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
-
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
@@ -3227,8 +2904,8 @@ static int task_switch_interception(struct vcpu_svm *svm)
if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
has_error_code, error_code) == EMULATE_FAIL) {
- svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ svm->vcpu.run->exit_reason = GVM_EXIT_INTERNAL_ERROR;
+ svm->vcpu.run->internal.suberror = GVM_INTERNAL_ERROR_EMULATION;
svm->vcpu.run->internal.ndata = 0;
return 0;
}
@@ -3248,7 +2925,7 @@ static int iret_interception(struct vcpu_svm *svm)
clr_intercept(svm, INTERCEPT_IRET);
svm->vcpu.arch.hflags |= HF_IRET_MASK;
svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ kvm_make_request(GVM_REQ_EVENT, &svm->vcpu);
return 1;
}
@@ -3269,6 +2946,7 @@ static int emulate_on_interception(struct vcpu_svm *svm)
static int rdpmc_interception(struct vcpu_svm *svm)
{
+#if 0
int err;
if (!static_cpu_has(X86_FEATURE_NRIPS))
@@ -3276,14 +2954,15 @@ static int rdpmc_interception(struct vcpu_svm *svm)
err = kvm_rdpmc(&svm->vcpu);
kvm_complete_insn_gp(&svm->vcpu, err);
+#endif
return 1;
}
static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
- unsigned long val)
+ size_t val)
{
- unsigned long cr0 = svm->vcpu.arch.cr0;
+ size_t cr0 = svm->vcpu.arch.cr0;
bool ret = false;
u64 intercept;
@@ -3309,7 +2988,7 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
static int cr_interception(struct vcpu_svm *svm)
{
int reg, cr;
- unsigned long val;
+ size_t val;
int err;
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -3346,7 +3025,7 @@ static int cr_interception(struct vcpu_svm *svm)
err = kvm_set_cr8(&svm->vcpu, val);
break;
default:
- WARN(1, "unhandled write to CR%d", cr);
+ //WARN(1, "unhandled write to CR%d", cr);
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
@@ -3368,7 +3047,7 @@ static int cr_interception(struct vcpu_svm *svm)
val = kvm_get_cr8(&svm->vcpu);
break;
default:
- WARN(1, "unhandled read from CR%d", cr);
+ //WARN(1, "unhandled read from CR%d", cr);
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
@@ -3382,7 +3061,7 @@ static int cr_interception(struct vcpu_svm *svm)
static int dr_interception(struct vcpu_svm *svm)
{
int reg, dr;
- unsigned long val;
+ size_t val;
if (svm->vcpu.guest_debug == 0) {
/*
@@ -3391,7 +3070,7 @@ static int dr_interception(struct vcpu_svm *svm)
* retrieve the full state of the debug registers.
*/
clr_dr_intercepts(svm);
- svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+ svm->vcpu.arch.switch_db_regs |= GVM_DEBUGREG_WONT_EXIT;
return 1;
}
@@ -3430,7 +3109,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return r;
if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
return r;
- kvm_run->exit_reason = KVM_EXIT_SET_TPR;
+ kvm_run->exit_reason = GVM_EXIT_SET_TPR;
return 0;
}
@@ -3440,9 +3119,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) {
case MSR_IA32_TSC: {
- msr_info->data = svm->vmcb->control.tsc_offset +
- kvm_scale_tsc(vcpu, rdtsc());
-
+ msr_info->data = svm->vmcb->control.tsc_offset + rdtsc();
break;
}
case MSR_STAR:
@@ -3536,11 +3213,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
msr_info.index = ecx;
msr_info.host_initiated = false;
if (svm_get_msr(&svm->vcpu, &msr_info)) {
- trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(&svm->vcpu, 0);
} else {
- trace_kvm_msr_read(ecx, msr_info.data);
-
kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
msr_info.data & 0xffffffff);
kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
@@ -3624,7 +3298,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
* svm_vcpu_put.
*/
svm->tsc_aux = data;
- wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
break;
case MSR_IA32_DEBUGCTLMSR:
if (!boot_cpu_has(X86_FEATURE_LBRV)) {
@@ -3672,10 +3345,8 @@ static int wrmsr_interception(struct vcpu_svm *svm)
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
if (kvm_set_msr(&svm->vcpu, &msr)) {
- trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(&svm->vcpu, 0);
} else {
- trace_kvm_msr_write(ecx, data);
skip_emulated_instruction(&svm->vcpu);
}
return 1;
@@ -3691,7 +3362,7 @@ static int msr_interception(struct vcpu_svm *svm)
static int interrupt_window_interception(struct vcpu_svm *svm)
{
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ kvm_make_request(GVM_REQ_EVENT, &svm->vcpu);
svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
mark_dirty(svm->vmcb, VMCB_INTR);
@@ -3699,12 +3370,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
return 1;
}
-static int pause_interception(struct vcpu_svm *svm)
-{
- kvm_vcpu_on_spin(&(svm->vcpu));
- return 1;
-}
-
static int nop_interception(struct vcpu_svm *svm)
{
skip_emulated_instruction(&(svm->vcpu));
@@ -3735,11 +3400,8 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
u32 icrl = svm->vmcb->control.exit_info_1;
u32 id = svm->vmcb->control.exit_info_2 >> 32;
- u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
- trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
-
switch (id) {
case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
/*
@@ -3760,7 +3422,6 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
int i;
struct kvm_vcpu *vcpu;
struct kvm *kvm = svm->vcpu.kvm;
- struct kvm_lapic *apic = svm->vcpu.arch.apic;
/*
* At this point, we expect that the AVIC HW has already
@@ -3769,9 +3430,9 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
bool m = kvm_apic_match_dest(vcpu, apic,
- icrl & KVM_APIC_SHORT_MASK,
+ icrl & GVM_APIC_SHORT_MASK,
GET_APIC_DEST_FIELD(icrh),
- icrl & KVM_APIC_DEST_MASK);
+ icrl & GVM_APIC_DEST_MASK);
if (m && !avic_vcpu_is_running(vcpu))
kvm_vcpu_wake_up(vcpu);
@@ -3781,7 +3442,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
case AVIC_IPI_FAILURE_INVALID_TARGET:
break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
- WARN_ONCE(1, "Invalid backing page\n");
+ //WARN_ONCE(1, "Invalid backing page\n");
break;
default:
pr_err("Unknown IPI interception\n");
@@ -3830,7 +3491,7 @@ static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
if (!entry)
return -EINVAL;
- new_entry = READ_ONCE(*entry);
+ READ_ONCE(*entry, new_entry);
new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
if (valid)
@@ -3975,17 +3636,10 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
int ret = 0;
u32 offset = svm->vmcb->control.exit_info_1 &
AVIC_UNACCEL_ACCESS_OFFSET_MASK;
- u32 vector = svm->vmcb->control.exit_info_2 &
- AVIC_UNACCEL_ACCESS_VECTOR_MASK;
- bool write = (svm->vmcb->control.exit_info_1 >> 32) &
- AVIC_UNACCEL_ACCESS_WRITE_MASK;
bool trap = is_avic_unaccelerated_access_trap(offset);
- trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
- trap, write, vector);
if (trap) {
/* Handling Trap */
- WARN_ONCE(!write, "svm: Handling trap read.\n");
ret = avic_unaccel_trap_write(svm);
} else {
/* Handling Fault */
@@ -4025,7 +3679,6 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
- [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
[SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
[SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
[SVM_EXIT_INTR] = intr_interception,
@@ -4037,7 +3690,6 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
[SVM_EXIT_INVD] = emulate_on_interception,
- [SVM_EXIT_PAUSE] = pause_interception,
[SVM_EXIT_HLT] = halt_interception,
[SVM_EXIT_INVLPG] = invlpg_interception,
[SVM_EXIT_INVLPGA] = invlpga_interception,
@@ -4046,7 +3698,6 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
[SVM_EXIT_VMRUN] = vmrun_interception,
- [SVM_EXIT_VMMCALL] = vmmcall_interception,
[SVM_EXIT_VMLOAD] = vmload_interception,
[SVM_EXIT_VMSAVE] = vmsave_interception,
[SVM_EXIT_STGI] = stgi_interception,
@@ -4185,8 +3836,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
u32 exit_code = svm->vmcb->control.exit_code;
- trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
-
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
@@ -4202,13 +3851,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu)) {
int vmexit;
- trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
- svm->vmcb->control.exit_info_1,
- svm->vmcb->control.exit_info_2,
- svm->vmcb->control.exit_int_info,
- svm->vmcb->control.exit_int_info_err,
- KVM_ISA_SVM);
-
vmexit = nested_svm_exit_special(svm);
if (vmexit == NESTED_EXIT_CONTINUE)
@@ -4221,10 +3863,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
svm_complete_interrupts(svm);
if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
- kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ kvm_run->exit_reason = GVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason
= svm->vmcb->control.exit_code;
- pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
+ pr_err("kvm: FAILED VMRUN WITH VMCB:\n");
dump_vmcb(vcpu);
return 0;
}
@@ -4240,7 +3882,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
- WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
+ //WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -4297,7 +3939,6 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
BUG_ON(!(gif_set(svm)));
- trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
++vcpu->stat.irq_injections;
svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
@@ -4362,11 +4003,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
-{
- return;
-}
-
static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
kvm_lapic_set_irr(vec, vcpu->arch.apic);
@@ -4379,209 +4015,6 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
kvm_vcpu_wake_up(vcpu);
}
-static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
-{
- unsigned long flags;
- struct amd_svm_iommu_ir *cur;
-
- spin_lock_irqsave(&svm->ir_list_lock, flags);
- list_for_each_entry(cur, &svm->ir_list, node) {
- if (cur->data != pi->ir_data)
- continue;
- list_del(&cur->node);
- kfree(cur);
- break;
- }
- spin_unlock_irqrestore(&svm->ir_list_lock, flags);
-}
-
-static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
-{
- int ret = 0;
- unsigned long flags;
- struct amd_svm_iommu_ir *ir;
-
- /**
- * In some cases, the existing irte is updaed and re-set,
- * so we need to check here if it's already been * added
- * to the ir_list.
- */
- if (pi->ir_data && (pi->prev_ga_tag != 0)) {
- struct kvm *kvm = svm->vcpu.kvm;
- u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
- struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
- struct vcpu_svm *prev_svm;
-
- if (!prev_vcpu) {
- ret = -EINVAL;
- goto out;
- }
-
- prev_svm = to_svm(prev_vcpu);
- svm_ir_list_del(prev_svm, pi);
- }
-
- /**
- * Allocating new amd_iommu_pi_data, which will get
- * add to the per-vcpu ir_list.
- */
- ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
- if (!ir) {
- ret = -ENOMEM;
- goto out;
- }
- ir->data = pi->ir_data;
-
- spin_lock_irqsave(&svm->ir_list_lock, flags);
- list_add(&ir->node, &svm->ir_list);
- spin_unlock_irqrestore(&svm->ir_list_lock, flags);
-out:
- return ret;
-}
-
-/**
- * Note:
- * The HW cannot support posting multicast/broadcast
- * interrupts to a vCPU. So, we still use legacy interrupt
- * remapping for these kind of interrupts.
- *
- * For lowest-priority interrupts, we only support
- * those with single CPU as the destination, e.g. user
- * configures the interrupts via /proc/irq or uses
- * irqbalance to make the interrupts single-CPU.
- */
-static int
-get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
- struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
-{
- struct kvm_lapic_irq irq;
- struct kvm_vcpu *vcpu = NULL;
-
- kvm_set_msi_irq(kvm, e, &irq);
-
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
- pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
- __func__, irq.vector);
- return -1;
- }
-
- pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
- irq.vector);
- *svm = to_svm(vcpu);
- vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
- vcpu_info->vector = irq.vector;
-
- return 0;
-}
-
-/*
- * svm_update_pi_irte - set IRTE for Posted-Interrupts
- *
- * @kvm: kvm
- * @host_irq: host irq of the interrupt
- * @guest_irq: gsi of the interrupt
- * @set: set or unset PI
- * returns 0 on success, < 0 on failure
- */
-static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set)
-{
- struct kvm_kernel_irq_routing_entry *e;
- struct kvm_irq_routing_table *irq_rt;
- int idx, ret = -EINVAL;
-
- if (!kvm_arch_has_assigned_device(kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP))
- return 0;
-
- pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
- __func__, host_irq, guest_irq, set);
-
- idx = srcu_read_lock(&kvm->irq_srcu);
- irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
- WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
-
- hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
- struct vcpu_data vcpu_info;
- struct vcpu_svm *svm = NULL;
-
- if (e->type != KVM_IRQ_ROUTING_MSI)
- continue;
-
- /**
- * Here, we setup with legacy mode in the following cases:
- * 1. When cannot target interrupt to a specific vcpu.
- * 2. Unsetting posted interrupt.
- * 3. APIC virtialization is disabled for the vcpu.
- */
- if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
- kvm_vcpu_apicv_active(&svm->vcpu)) {
- struct amd_iommu_pi_data pi;
-
- /* Try to enable guest_mode in IRTE */
- pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
- pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
- svm->vcpu.vcpu_id);
- pi.is_guest_mode = true;
- pi.vcpu_data = &vcpu_info;
- ret = irq_set_vcpu_affinity(host_irq, &pi);
-
- /**
- * Here, we successfully setting up vcpu affinity in
- * IOMMU guest mode. Now, we need to store the posted
- * interrupt information in a per-vcpu ir_list so that
- * we can reference to them directly when we update vcpu
- * scheduling information in IOMMU irte.
- */
- if (!ret && pi.is_guest_mode)
- svm_ir_list_add(svm, &pi);
- } else {
- /* Use legacy mode in IRTE */
- struct amd_iommu_pi_data pi;
-
- /**
- * Here, pi is used to:
- * - Tell IOMMU to use legacy mode for this interrupt.
- * - Retrieve ga_tag of prior interrupt remapping data.
- */
- pi.is_guest_mode = false;
- ret = irq_set_vcpu_affinity(host_irq, &pi);
-
- /**
- * Check if the posted interrupt was previously
- * setup with the guest_mode by checking if the ga_tag
- * was cached. If so, we need to clean up the per-vcpu
- * ir_list.
- */
- if (!ret && pi.prev_ga_tag) {
- int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
- struct kvm_vcpu *vcpu;
-
- vcpu = kvm_get_vcpu_by_id(kvm, id);
- if (vcpu)
- svm_ir_list_del(to_svm(vcpu), &pi);
- }
- }
-
- if (!ret && svm) {
- trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
- host_irq, e->gsi,
- vcpu_info.vector,
- vcpu_info.pi_desc_addr, set);
- }
-
- if (ret < 0) {
- pr_err("%s: failed to update PI IRTE\n", __func__);
- goto out;
- }
- }
-
- ret = 0;
-out:
- srcu_read_unlock(&kvm->irq_srcu, idx);
- return ret;
-}
-
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4682,10 +4115,6 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu)
svm->asid_generation--;
}
-static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
-{
-}
-
static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4729,7 +4158,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
&& kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ kvm_make_request(GVM_REQ_EVENT, &svm->vcpu);
}
svm->vcpu.arch.nmi_injected = false;
@@ -4739,7 +4168,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
if (!(exitintinfo & SVM_EXITINTINFO_VALID))
return;
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ kvm_make_request(GVM_REQ_EVENT, &svm->vcpu);
vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
@@ -4813,87 +4242,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
local_irq_enable();
- asm volatile (
- "push %%" _ASM_BP "; \n\t"
- "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
- "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
- "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
- "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
- "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
- "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
-#ifdef CONFIG_X86_64
- "mov %c[r8](%[svm]), %%r8 \n\t"
- "mov %c[r9](%[svm]), %%r9 \n\t"
- "mov %c[r10](%[svm]), %%r10 \n\t"
- "mov %c[r11](%[svm]), %%r11 \n\t"
- "mov %c[r12](%[svm]), %%r12 \n\t"
- "mov %c[r13](%[svm]), %%r13 \n\t"
- "mov %c[r14](%[svm]), %%r14 \n\t"
- "mov %c[r15](%[svm]), %%r15 \n\t"
-#endif
-
- /* Enter guest mode */
- "push %%" _ASM_AX " \n\t"
- "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
- __ex(SVM_VMLOAD) "\n\t"
- __ex(SVM_VMRUN) "\n\t"
- __ex(SVM_VMSAVE) "\n\t"
- "pop %%" _ASM_AX " \n\t"
-
- /* Save guest registers, load host registers */
- "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
- "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
- "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
- "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
- "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
- "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
-#ifdef CONFIG_X86_64
- "mov %%r8, %c[r8](%[svm]) \n\t"
- "mov %%r9, %c[r9](%[svm]) \n\t"
- "mov %%r10, %c[r10](%[svm]) \n\t"
- "mov %%r11, %c[r11](%[svm]) \n\t"
- "mov %%r12, %c[r12](%[svm]) \n\t"
- "mov %%r13, %c[r13](%[svm]) \n\t"
- "mov %%r14, %c[r14](%[svm]) \n\t"
- "mov %%r15, %c[r15](%[svm]) \n\t"
-#endif
- "pop %%" _ASM_BP
- :
- : [svm]"a"(svm),
- [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
- [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
- [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
- [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
- [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
- [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
- [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
-#ifdef CONFIG_X86_64
- , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
- [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
- [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
- [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
- [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
- [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
- [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
- [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
-#endif
- : "cc", "memory"
-#ifdef CONFIG_X86_64
- , "rbx", "rcx", "rdx", "rsi", "rdi"
- , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
-#else
- , "ebx", "ecx", "edx", "esi", "edi"
-#endif
- );
+ __asm_svm_vcpu_run(svm);
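	/*
	 * __asm_svm_vcpu_run() (assumed prototype: void __asm_svm_vcpu_run(struct
	 * vcpu_svm *svm)) is the generated out-of-line replacement for the inline
	 * asm deleted above: it has to load the guest GPRs from
	 * svm->vcpu.arch.regs, issue VMLOAD/VMRUN/VMSAVE with RAX = svm->vmcb_pa,
	 * and store the guest GPRs back on #VMEXIT.
	 */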
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
loadsegment(fs, svm->host.fs);
-#ifndef CONFIG_X86_32_LAZY_GS
loadsegment(gs, svm->host.gs);
#endif
-#endif
reload_tss(vcpu);
@@ -4920,10 +4276,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
- /* if exit due to PF check for async PF */
- if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
- svm->apf_reason = kvm_read_and_reset_pf_reason();
-
if (npt_enabled) {
vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
@@ -4940,7 +4292,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
mark_all_clean(svm->vmcb);
}
-static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
+static void svm_set_cr3(struct kvm_vcpu *vcpu, size_t root)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4949,7 +4301,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm_flush_tlb(vcpu);
}
-static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
+static void set_tdp_cr3(struct kvm_vcpu *vcpu, size_t root)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4974,17 +4326,6 @@ static int is_disabled(void)
return 0;
}
-static void
-svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
-{
- /*
- * Patch in the VMMCALL instruction:
- */
- hypercall[0] = 0x0f;
- hypercall[1] = 0x01;
- hypercall[2] = 0xd9;
-}
-
static void svm_check_processor_compat(void *rtn)
{
*(int *)rtn = 0;
@@ -5008,7 +4349,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct kvm_cpuid_entry2 *entry;
+ struct kvm_cpuid_entry *entry;
/* Update nrips enabled cache */
svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
@@ -5021,7 +4362,7 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
entry->ecx &= ~bit(X86_FEATURE_X2APIC);
}
-static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
+static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry *entry)
{
switch (func) {
case 0x1:
@@ -5059,7 +4400,7 @@ static int svm_get_lpage_level(void)
static bool svm_rdtscp_supported(void)
{
- return boot_cpu_has(X86_FEATURE_RDTSCP);
+ return false;
}
static bool svm_invpcid_supported(void)
@@ -5082,14 +4423,6 @@ static bool svm_has_wbinvd_exit(void)
return true;
}
-static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- set_exception_intercept(svm, NM_VECTOR);
- update_cr0_intercept(svm);
-}
-
#define PRE_EX(exit) { .exit_code = (exit), \
.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
@@ -5176,7 +4509,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
icpt_info.exit_code += info->modrm_reg;
break;
case SVM_EXIT_WRITE_CR0: {
- unsigned long cr0, val;
+ size_t cr0, val;
u64 intercept;
if (info->intercept == x86_intercept_cr_write)
@@ -5280,14 +4613,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
* We must have an instruction with interrupts enabled, so
* the timer interrupt isn't delayed by the interrupt shadow.
*/
- asm("nop");
+ __nop();
local_irq_disable();
}
-static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
{
if (avic_handle_apic_id_update(vcpu) != 0)
@@ -5297,7 +4626,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
avic_handle_ldr_update(vcpu);
}
-static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
+static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
.hardware_setup = svm_hardware_setup,
@@ -5315,7 +4644,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vm_init = avic_vm_init,
.vm_destroy = avic_vm_destroy,
- .prepare_guest_switch = svm_prepare_guest_switch,
+ .save_host_state = svm_save_host_state,
+ .load_host_state = svm_load_host_state,
.vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put,
.vcpu_blocking = svm_vcpu_blocking,
@@ -5348,11 +4678,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
- .get_pkru = svm_get_pkru,
-
- .fpu_activate = svm_fpu_activate,
- .fpu_deactivate = svm_fpu_deactivate,
-
.tlb_flush = svm_flush_tlb,
.run = svm_vcpu_run,
@@ -5360,7 +4685,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.skip_emulated_instruction = skip_emulated_instruction,
.set_interrupt_shadow = svm_set_interrupt_shadow,
.get_interrupt_shadow = svm_get_interrupt_shadow,
- .patch_hypercall = svm_patch_hypercall,
.set_irq = svm_set_irq,
.set_nmi = svm_inject_nmi,
.queue_exception = svm_queue_exception,
@@ -5376,7 +4700,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.get_enable_apicv = svm_get_enable_apicv,
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = svm_load_eoi_exitmap,
- .sync_pir_to_irr = svm_sync_pir_to_irr,
.hwapic_irr_update = svm_hwapic_irr_update,
.hwapic_isr_update = svm_hwapic_isr_update,
.apicv_post_state_restore = avic_post_state_restore,
@@ -5407,23 +4730,15 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.check_intercept = svm_check_intercept,
.handle_external_intr = svm_handle_external_intr,
- .sched_in = svm_sched_in,
-
- .pmu_ops = &amd_pmu_ops,
.deliver_posted_interrupt = svm_deliver_avic_intr,
- .update_pi_irte = svm_update_pi_irte,
};
-static int __init svm_init(void)
+int svm_init(void)
{
- return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
- __alignof__(struct vcpu_svm), THIS_MODULE);
+ return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), 0);
}
-static void __exit svm_exit(void)
+void svm_exit(void)
{
kvm_exit();
}
-
-module_init(svm_init)
-module_exit(svm_exit)
diff --git a/arch/x86/kvm/svm_def.h b/arch/x86/kvm/svm_def.h
new file mode 100755
index 0000000..2b5ce8e
--- /dev/null
+++ b/arch/x86/kvm/svm_def.h
@@ -0,0 +1,176 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * AMD SVM support
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
+ *
+ * Authors:
+ * Yaniv Kamay <yaniv@qumranet.com>
+ * Avi Kivity <avi@qumranet.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#define pr_fmt(fmt) "SVM: " fmt
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "mmu.h"
+#include "kvm_cache_regs.h"
+#include "x86.h"
+#include "cpuid.h"
+#include "pmu.h"
+
+#include <asm/svm.h>
+#include <asm/vmx.h>
+
+#include <__asm.h>
+
+#define IOPM_ALLOC_ORDER 2
+#define MSRPM_ALLOC_ORDER 1
+
+#define SEG_TYPE_LDT 2
+#define SEG_TYPE_BUSY_TSS16 3
+
+#define SVM_FEATURE_NPT (1 << 0)
+#define SVM_FEATURE_LBRV (1 << 1)
+#define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_NRIP (1 << 3)
+#define SVM_FEATURE_TSC_RATE (1 << 4)
+#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
+#define SVM_FEATURE_FLUSH_ASID (1 << 6)
+#define SVM_FEATURE_DECODE_ASSIST (1 << 7)
+#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
+
+#define SVM_AVIC_DOORBELL 0xc001011b
+
+#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
+#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
+#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
+
+#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+
+#define TSC_RATIO_RSVD 0xffffff0000000000ULL
+#define TSC_RATIO_MIN 0x0000000000000001ULL
+#define TSC_RATIO_MAX 0x000000ffffffffffULL
+
+#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
+
+/*
+ * 0xff is broadcast, so the max index allowed for physical APIC ID
+ * table is 0xfe. APIC IDs above 0xff are reserved.
+ */
+#define AVIC_MAX_PHYSICAL_ID_COUNT 255
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
+
+/* AVIC GATAG is encoded using VM and VCPU IDs */
+#define AVIC_VCPU_ID_BITS 8
+#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
+
+#define AVIC_VM_ID_BITS 24
+#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS)
+#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1)
+
+#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
+ (y & AVIC_VCPU_ID_MASK))
+#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
+#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
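+
+/*
+ * Worked example of the encoding above (illustrative values only):
+ * AVIC_GATAG(0x12, 0x34) packs the VM ID into bits 31:8 and the VCPU
+ * ID into bits 7:0, giving 0x1234; AVIC_GATAG_TO_VMID(0x1234) and
+ * AVIC_GATAG_TO_VCPUID(0x1234) then recover 0x12 and 0x34 respectively.
+ */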
+
+static bool erratum_383_found __read_mostly;
+
+static const u32 host_save_user_msrs[] = {
+#ifdef CONFIG_X86_64
+ MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
+ MSR_FS_BASE,
+#endif
+ MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
+ MSR_TSC_AUX,
+};
+
+#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
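+
+/*
+ * Rough sketch of how this table is consumed (assumes rdmsrl()-style
+ * MSR accessors; see svm_vcpu_load()/svm_vcpu_put() for the real code):
+ *
+ *	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
+ *		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+ */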
+
+struct kvm_vcpu;
+
+struct nested_state {
+ struct vmcb *hsave;
+ u64 hsave_msr;
+ u64 vm_cr_msr;
+ u64 vmcb;
+
+ /* These are the merged vectors */
+ u32 *msrpm;
+
+ /* gpa pointers to the real vectors */
+ u64 vmcb_msrpm;
+ u64 vmcb_iopm;
+
+ /* A VMEXIT is required but not yet emulated */
+ bool exit_required;
+
+ /* cache for intercepts of the guest */
+ u32 intercept_cr;
+ u32 intercept_dr;
+ u32 intercept_exceptions;
+ u64 intercept;
+
+ /* Nested Paging related state */
+ u64 nested_cr3;
+};
+
+#define MSRPM_OFFSETS 16
+static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
+
+/*
+ * Set osvw_len to a higher value when updated Revision Guides
+ * are published and we know what the new status bits are.
+ */
+static uint64_t osvw_len = 4, osvw_status;
+
+struct vcpu_svm {
+ struct kvm_vcpu vcpu;
+ struct vmcb *vmcb;
+ size_t vmcb_pa;
+ struct svm_cpu_data *svm_data;
+ uint64_t asid_generation;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+ uint64_t tsc_aux;
+
+ u64 next_rip;
+
+ u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+ struct {
+ u16 fs;
+ u16 gs;
+ u16 ldt;
+ u64 gs_base;
+ } host;
+
+ u32 *msrpm;
+
+ ulong nmi_iret_rip;
+
+ struct nested_state nested;
+
+ bool nmi_singlestep;
+
+ unsigned int3_injected;
+ size_t int3_rip;
+
+ /* cached guest cpuid flags for faster access */
+ bool nrips_enabled : 1;
+
+ u32 ldr_reg;
+ struct page *avic_backing_page;
+ u64 *avic_physical_id_cache;
+ bool avic_is_running;
+};
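+
+/*
+ * struct kvm_vcpu is embedded as the first member, so SVM code can
+ * recover the containing state from a generic vcpu pointer; a minimal
+ * sketch of the usual accessor used by the SVM code is:
+ *
+ *	static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
+ *	{
+ *		return container_of(vcpu, struct vcpu_svm, vcpu);
+ *	}
+ */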
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
deleted file mode 100644
index 0a6cc67..0000000
--- a/arch/x86/kvm/trace.h
+++ /dev/null
@@ -1,1374 +0,0 @@
-#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_KVM_H
-
-#include <linux/tracepoint.h>
-#include <asm/vmx.h>
-#include <asm/svm.h>
-#include <asm/clocksource.h>
-#include <asm/pvclock-abi.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kvm
-
-/*
- * Tracepoint for guest mode entry.
- */
-TRACE_EVENT(kvm_entry,
- TP_PROTO(unsigned int vcpu_id),
- TP_ARGS(vcpu_id),
-
- TP_STRUCT__entry(
- __field( unsigned int, vcpu_id )
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- ),
-
- TP_printk("vcpu %u", __entry->vcpu_id)
-);
-
-/*
- * Tracepoint for hypercall.
- */
-TRACE_EVENT(kvm_hypercall,
- TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3),
- TP_ARGS(nr, a0, a1, a2, a3),
-
- TP_STRUCT__entry(
- __field( unsigned long, nr )
- __field( unsigned long, a0 )
- __field( unsigned long, a1 )
- __field( unsigned long, a2 )
- __field( unsigned long, a3 )
- ),
-
- TP_fast_assign(
- __entry->nr = nr;
- __entry->a0 = a0;
- __entry->a1 = a1;
- __entry->a2 = a2;
- __entry->a3 = a3;
- ),
-
- TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx",
- __entry->nr, __entry->a0, __entry->a1, __entry->a2,
- __entry->a3)
-);
-
-/*
- * Tracepoint for hypercall.
- */
-TRACE_EVENT(kvm_hv_hypercall,
- TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx,
- __u64 ingpa, __u64 outgpa),
- TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
-
- TP_STRUCT__entry(
- __field( __u16, rep_cnt )
- __field( __u16, rep_idx )
- __field( __u64, ingpa )
- __field( __u64, outgpa )
- __field( __u16, code )
- __field( bool, fast )
- ),
-
- TP_fast_assign(
- __entry->rep_cnt = rep_cnt;
- __entry->rep_idx = rep_idx;
- __entry->ingpa = ingpa;
- __entry->outgpa = outgpa;
- __entry->code = code;
- __entry->fast = fast;
- ),
-
- TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
- __entry->code, __entry->fast ? "fast" : "slow",
- __entry->rep_cnt, __entry->rep_idx, __entry->ingpa,
- __entry->outgpa)
-);
-
-/*
- * Tracepoint for PIO.
- */
-
-#define KVM_PIO_IN 0
-#define KVM_PIO_OUT 1
-
-TRACE_EVENT(kvm_pio,
- TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
- unsigned int count, void *data),
- TP_ARGS(rw, port, size, count, data),
-
- TP_STRUCT__entry(
- __field( unsigned int, rw )
- __field( unsigned int, port )
- __field( unsigned int, size )
- __field( unsigned int, count )
- __field( unsigned int, val )
- ),
-
- TP_fast_assign(
- __entry->rw = rw;
- __entry->port = port;
- __entry->size = size;
- __entry->count = count;
- if (size == 1)
- __entry->val = *(unsigned char *)data;
- else if (size == 2)
- __entry->val = *(unsigned short *)data;
- else
- __entry->val = *(unsigned int *)data;
- ),
-
- TP_printk("pio_%s at 0x%x size %d count %d val 0x%x %s",
- __entry->rw ? "write" : "read",
- __entry->port, __entry->size, __entry->count, __entry->val,
- __entry->count > 1 ? "(...)" : "")
-);
-
-/*
- * Tracepoint for fast mmio.
- */
-TRACE_EVENT(kvm_fast_mmio,
- TP_PROTO(u64 gpa),
- TP_ARGS(gpa),
-
- TP_STRUCT__entry(
- __field(u64, gpa)
- ),
-
- TP_fast_assign(
- __entry->gpa = gpa;
- ),
-
- TP_printk("fast mmio at gpa 0x%llx", __entry->gpa)
-);
-
-/*
- * Tracepoint for cpuid.
- */
-TRACE_EVENT(kvm_cpuid,
- TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
- unsigned long rcx, unsigned long rdx),
- TP_ARGS(function, rax, rbx, rcx, rdx),
-
- TP_STRUCT__entry(
- __field( unsigned int, function )
- __field( unsigned long, rax )
- __field( unsigned long, rbx )
- __field( unsigned long, rcx )
- __field( unsigned long, rdx )
- ),
-
- TP_fast_assign(
- __entry->function = function;
- __entry->rax = rax;
- __entry->rbx = rbx;
- __entry->rcx = rcx;
- __entry->rdx = rdx;
- ),
-
- TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
- __entry->function, __entry->rax,
- __entry->rbx, __entry->rcx, __entry->rdx)
-);
-
-#define AREG(x) { APIC_##x, "APIC_" #x }
-
-#define kvm_trace_symbol_apic \
- AREG(ID), AREG(LVR), AREG(TASKPRI), AREG(ARBPRI), AREG(PROCPRI), \
- AREG(EOI), AREG(RRR), AREG(LDR), AREG(DFR), AREG(SPIV), AREG(ISR), \
- AREG(TMR), AREG(IRR), AREG(ESR), AREG(ICR), AREG(ICR2), AREG(LVTT), \
- AREG(LVTTHMR), AREG(LVTPC), AREG(LVT0), AREG(LVT1), AREG(LVTERR), \
- AREG(TMICT), AREG(TMCCT), AREG(TDCR), AREG(SELF_IPI), AREG(EFEAT), \
- AREG(ECTRL)
-/*
- * Tracepoint for apic access.
- */
-TRACE_EVENT(kvm_apic,
- TP_PROTO(unsigned int rw, unsigned int reg, unsigned int val),
- TP_ARGS(rw, reg, val),
-
- TP_STRUCT__entry(
- __field( unsigned int, rw )
- __field( unsigned int, reg )
- __field( unsigned int, val )
- ),
-
- TP_fast_assign(
- __entry->rw = rw;
- __entry->reg = reg;
- __entry->val = val;
- ),
-
- TP_printk("apic_%s %s = 0x%x",
- __entry->rw ? "write" : "read",
- __print_symbolic(__entry->reg, kvm_trace_symbol_apic),
- __entry->val)
-);
-
-#define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val)
-#define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val)
-
-#define KVM_ISA_VMX 1
-#define KVM_ISA_SVM 2
-
-/*
- * Tracepoint for kvm guest exit:
- */
-TRACE_EVENT(kvm_exit,
- TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa),
- TP_ARGS(exit_reason, vcpu, isa),
-
- TP_STRUCT__entry(
- __field( unsigned int, exit_reason )
- __field( unsigned long, guest_rip )
- __field( u32, isa )
- __field( u64, info1 )
- __field( u64, info2 )
- ),
-
- TP_fast_assign(
- __entry->exit_reason = exit_reason;
- __entry->guest_rip = kvm_rip_read(vcpu);
- __entry->isa = isa;
- kvm_x86_ops->get_exit_info(vcpu, &__entry->info1,
- &__entry->info2);
- ),
-
- TP_printk("reason %s rip 0x%lx info %llx %llx",
- (__entry->isa == KVM_ISA_VMX) ?
- __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) :
- __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS),
- __entry->guest_rip, __entry->info1, __entry->info2)
-);
-
-/*
- * Tracepoint for kvm interrupt injection:
- */
-TRACE_EVENT(kvm_inj_virq,
- TP_PROTO(unsigned int irq),
- TP_ARGS(irq),
-
- TP_STRUCT__entry(
- __field( unsigned int, irq )
- ),
-
- TP_fast_assign(
- __entry->irq = irq;
- ),
-
- TP_printk("irq %u", __entry->irq)
-);
-
-#define EXS(x) { x##_VECTOR, "#" #x }
-
-#define kvm_trace_sym_exc \
- EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \
- EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \
- EXS(MF), EXS(AC), EXS(MC)
-
-/*
- * Tracepoint for kvm interrupt injection:
- */
-TRACE_EVENT(kvm_inj_exception,
- TP_PROTO(unsigned exception, bool has_error, unsigned error_code),
- TP_ARGS(exception, has_error, error_code),
-
- TP_STRUCT__entry(
- __field( u8, exception )
- __field( u8, has_error )
- __field( u32, error_code )
- ),
-
- TP_fast_assign(
- __entry->exception = exception;
- __entry->has_error = has_error;
- __entry->error_code = error_code;
- ),
-
- TP_printk("%s (0x%x)",
- __print_symbolic(__entry->exception, kvm_trace_sym_exc),
- /* FIXME: don't print error_code if not present */
- __entry->has_error ? __entry->error_code : 0)
-);
-
-/*
- * Tracepoint for page fault.
- */
-TRACE_EVENT(kvm_page_fault,
- TP_PROTO(unsigned long fault_address, unsigned int error_code),
- TP_ARGS(fault_address, error_code),
-
- TP_STRUCT__entry(
- __field( unsigned long, fault_address )
- __field( unsigned int, error_code )
- ),
-
- TP_fast_assign(
- __entry->fault_address = fault_address;
- __entry->error_code = error_code;
- ),
-
- TP_printk("address %lx error_code %x",
- __entry->fault_address, __entry->error_code)
-);
-
-/*
- * Tracepoint for guest MSR access.
- */
-TRACE_EVENT(kvm_msr,
- TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception),
- TP_ARGS(write, ecx, data, exception),
-
- TP_STRUCT__entry(
- __field( unsigned, write )
- __field( u32, ecx )
- __field( u64, data )
- __field( u8, exception )
- ),
-
- TP_fast_assign(
- __entry->write = write;
- __entry->ecx = ecx;
- __entry->data = data;
- __entry->exception = exception;
- ),
-
- TP_printk("msr_%s %x = 0x%llx%s",
- __entry->write ? "write" : "read",
- __entry->ecx, __entry->data,
- __entry->exception ? " (#GP)" : "")
-);
-
-#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false)
-#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false)
-#define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true)
-#define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true)
-
-/*
- * Tracepoint for guest CR access.
- */
-TRACE_EVENT(kvm_cr,
- TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val),
- TP_ARGS(rw, cr, val),
-
- TP_STRUCT__entry(
- __field( unsigned int, rw )
- __field( unsigned int, cr )
- __field( unsigned long, val )
- ),
-
- TP_fast_assign(
- __entry->rw = rw;
- __entry->cr = cr;
- __entry->val = val;
- ),
-
- TP_printk("cr_%s %x = 0x%lx",
- __entry->rw ? "write" : "read",
- __entry->cr, __entry->val)
-);
-
-#define trace_kvm_cr_read(cr, val) trace_kvm_cr(0, cr, val)
-#define trace_kvm_cr_write(cr, val) trace_kvm_cr(1, cr, val)
-
-TRACE_EVENT(kvm_pic_set_irq,
- TP_PROTO(__u8 chip, __u8 pin, __u8 elcr, __u8 imr, bool coalesced),
- TP_ARGS(chip, pin, elcr, imr, coalesced),
-
- TP_STRUCT__entry(
- __field( __u8, chip )
- __field( __u8, pin )
- __field( __u8, elcr )
- __field( __u8, imr )
- __field( bool, coalesced )
- ),
-
- TP_fast_assign(
- __entry->chip = chip;
- __entry->pin = pin;
- __entry->elcr = elcr;
- __entry->imr = imr;
- __entry->coalesced = coalesced;
- ),
-
- TP_printk("chip %u pin %u (%s%s)%s",
- __entry->chip, __entry->pin,
- (__entry->elcr & (1 << __entry->pin)) ? "level":"edge",
- (__entry->imr & (1 << __entry->pin)) ? "|masked":"",
- __entry->coalesced ? " (coalesced)" : "")
-);
-
-#define kvm_apic_dst_shorthand \
- {0x0, "dst"}, \
- {0x1, "self"}, \
- {0x2, "all"}, \
- {0x3, "all-but-self"}
-
-TRACE_EVENT(kvm_apic_ipi,
- TP_PROTO(__u32 icr_low, __u32 dest_id),
- TP_ARGS(icr_low, dest_id),
-
- TP_STRUCT__entry(
- __field( __u32, icr_low )
- __field( __u32, dest_id )
- ),
-
- TP_fast_assign(
- __entry->icr_low = icr_low;
- __entry->dest_id = dest_id;
- ),
-
- TP_printk("dst %x vec %u (%s|%s|%s|%s|%s)",
- __entry->dest_id, (u8)__entry->icr_low,
- __print_symbolic((__entry->icr_low >> 8 & 0x7),
- kvm_deliver_mode),
- (__entry->icr_low & (1<<11)) ? "logical" : "physical",
- (__entry->icr_low & (1<<14)) ? "assert" : "de-assert",
- (__entry->icr_low & (1<<15)) ? "level" : "edge",
- __print_symbolic((__entry->icr_low >> 18 & 0x3),
- kvm_apic_dst_shorthand))
-);
-
-TRACE_EVENT(kvm_apic_accept_irq,
- TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
- TP_ARGS(apicid, dm, tm, vec),
-
- TP_STRUCT__entry(
- __field( __u32, apicid )
- __field( __u16, dm )
- __field( __u8, tm )
- __field( __u8, vec )
- ),
-
- TP_fast_assign(
- __entry->apicid = apicid;
- __entry->dm = dm;
- __entry->tm = tm;
- __entry->vec = vec;
- ),
-
- TP_printk("apicid %x vec %u (%s|%s)",
- __entry->apicid, __entry->vec,
- __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
- __entry->tm ? "level" : "edge")
-);
-
-TRACE_EVENT(kvm_eoi,
- TP_PROTO(struct kvm_lapic *apic, int vector),
- TP_ARGS(apic, vector),
-
- TP_STRUCT__entry(
- __field( __u32, apicid )
- __field( int, vector )
- ),
-
- TP_fast_assign(
- __entry->apicid = apic->vcpu->vcpu_id;
- __entry->vector = vector;
- ),
-
- TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector)
-);
-
-TRACE_EVENT(kvm_pv_eoi,
- TP_PROTO(struct kvm_lapic *apic, int vector),
- TP_ARGS(apic, vector),
-
- TP_STRUCT__entry(
- __field( __u32, apicid )
- __field( int, vector )
- ),
-
- TP_fast_assign(
- __entry->apicid = apic->vcpu->vcpu_id;
- __entry->vector = vector;
- ),
-
- TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector)
-);
-
-/*
- * Tracepoint for nested VMRUN
- */
-TRACE_EVENT(kvm_nested_vmrun,
- TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
- __u32 event_inj, bool npt),
- TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt),
-
- TP_STRUCT__entry(
- __field( __u64, rip )
- __field( __u64, vmcb )
- __field( __u64, nested_rip )
- __field( __u32, int_ctl )
- __field( __u32, event_inj )
- __field( bool, npt )
- ),
-
- TP_fast_assign(
- __entry->rip = rip;
- __entry->vmcb = vmcb;
- __entry->nested_rip = nested_rip;
- __entry->int_ctl = int_ctl;
- __entry->event_inj = event_inj;
- __entry->npt = npt;
- ),
-
- TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x "
- "event_inj: 0x%08x npt: %s",
- __entry->rip, __entry->vmcb, __entry->nested_rip,
- __entry->int_ctl, __entry->event_inj,
- __entry->npt ? "on" : "off")
-);
-
-TRACE_EVENT(kvm_nested_intercepts,
- TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept),
- TP_ARGS(cr_read, cr_write, exceptions, intercept),
-
- TP_STRUCT__entry(
- __field( __u16, cr_read )
- __field( __u16, cr_write )
- __field( __u32, exceptions )
- __field( __u64, intercept )
- ),
-
- TP_fast_assign(
- __entry->cr_read = cr_read;
- __entry->cr_write = cr_write;
- __entry->exceptions = exceptions;
- __entry->intercept = intercept;
- ),
-
- TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx",
- __entry->cr_read, __entry->cr_write, __entry->exceptions,
- __entry->intercept)
-);
-/*
- * Tracepoint for #VMEXIT while nested
- */
-TRACE_EVENT(kvm_nested_vmexit,
- TP_PROTO(__u64 rip, __u32 exit_code,
- __u64 exit_info1, __u64 exit_info2,
- __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa),
- TP_ARGS(rip, exit_code, exit_info1, exit_info2,
- exit_int_info, exit_int_info_err, isa),
-
- TP_STRUCT__entry(
- __field( __u64, rip )
- __field( __u32, exit_code )
- __field( __u64, exit_info1 )
- __field( __u64, exit_info2 )
- __field( __u32, exit_int_info )
- __field( __u32, exit_int_info_err )
- __field( __u32, isa )
- ),
-
- TP_fast_assign(
- __entry->rip = rip;
- __entry->exit_code = exit_code;
- __entry->exit_info1 = exit_info1;
- __entry->exit_info2 = exit_info2;
- __entry->exit_int_info = exit_int_info;
- __entry->exit_int_info_err = exit_int_info_err;
- __entry->isa = isa;
- ),
- TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
- "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
- __entry->rip,
- (__entry->isa == KVM_ISA_VMX) ?
- __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) :
- __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS),
- __entry->exit_info1, __entry->exit_info2,
- __entry->exit_int_info, __entry->exit_int_info_err)
-);
-
-/*
- * Tracepoint for #VMEXIT reinjected to the guest
- */
-TRACE_EVENT(kvm_nested_vmexit_inject,
- TP_PROTO(__u32 exit_code,
- __u64 exit_info1, __u64 exit_info2,
- __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa),
- TP_ARGS(exit_code, exit_info1, exit_info2,
- exit_int_info, exit_int_info_err, isa),
-
- TP_STRUCT__entry(
- __field( __u32, exit_code )
- __field( __u64, exit_info1 )
- __field( __u64, exit_info2 )
- __field( __u32, exit_int_info )
- __field( __u32, exit_int_info_err )
- __field( __u32, isa )
- ),
-
- TP_fast_assign(
- __entry->exit_code = exit_code;
- __entry->exit_info1 = exit_info1;
- __entry->exit_info2 = exit_info2;
- __entry->exit_int_info = exit_int_info;
- __entry->exit_int_info_err = exit_int_info_err;
- __entry->isa = isa;
- ),
-
- TP_printk("reason: %s ext_inf1: 0x%016llx "
- "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
- (__entry->isa == KVM_ISA_VMX) ?
- __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) :
- __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS),
- __entry->exit_info1, __entry->exit_info2,
- __entry->exit_int_info, __entry->exit_int_info_err)
-);
-
-/*
- * Tracepoint for nested #vmexit because of interrupt pending
- */
-TRACE_EVENT(kvm_nested_intr_vmexit,
- TP_PROTO(__u64 rip),
- TP_ARGS(rip),
-
- TP_STRUCT__entry(
- __field( __u64, rip )
- ),
-
- TP_fast_assign(
- __entry->rip = rip
- ),
-
- TP_printk("rip: 0x%016llx", __entry->rip)
-);
-
-/*
- * Tracepoint for nested #vmexit because of interrupt pending
- */
-TRACE_EVENT(kvm_invlpga,
- TP_PROTO(__u64 rip, int asid, u64 address),
- TP_ARGS(rip, asid, address),
-
- TP_STRUCT__entry(
- __field( __u64, rip )
- __field( int, asid )
- __field( __u64, address )
- ),
-
- TP_fast_assign(
- __entry->rip = rip;
- __entry->asid = asid;
- __entry->address = address;
- ),
-
- TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx",
- __entry->rip, __entry->asid, __entry->address)
-);
-
-/*
- * Tracepoint for nested #vmexit because of interrupt pending
- */
-TRACE_EVENT(kvm_skinit,
- TP_PROTO(__u64 rip, __u32 slb),
- TP_ARGS(rip, slb),
-
- TP_STRUCT__entry(
- __field( __u64, rip )
- __field( __u32, slb )
- ),
-
- TP_fast_assign(
- __entry->rip = rip;
- __entry->slb = slb;
- ),
-
- TP_printk("rip: 0x%016llx slb: 0x%08x",
- __entry->rip, __entry->slb)
-);
-
-#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
-#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
-#define KVM_EMUL_INSN_F_CS_D (1 << 2)
-#define KVM_EMUL_INSN_F_CS_L (1 << 3)
-
-#define kvm_trace_symbol_emul_flags \
- { 0, "real" }, \
- { KVM_EMUL_INSN_F_CR0_PE \
- | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \
- { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \
- { KVM_EMUL_INSN_F_CR0_PE \
- | KVM_EMUL_INSN_F_CS_D, "prot32" }, \
- { KVM_EMUL_INSN_F_CR0_PE \
- | KVM_EMUL_INSN_F_CS_L, "prot64" }
-
-#define kei_decode_mode(mode) ({ \
- u8 flags = 0xff; \
- switch (mode) { \
- case X86EMUL_MODE_REAL: \
- flags = 0; \
- break; \
- case X86EMUL_MODE_VM86: \
- flags = KVM_EMUL_INSN_F_EFL_VM; \
- break; \
- case X86EMUL_MODE_PROT16: \
- flags = KVM_EMUL_INSN_F_CR0_PE; \
- break; \
- case X86EMUL_MODE_PROT32: \
- flags = KVM_EMUL_INSN_F_CR0_PE \
- | KVM_EMUL_INSN_F_CS_D; \
- break; \
- case X86EMUL_MODE_PROT64: \
- flags = KVM_EMUL_INSN_F_CR0_PE \
- | KVM_EMUL_INSN_F_CS_L; \
- break; \
- } \
- flags; \
- })
-
-TRACE_EVENT(kvm_emulate_insn,
- TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed),
- TP_ARGS(vcpu, failed),
-
- TP_STRUCT__entry(
- __field( __u64, rip )
- __field( __u32, csbase )
- __field( __u8, len )
- __array( __u8, insn, 15 )
- __field( __u8, flags )
- __field( __u8, failed )
- ),
-
- TP_fast_assign(
- __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS);
- __entry->len = vcpu->arch.emulate_ctxt.fetch.ptr
- - vcpu->arch.emulate_ctxt.fetch.data;
- __entry->rip = vcpu->arch.emulate_ctxt._eip - __entry->len;
- memcpy(__entry->insn,
- vcpu->arch.emulate_ctxt.fetch.data,
- 15);
- __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode);
- __entry->failed = failed;
- ),
-
- TP_printk("%x:%llx:%s (%s)%s",
- __entry->csbase, __entry->rip,
- __print_hex(__entry->insn, __entry->len),
- __print_symbolic(__entry->flags,
- kvm_trace_symbol_emul_flags),
- __entry->failed ? " failed" : ""
- )
- );
-
-#define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0)
-#define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1)
-
-TRACE_EVENT(
- vcpu_match_mmio,
- TP_PROTO(gva_t gva, gpa_t gpa, bool write, bool gpa_match),
- TP_ARGS(gva, gpa, write, gpa_match),
-
- TP_STRUCT__entry(
- __field(gva_t, gva)
- __field(gpa_t, gpa)
- __field(bool, write)
- __field(bool, gpa_match)
- ),
-
- TP_fast_assign(
- __entry->gva = gva;
- __entry->gpa = gpa;
- __entry->write = write;
- __entry->gpa_match = gpa_match
- ),
-
- TP_printk("gva %#lx gpa %#llx %s %s", __entry->gva, __entry->gpa,
- __entry->write ? "Write" : "Read",
- __entry->gpa_match ? "GPA" : "GVA")
-);
-
-TRACE_EVENT(kvm_write_tsc_offset,
- TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset,
- __u64 next_tsc_offset),
- TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset),
-
- TP_STRUCT__entry(
- __field( unsigned int, vcpu_id )
- __field( __u64, previous_tsc_offset )
- __field( __u64, next_tsc_offset )
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->previous_tsc_offset = previous_tsc_offset;
- __entry->next_tsc_offset = next_tsc_offset;
- ),
-
- TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id,
- __entry->previous_tsc_offset, __entry->next_tsc_offset)
-);
-
-#ifdef CONFIG_X86_64
-
-#define host_clocks \
- {VCLOCK_NONE, "none"}, \
- {VCLOCK_TSC, "tsc"} \
-
-TRACE_EVENT(kvm_update_master_clock,
- TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
- TP_ARGS(use_master_clock, host_clock, offset_matched),
-
- TP_STRUCT__entry(
- __field( bool, use_master_clock )
- __field( unsigned int, host_clock )
- __field( bool, offset_matched )
- ),
-
- TP_fast_assign(
- __entry->use_master_clock = use_master_clock;
- __entry->host_clock = host_clock;
- __entry->offset_matched = offset_matched;
- ),
-
- TP_printk("masterclock %d hostclock %s offsetmatched %u",
- __entry->use_master_clock,
- __print_symbolic(__entry->host_clock, host_clocks),
- __entry->offset_matched)
-);
-
-TRACE_EVENT(kvm_track_tsc,
- TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched,
- unsigned int online_vcpus, bool use_master_clock,
- unsigned int host_clock),
- TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock,
- host_clock),
-
- TP_STRUCT__entry(
- __field( unsigned int, vcpu_id )
- __field( unsigned int, nr_vcpus_matched_tsc )
- __field( unsigned int, online_vcpus )
- __field( bool, use_master_clock )
- __field( unsigned int, host_clock )
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->nr_vcpus_matched_tsc = nr_matched;
- __entry->online_vcpus = online_vcpus;
- __entry->use_master_clock = use_master_clock;
- __entry->host_clock = host_clock;
- ),
-
- TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u"
- " hostclock %s",
- __entry->vcpu_id, __entry->use_master_clock,
- __entry->nr_vcpus_matched_tsc, __entry->online_vcpus,
- __print_symbolic(__entry->host_clock, host_clocks))
-);
-
-#endif /* CONFIG_X86_64 */
-
-/*
- * Tracepoint for PML full VMEXIT.
- */
-TRACE_EVENT(kvm_pml_full,
- TP_PROTO(unsigned int vcpu_id),
- TP_ARGS(vcpu_id),
-
- TP_STRUCT__entry(
- __field( unsigned int, vcpu_id )
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- ),
-
- TP_printk("vcpu %d: PML full", __entry->vcpu_id)
-);
-
-TRACE_EVENT(kvm_ple_window,
- TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
- TP_ARGS(grow, vcpu_id, new, old),
-
- TP_STRUCT__entry(
- __field( bool, grow )
- __field( unsigned int, vcpu_id )
- __field( int, new )
- __field( int, old )
- ),
-
- TP_fast_assign(
- __entry->grow = grow;
- __entry->vcpu_id = vcpu_id;
- __entry->new = new;
- __entry->old = old;
- ),
-
- TP_printk("vcpu %u: ple_window %d (%s %d)",
- __entry->vcpu_id,
- __entry->new,
- __entry->grow ? "grow" : "shrink",
- __entry->old)
-);
-
-#define trace_kvm_ple_window_grow(vcpu_id, new, old) \
- trace_kvm_ple_window(true, vcpu_id, new, old)
-#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
- trace_kvm_ple_window(false, vcpu_id, new, old)
-
-TRACE_EVENT(kvm_pvclock_update,
- TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock),
- TP_ARGS(vcpu_id, pvclock),
-
- TP_STRUCT__entry(
- __field( unsigned int, vcpu_id )
- __field( __u32, version )
- __field( __u64, tsc_timestamp )
- __field( __u64, system_time )
- __field( __u32, tsc_to_system_mul )
- __field( __s8, tsc_shift )
- __field( __u8, flags )
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->version = pvclock->version;
- __entry->tsc_timestamp = pvclock->tsc_timestamp;
- __entry->system_time = pvclock->system_time;
- __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul;
- __entry->tsc_shift = pvclock->tsc_shift;
- __entry->flags = pvclock->flags;
- ),
-
- TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, "
- "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, "
- "flags 0x%x }",
- __entry->vcpu_id,
- __entry->version,
- __entry->tsc_timestamp,
- __entry->system_time,
- __entry->tsc_to_system_mul,
- __entry->tsc_shift,
- __entry->flags)
-);
-
-TRACE_EVENT(kvm_wait_lapic_expire,
- TP_PROTO(unsigned int vcpu_id, s64 delta),
- TP_ARGS(vcpu_id, delta),
-
- TP_STRUCT__entry(
- __field( unsigned int, vcpu_id )
- __field( s64, delta )
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->delta = delta;
- ),
-
- TP_printk("vcpu %u: delta %lld (%s)",
- __entry->vcpu_id,
- __entry->delta,
- __entry->delta < 0 ? "early" : "late")
-);
-
-TRACE_EVENT(kvm_enter_smm,
- TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering),
- TP_ARGS(vcpu_id, smbase, entering),
-
- TP_STRUCT__entry(
- __field( unsigned int, vcpu_id )
- __field( u64, smbase )
- __field( bool, entering )
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->smbase = smbase;
- __entry->entering = entering;
- ),
-
- TP_printk("vcpu %u: %s SMM, smbase 0x%llx",
- __entry->vcpu_id,
- __entry->entering ? "entering" : "leaving",
- __entry->smbase)
-);
-
-/*
- * Tracepoint for VT-d posted-interrupts.
- */
-TRACE_EVENT(kvm_pi_irte_update,
- TP_PROTO(unsigned int host_irq, unsigned int vcpu_id,
- unsigned int gsi, unsigned int gvec,
- u64 pi_desc_addr, bool set),
- TP_ARGS(host_irq, vcpu_id, gsi, gvec, pi_desc_addr, set),
-
- TP_STRUCT__entry(
- __field( unsigned int, host_irq )
- __field( unsigned int, vcpu_id )
- __field( unsigned int, gsi )
- __field( unsigned int, gvec )
- __field( u64, pi_desc_addr )
- __field( bool, set )
- ),
-
- TP_fast_assign(
- __entry->host_irq = host_irq;
- __entry->vcpu_id = vcpu_id;
- __entry->gsi = gsi;
- __entry->gvec = gvec;
- __entry->pi_desc_addr = pi_desc_addr;
- __entry->set = set;
- ),
-
- TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
- "gvec: 0x%x, pi_desc_addr: 0x%llx",
- __entry->set ? "enabled and being updated" : "disabled",
- __entry->host_irq,
- __entry->vcpu_id,
- __entry->gsi,
- __entry->gvec,
- __entry->pi_desc_addr)
-);
-
-/*
- * Tracepoint for kvm_hv_notify_acked_sint.
- */
-TRACE_EVENT(kvm_hv_notify_acked_sint,
- TP_PROTO(int vcpu_id, u32 sint),
- TP_ARGS(vcpu_id, sint),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(u32, sint)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->sint = sint;
- ),
-
- TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint)
-);
-
-/*
- * Tracepoint for synic_set_irq.
- */
-TRACE_EVENT(kvm_hv_synic_set_irq,
- TP_PROTO(int vcpu_id, u32 sint, int vector, int ret),
- TP_ARGS(vcpu_id, sint, vector, ret),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(u32, sint)
- __field(int, vector)
- __field(int, ret)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->sint = sint;
- __entry->vector = vector;
- __entry->ret = ret;
- ),
-
- TP_printk("vcpu_id %d sint %u vector %d ret %d",
- __entry->vcpu_id, __entry->sint, __entry->vector,
- __entry->ret)
-);
-
-/*
- * Tracepoint for kvm_hv_synic_send_eoi.
- */
-TRACE_EVENT(kvm_hv_synic_send_eoi,
- TP_PROTO(int vcpu_id, int vector),
- TP_ARGS(vcpu_id, vector),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(u32, sint)
- __field(int, vector)
- __field(int, ret)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->vector = vector;
- ),
-
- TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector)
-);
-
-/*
- * Tracepoint for synic_set_msr.
- */
-TRACE_EVENT(kvm_hv_synic_set_msr,
- TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host),
- TP_ARGS(vcpu_id, msr, data, host),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(u32, msr)
- __field(u64, data)
- __field(bool, host)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->msr = msr;
- __entry->data = data;
- __entry->host = host
- ),
-
- TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d",
- __entry->vcpu_id, __entry->msr, __entry->data, __entry->host)
-);
-
-/*
- * Tracepoint for stimer_set_config.
- */
-TRACE_EVENT(kvm_hv_stimer_set_config,
- TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host),
- TP_ARGS(vcpu_id, timer_index, config, host),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(int, timer_index)
- __field(u64, config)
- __field(bool, host)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->timer_index = timer_index;
- __entry->config = config;
- __entry->host = host;
- ),
-
- TP_printk("vcpu_id %d timer %d config 0x%llx host %d",
- __entry->vcpu_id, __entry->timer_index, __entry->config,
- __entry->host)
-);
-
-/*
- * Tracepoint for stimer_set_count.
- */
-TRACE_EVENT(kvm_hv_stimer_set_count,
- TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host),
- TP_ARGS(vcpu_id, timer_index, count, host),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(int, timer_index)
- __field(u64, count)
- __field(bool, host)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->timer_index = timer_index;
- __entry->count = count;
- __entry->host = host;
- ),
-
- TP_printk("vcpu_id %d timer %d count %llu host %d",
- __entry->vcpu_id, __entry->timer_index, __entry->count,
- __entry->host)
-);
-
-/*
- * Tracepoint for stimer_start(periodic timer case).
- */
-TRACE_EVENT(kvm_hv_stimer_start_periodic,
- TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time),
- TP_ARGS(vcpu_id, timer_index, time_now, exp_time),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(int, timer_index)
- __field(u64, time_now)
- __field(u64, exp_time)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->timer_index = timer_index;
- __entry->time_now = time_now;
- __entry->exp_time = exp_time;
- ),
-
- TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu",
- __entry->vcpu_id, __entry->timer_index, __entry->time_now,
- __entry->exp_time)
-);
-
-/*
- * Tracepoint for stimer_start(one-shot timer case).
- */
-TRACE_EVENT(kvm_hv_stimer_start_one_shot,
- TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count),
- TP_ARGS(vcpu_id, timer_index, time_now, count),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(int, timer_index)
- __field(u64, time_now)
- __field(u64, count)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->timer_index = timer_index;
- __entry->time_now = time_now;
- __entry->count = count;
- ),
-
- TP_printk("vcpu_id %d timer %d time_now %llu count %llu",
- __entry->vcpu_id, __entry->timer_index, __entry->time_now,
- __entry->count)
-);
-
-/*
- * Tracepoint for stimer_timer_callback.
- */
-TRACE_EVENT(kvm_hv_stimer_callback,
- TP_PROTO(int vcpu_id, int timer_index),
- TP_ARGS(vcpu_id, timer_index),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(int, timer_index)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->timer_index = timer_index;
- ),
-
- TP_printk("vcpu_id %d timer %d",
- __entry->vcpu_id, __entry->timer_index)
-);
-
-/*
- * Tracepoint for stimer_expiration.
- */
-TRACE_EVENT(kvm_hv_stimer_expiration,
- TP_PROTO(int vcpu_id, int timer_index, int msg_send_result),
- TP_ARGS(vcpu_id, timer_index, msg_send_result),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(int, timer_index)
- __field(int, msg_send_result)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->timer_index = timer_index;
- __entry->msg_send_result = msg_send_result;
- ),
-
- TP_printk("vcpu_id %d timer %d msg send result %d",
- __entry->vcpu_id, __entry->timer_index,
- __entry->msg_send_result)
-);
-
-/*
- * Tracepoint for stimer_cleanup.
- */
-TRACE_EVENT(kvm_hv_stimer_cleanup,
- TP_PROTO(int vcpu_id, int timer_index),
- TP_ARGS(vcpu_id, timer_index),
-
- TP_STRUCT__entry(
- __field(int, vcpu_id)
- __field(int, timer_index)
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->timer_index = timer_index;
- ),
-
- TP_printk("vcpu_id %d timer %d",
- __entry->vcpu_id, __entry->timer_index)
-);
-
-/*
- * Tracepoint for AMD AVIC
- */
-TRACE_EVENT(kvm_avic_incomplete_ipi,
- TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index),
- TP_ARGS(vcpu, icrh, icrl, id, index),
-
- TP_STRUCT__entry(
- __field(u32, vcpu)
- __field(u32, icrh)
- __field(u32, icrl)
- __field(u32, id)
- __field(u32, index)
- ),
-
- TP_fast_assign(
- __entry->vcpu = vcpu;
- __entry->icrh = icrh;
- __entry->icrl = icrl;
- __entry->id = id;
- __entry->index = index;
- ),
-
- TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n",
- __entry->vcpu, __entry->icrh, __entry->icrl,
- __entry->id, __entry->index)
-);
-
-TRACE_EVENT(kvm_avic_unaccelerated_access,
- TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec),
- TP_ARGS(vcpu, offset, ft, rw, vec),
-
- TP_STRUCT__entry(
- __field(u32, vcpu)
- __field(u32, offset)
- __field(bool, ft)
- __field(bool, rw)
- __field(u32, vec)
- ),
-
- TP_fast_assign(
- __entry->vcpu = vcpu;
- __entry->offset = offset;
- __entry->ft = ft;
- __entry->rw = rw;
- __entry->vec = vec;
- ),
-
- TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n",
- __entry->vcpu,
- __entry->offset,
- __print_symbolic(__entry->offset, kvm_trace_symbol_apic),
- __entry->ft ? "trap" : "fault",
- __entry->rw ? "write" : "read",
- __entry->vec)
-);
-
-TRACE_EVENT(kvm_hv_timer_state,
- TP_PROTO(unsigned int vcpu_id, unsigned int hv_timer_in_use),
- TP_ARGS(vcpu_id, hv_timer_in_use),
- TP_STRUCT__entry(
- __field(unsigned int, vcpu_id)
- __field(unsigned int, hv_timer_in_use)
- ),
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->hv_timer_in_use = hv_timer_in_use;
- ),
- TP_printk("vcpu_id %x hv_timer %x\n",
- __entry->vcpu_id,
- __entry->hv_timer_in_use)
-);
-#endif /* _TRACE_KVM_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH arch/x86/kvm
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE trace
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/arch/x86/kvm/tss.h b/arch/x86/kvm/tss.h
index 622aa10..622aa10 100644..100755
--- a/arch/x86/kvm/tss.h
+++ b/arch/x86/kvm/tss.h
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5382b82..4de8486 100644..100755
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6,6 +6,7 @@
*
* Copyright (C) 2006 Qumranet, Inc.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -22,639 +23,67 @@
#include "lapic.h"
#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/sched.h>
-#include <linux/moduleparam.h>
-#include <linux/mod_devicetable.h>
-#include <linux/trace_events.h>
-#include <linux/slab.h>
-#include <linux/tboot.h>
-#include <linux/hrtimer.h>
+#include <linux/list.h>
+#include <ntkrutils.h>
+#include <__asm.h>
#include "kvm_cache_regs.h"
#include "x86.h"
-
-#include <asm/cpu.h>
-#include <asm/io.h>
-#include <asm/desc.h>
#include <asm/vmx.h>
-#include <asm/virtext.h>
-#include <asm/mce.h>
-#include <asm/fpu/internal.h>
-#include <asm/perf_event.h>
-#include <asm/debugreg.h>
-#include <asm/kexec.h>
-#include <asm/apic.h>
-#include <asm/irq_remapping.h>
-
-#include "trace.h"
-#include "pmu.h"
-#define __ex(x) __kvm_handle_fault_on_reboot(x)
-#define __ex_clear(x, reg) \
- ____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg)
+#include "pmu.h"
+// separate struct definitions into vmx_def.h so that asmgen can include it
+#include "vmx_def.h"
-MODULE_AUTHOR("Qumranet");
-MODULE_LICENSE("GPL");
+#pragma warning(disable : 4146)
+#pragma warning(disable : 4127)
+#pragma warning(disable : 4334)
-static const struct x86_cpu_id vmx_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_VMX),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
+#define DR6_RESERVED (0xFFFF0FF0)
-static bool __read_mostly enable_vpid = 1;
-module_param_named(vpid, enable_vpid, bool, 0444);
+static bool enable_vpid = 0;
-static bool __read_mostly flexpriority_enabled = 1;
-module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
+static bool flexpriority_enabled = 1;
-static bool __read_mostly enable_ept = 1;
-module_param_named(ept, enable_ept, bool, S_IRUGO);
+static bool enable_ept = 1;
-static bool __read_mostly enable_unrestricted_guest = 1;
-module_param_named(unrestricted_guest,
- enable_unrestricted_guest, bool, S_IRUGO);
+static bool enable_unrestricted_guest = 1;
-static bool __read_mostly enable_ept_ad_bits = 1;
-module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
+static bool enable_ept_ad_bits = 1;
-static bool __read_mostly emulate_invalid_guest_state = true;
-module_param(emulate_invalid_guest_state, bool, S_IRUGO);
+static bool emulate_invalid_guest_state = true;
-static bool __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
+static bool vmm_exclusive = 1;
-static bool __read_mostly fasteoi = 1;
-module_param(fasteoi, bool, S_IRUGO);
+static bool fasteoi = 1;
-static bool __read_mostly enable_apicv = 1;
-module_param(enable_apicv, bool, S_IRUGO);
+static bool enable_apicv = 1;
-static bool __read_mostly enable_shadow_vmcs = 1;
-module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
+static bool enable_shadow_vmcs = 0;
/*
* If nested=1, nested virtualization is supported, i.e., guests may use
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
* use VMX instructions.
*/
-static bool __read_mostly nested = 0;
-module_param(nested, bool, S_IRUGO);
-
-static u64 __read_mostly host_xss;
-
-static bool __read_mostly enable_pml = 1;
-module_param_named(pml, enable_pml, bool, S_IRUGO);
-
-#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
-
-/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
-static int __read_mostly cpu_preemption_timer_multi;
-static bool __read_mostly enable_preemption_timer = 1;
-#ifdef CONFIG_X86_64
-module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
-#endif
-
-#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
-#define KVM_VM_CR0_ALWAYS_ON \
- (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_CR4_GUEST_OWNED_BITS \
- (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
-
-#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
-#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
-
-#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
-
-#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
-
-/*
- * These 2 parameters are used to config the controls for Pause-Loop Exiting:
- * ple_gap: upper bound on the amount of time between two successive
- * executions of PAUSE in a loop. Also indicate if ple enabled.
- * According to test, this time is usually smaller than 128 cycles.
- * ple_window: upper bound on the amount of time a guest is allowed to execute
- * in a PAUSE loop. Tests indicate that most spinlocks are held for
- * less than 2^12 cycles
- * Time is measured based on a counter that runs at the same rate as the TSC,
- * refer SDM volume 3b section 21.6.13 & 22.1.3.
- */
-#define KVM_VMX_DEFAULT_PLE_GAP 128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
-#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2
-#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
-#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \
- INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
-
-static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
-module_param(ple_gap, int, S_IRUGO);
-
-static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
-module_param(ple_window, int, S_IRUGO);
-
-/* Default doubles per-vcpu window every exit. */
-static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
-module_param(ple_window_grow, int, S_IRUGO);
-
-/* Default resets per-vcpu window every exit to ple_window. */
-static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
-module_param(ple_window_shrink, int, S_IRUGO);
-
-/* Default is to compute the maximum so we can never overflow. */
-static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
-static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
-module_param(ple_window_max, int, S_IRUGO);
-
-extern const ulong vmx_return;
-
-#define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
-
-struct vmcs {
- u32 revision_id;
- u32 abort;
- char data[0];
-};
-
-/*
- * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
- * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
- * loaded on this CPU (so we can clear them if the CPU goes down).
- */
-struct loaded_vmcs {
- struct vmcs *vmcs;
- struct vmcs *shadow_vmcs;
- int cpu;
- int launched;
- struct list_head loaded_vmcss_on_cpu_link;
-};
-
-struct shared_msr_entry {
- unsigned index;
- u64 data;
- u64 mask;
-};
-
-/*
- * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a
- * single nested guest (L2), hence the name vmcs12. Any VMX implementation has
- * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is
- * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
- * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
- * More than one of these structures may exist, if L1 runs multiple L2 guests.
- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
- * underlying hardware which will be used to run L2.
- * This structure is packed to ensure that its layout is identical across
- * machines (necessary for live migration).
- * If there are changes in this struct, VMCS12_REVISION must be changed.
- */
-typedef u64 natural_width;
-struct __packed vmcs12 {
- /* According to the Intel spec, a VMCS region must start with the
- * following two fields. Then follow implementation-specific data.
- */
- u32 revision_id;
- u32 abort;
-
- u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
- u32 padding[7]; /* room for future expansion */
-
- u64 io_bitmap_a;
- u64 io_bitmap_b;
- u64 msr_bitmap;
- u64 vm_exit_msr_store_addr;
- u64 vm_exit_msr_load_addr;
- u64 vm_entry_msr_load_addr;
- u64 tsc_offset;
- u64 virtual_apic_page_addr;
- u64 apic_access_addr;
- u64 posted_intr_desc_addr;
- u64 ept_pointer;
- u64 eoi_exit_bitmap0;
- u64 eoi_exit_bitmap1;
- u64 eoi_exit_bitmap2;
- u64 eoi_exit_bitmap3;
- u64 xss_exit_bitmap;
- u64 guest_physical_address;
- u64 vmcs_link_pointer;
- u64 guest_ia32_debugctl;
- u64 guest_ia32_pat;
- u64 guest_ia32_efer;
- u64 guest_ia32_perf_global_ctrl;
- u64 guest_pdptr0;
- u64 guest_pdptr1;
- u64 guest_pdptr2;
- u64 guest_pdptr3;
- u64 guest_bndcfgs;
- u64 host_ia32_pat;
- u64 host_ia32_efer;
- u64 host_ia32_perf_global_ctrl;
- u64 padding64[8]; /* room for future expansion */
- /*
- * To allow migration of L1 (complete with its L2 guests) between
- * machines of different natural widths (32 or 64 bit), we cannot have
- * unsigned long fields with no explict size. We use u64 (aliased
- * natural_width) instead. Luckily, x86 is little-endian.
- */
- natural_width cr0_guest_host_mask;
- natural_width cr4_guest_host_mask;
- natural_width cr0_read_shadow;
- natural_width cr4_read_shadow;
- natural_width cr3_target_value0;
- natural_width cr3_target_value1;
- natural_width cr3_target_value2;
- natural_width cr3_target_value3;
- natural_width exit_qualification;
- natural_width guest_linear_address;
- natural_width guest_cr0;
- natural_width guest_cr3;
- natural_width guest_cr4;
- natural_width guest_es_base;
- natural_width guest_cs_base;
- natural_width guest_ss_base;
- natural_width guest_ds_base;
- natural_width guest_fs_base;
- natural_width guest_gs_base;
- natural_width guest_ldtr_base;
- natural_width guest_tr_base;
- natural_width guest_gdtr_base;
- natural_width guest_idtr_base;
- natural_width guest_dr7;
- natural_width guest_rsp;
- natural_width guest_rip;
- natural_width guest_rflags;
- natural_width guest_pending_dbg_exceptions;
- natural_width guest_sysenter_esp;
- natural_width guest_sysenter_eip;
- natural_width host_cr0;
- natural_width host_cr3;
- natural_width host_cr4;
- natural_width host_fs_base;
- natural_width host_gs_base;
- natural_width host_tr_base;
- natural_width host_gdtr_base;
- natural_width host_idtr_base;
- natural_width host_ia32_sysenter_esp;
- natural_width host_ia32_sysenter_eip;
- natural_width host_rsp;
- natural_width host_rip;
- natural_width paddingl[8]; /* room for future expansion */
- u32 pin_based_vm_exec_control;
- u32 cpu_based_vm_exec_control;
- u32 exception_bitmap;
- u32 page_fault_error_code_mask;
- u32 page_fault_error_code_match;
- u32 cr3_target_count;
- u32 vm_exit_controls;
- u32 vm_exit_msr_store_count;
- u32 vm_exit_msr_load_count;
- u32 vm_entry_controls;
- u32 vm_entry_msr_load_count;
- u32 vm_entry_intr_info_field;
- u32 vm_entry_exception_error_code;
- u32 vm_entry_instruction_len;
- u32 tpr_threshold;
- u32 secondary_vm_exec_control;
- u32 vm_instruction_error;
- u32 vm_exit_reason;
- u32 vm_exit_intr_info;
- u32 vm_exit_intr_error_code;
- u32 idt_vectoring_info_field;
- u32 idt_vectoring_error_code;
- u32 vm_exit_instruction_len;
- u32 vmx_instruction_info;
- u32 guest_es_limit;
- u32 guest_cs_limit;
- u32 guest_ss_limit;
- u32 guest_ds_limit;
- u32 guest_fs_limit;
- u32 guest_gs_limit;
- u32 guest_ldtr_limit;
- u32 guest_tr_limit;
- u32 guest_gdtr_limit;
- u32 guest_idtr_limit;
- u32 guest_es_ar_bytes;
- u32 guest_cs_ar_bytes;
- u32 guest_ss_ar_bytes;
- u32 guest_ds_ar_bytes;
- u32 guest_fs_ar_bytes;
- u32 guest_gs_ar_bytes;
- u32 guest_ldtr_ar_bytes;
- u32 guest_tr_ar_bytes;
- u32 guest_interruptibility_info;
- u32 guest_activity_state;
- u32 guest_sysenter_cs;
- u32 host_ia32_sysenter_cs;
- u32 vmx_preemption_timer_value;
- u32 padding32[7]; /* room for future expansion */
- u16 virtual_processor_id;
- u16 posted_intr_nv;
- u16 guest_es_selector;
- u16 guest_cs_selector;
- u16 guest_ss_selector;
- u16 guest_ds_selector;
- u16 guest_fs_selector;
- u16 guest_gs_selector;
- u16 guest_ldtr_selector;
- u16 guest_tr_selector;
- u16 guest_intr_status;
- u16 host_es_selector;
- u16 host_cs_selector;
- u16 host_ss_selector;
- u16 host_ds_selector;
- u16 host_fs_selector;
- u16 host_gs_selector;
- u16 host_tr_selector;
-};
-
-/*
- * VMCS12_REVISION is an arbitrary id that should be changed if the content or
- * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
- * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
- */
-#define VMCS12_REVISION 0x11e57ed0
+static bool nested = 0;
-/*
- * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
- * and any VMCS region. Although only sizeof(struct vmcs12) are used by the
- * current implementation, 4K are reserved to avoid future complications.
- */
-#define VMCS12_SIZE 0x1000
+static u64 host_xss;
-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
-struct vmcs02_list {
- struct list_head list;
- gpa_t vmptr;
- struct loaded_vmcs vmcs02;
-};
-
-/*
- * The nested_vmx structure is part of vcpu_vmx, and holds information we need
- * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
- */
-struct nested_vmx {
- /* Has the level1 guest done vmxon? */
- bool vmxon;
- gpa_t vmxon_ptr;
-
- /* The guest-physical address of the current VMCS L1 keeps for L2 */
- gpa_t current_vmptr;
- /* The host-usable pointer to the above */
- struct page *current_vmcs12_page;
- struct vmcs12 *current_vmcs12;
- /*
- * Cache of the guest's VMCS, existing outside of guest memory.
- * Loaded from guest memory during VMPTRLD. Flushed to guest
- * memory during VMXOFF, VMCLEAR, VMPTRLD.
- */
- struct vmcs12 *cached_vmcs12;
- /*
- * Indicates if the shadow vmcs must be updated with the
- * data hold by vmcs12
- */
- bool sync_shadow_vmcs;
-
- /* vmcs02_list cache of VMCSs recently used to run L2 guests */
- struct list_head vmcs02_pool;
- int vmcs02_num;
- bool change_vmcs01_virtual_x2apic_mode;
- /* L2 must run next, and mustn't decide to exit to L1. */
- bool nested_run_pending;
- /*
- * Guest pages referred to in vmcs02 with host-physical pointers, so
- * we must keep them pinned while L2 runs.
- */
- struct page *apic_access_page;
- struct page *virtual_apic_page;
- struct page *pi_desc_page;
- struct pi_desc *pi_desc;
- bool pi_pending;
- u16 posted_intr_nv;
-
- unsigned long *msr_bitmap;
-
- struct hrtimer preemption_timer;
- bool preemption_timer_expired;
-
- /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
- u64 vmcs01_debugctl;
-
- u16 vpid02;
- u16 last_vpid;
-
- u32 nested_vmx_procbased_ctls_low;
- u32 nested_vmx_procbased_ctls_high;
- u32 nested_vmx_true_procbased_ctls_low;
- u32 nested_vmx_secondary_ctls_low;
- u32 nested_vmx_secondary_ctls_high;
- u32 nested_vmx_pinbased_ctls_low;
- u32 nested_vmx_pinbased_ctls_high;
- u32 nested_vmx_exit_ctls_low;
- u32 nested_vmx_exit_ctls_high;
- u32 nested_vmx_true_exit_ctls_low;
- u32 nested_vmx_entry_ctls_low;
- u32 nested_vmx_entry_ctls_high;
- u32 nested_vmx_true_entry_ctls_low;
- u32 nested_vmx_misc_low;
- u32 nested_vmx_misc_high;
- u32 nested_vmx_ept_caps;
- u32 nested_vmx_vpid_caps;
-};
-
-#define POSTED_INTR_ON 0
-#define POSTED_INTR_SN 1
-
-/* Posted-Interrupt Descriptor */
-struct pi_desc {
- u32 pir[8]; /* Posted interrupt requested */
- union {
- struct {
- /* bit 256 - Outstanding Notification */
- u16 on : 1,
- /* bit 257 - Suppress Notification */
- sn : 1,
- /* bit 271:258 - Reserved */
- rsvd_1 : 14;
- /* bit 279:272 - Notification Vector */
- u8 nv;
- /* bit 287:280 - Reserved */
- u8 rsvd_2;
- /* bit 319:288 - Notification Destination */
- u32 ndst;
- };
- u64 control;
- };
- u32 rsvd[6];
-} __aligned(64);
-
-static bool pi_test_and_set_on(struct pi_desc *pi_desc)
-{
- return test_and_set_bit(POSTED_INTR_ON,
- (unsigned long *)&pi_desc->control);
-}
-
-static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
-{
- return test_and_clear_bit(POSTED_INTR_ON,
- (unsigned long *)&pi_desc->control);
-}
-
-static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
-{
- return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
-}
+static bool enable_pml = 0;
-static inline void pi_clear_sn(struct pi_desc *pi_desc)
-{
- return clear_bit(POSTED_INTR_SN,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline void pi_set_sn(struct pi_desc *pi_desc)
-{
- return set_bit(POSTED_INTR_SN,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_on(struct pi_desc *pi_desc)
-{
- return test_bit(POSTED_INTR_ON,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_sn(struct pi_desc *pi_desc)
-{
- return test_bit(POSTED_INTR_SN,
- (unsigned long *)&pi_desc->control);
-}
-
-struct vcpu_vmx {
- struct kvm_vcpu vcpu;
- unsigned long host_rsp;
- u8 fail;
- bool nmi_known_unmasked;
- u32 exit_intr_info;
- u32 idt_vectoring_info;
- ulong rflags;
- struct shared_msr_entry *guest_msrs;
- int nmsrs;
- int save_nmsrs;
- unsigned long host_idt_base;
-#ifdef CONFIG_X86_64
- u64 msr_host_kernel_gs_base;
- u64 msr_guest_kernel_gs_base;
-#endif
- u32 vm_entry_controls_shadow;
- u32 vm_exit_controls_shadow;
- /*
- * loaded_vmcs points to the VMCS currently used in this vcpu. For a
- * non-nested (L1) guest, it always points to vmcs01. For a nested
- * guest (L2), it points to a different VMCS.
- */
- struct loaded_vmcs vmcs01;
- struct loaded_vmcs *loaded_vmcs;
- bool __launched; /* temporary, used in vmx_vcpu_run */
- struct msr_autoload {
- unsigned nr;
- struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
- struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
- } msr_autoload;
- struct {
- int loaded;
- u16 fs_sel, gs_sel, ldt_sel;
-#ifdef CONFIG_X86_64
- u16 ds_sel, es_sel;
-#endif
- int gs_ldt_reload_needed;
- int fs_reload_needed;
- u64 msr_host_bndcfgs;
- unsigned long vmcs_host_cr4; /* May not match real cr4 */
- } host_state;
- struct {
- int vm86_active;
- ulong save_rflags;
- struct kvm_segment segs[8];
- } rmode;
- struct {
- u32 bitmask; /* 4 bits per segment (1 bit per field) */
- struct kvm_save_segment {
- u16 selector;
- unsigned long base;
- u32 limit;
- u32 ar;
- } seg[8];
- } segment_cache;
- int vpid;
- bool emulation_required;
-
- /* Support for vnmi-less CPUs */
- int soft_vnmi_blocked;
- ktime_t entry_time;
- s64 vnmi_blocked_time;
- u32 exit_reason;
-
- /* Posted interrupt descriptor */
- struct pi_desc pi_desc;
-
- /* Support for a guest hypervisor (nested VMX) */
- struct nested_vmx nested;
-
- /* Dynamic PLE window. */
- int ple_window;
- bool ple_window_dirty;
-
- /* Support for PML */
-#define PML_ENTITY_NUM 512
- struct page *pml_pg;
-
- /* apic deadline value in host tsc */
- u64 hv_deadline_tsc;
-
- u64 current_tsc_ratio;
-
- bool guest_pkru_valid;
- u32 guest_pkru;
- u32 host_pkru;
-
- /*
- * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
- * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
- * in msr_ia32_feature_control_valid_bits.
- */
- u64 msr_ia32_feature_control;
- u64 msr_ia32_feature_control_valid_bits;
-};
-
-enum segment_cache_field {
- SEG_FIELD_SEL = 0,
- SEG_FIELD_BASE = 1,
- SEG_FIELD_LIMIT = 2,
- SEG_FIELD_AR = 3,
-
- SEG_FIELD_NR = 4
-};
+extern const size_t vmx_return;
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_vmx, vcpu);
}
-static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
-{
- return &(to_vmx(vcpu)->pi_desc);
-}
-
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
#define FIELD(number, name) [number] = VMCS12_OFFSET(name)
#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
[number##_HIGH] = VMCS12_OFFSET(name)+4
-static unsigned long shadow_read_only_fields[] = {
+static size_t shadow_read_only_fields[] = {
/*
* We do NOT shadow fields that are modified when L0
* traps and emulates any vmx instruction (e.g. VMPTRLD,
@@ -680,7 +109,7 @@ static unsigned long shadow_read_only_fields[] = {
static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
-static unsigned long shadow_read_write_fields[] = {
+static size_t shadow_read_write_fields[] = {
TPR_THRESHOLD,
GUEST_RIP,
GUEST_RSP,
@@ -853,10 +282,8 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD(HOST_RIP, host_rip),
};
-static inline short vmcs_field_to_offset(unsigned long field)
+static inline short vmcs_field_to_offset(size_t field)
{
- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
-
if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
vmcs_field_to_offset_table[field] == 0)
return -ENOENT;
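
The FIELD()/FIELD64() macros above build a flat table that maps a VMCS field encoding to the byte offset of the backing member inside struct vmcs12; vmcs_field_to_offset() then bounds-checks the encoding and returns that offset, which the software VMREAD/VMWRITE emulation adds to the cached vmcs12 pointer. A minimal, self-contained sketch of the same trick follows; the field numbers and the struct are invented for illustration (they are not the real VMCS encodings or vmcs12 layout), and little-endian layout is assumed for the _HIGH alias.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

enum demo_field {
        DEMO_IO_BITMAP_A       = 0,
        DEMO_IO_BITMAP_A_HIGH  = 1,
        DEMO_GUEST_ES_SELECTOR = 2,
        DEMO_NR_FIELDS
};

struct demo_vmcs12 {
        uint64_t io_bitmap_a;
        uint16_t guest_es_selector;
};

#define DEMO_OFFSET(x)        offsetof(struct demo_vmcs12, x)
#define FIELD(number, name)   [number] = DEMO_OFFSET(name)
#define FIELD64(number, name) [number] = DEMO_OFFSET(name), \
        [number##_HIGH] = DEMO_OFFSET(name) + 4

static const unsigned short demo_field_to_offset[DEMO_NR_FIELDS] = {
        FIELD64(DEMO_IO_BITMAP_A, io_bitmap_a),
        FIELD(DEMO_GUEST_ES_SELECTOR, guest_es_selector),
};

int main(void)
{
        struct demo_vmcs12 v = { .io_bitmap_a = 0x1122334455667788ULL };
        uint32_t hi;

        /* A software VMREAD adds the table offset to the cached vmcs12
         * pointer; the _HIGH alias lands on the upper 32 bits. */
        memcpy(&hi, (char *)&v + demo_field_to_offset[DEMO_IO_BITMAP_A_HIGH],
               sizeof(hi));
        printf("upper half of io_bitmap_a: 0x%08x\n", hi);
        return 0;
}
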
@@ -869,27 +296,31 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
return to_vmx(vcpu)->nested.cached_vmcs12;
}
-static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
+static PMDL nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
{
- struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT);
- if (is_error_page(page))
+ PMDL mdl;
+ size_t hva;
+
+ hva = kvm_vcpu_gfn_to_hva(vcpu, addr >> PAGE_SHIFT);
+ if (kvm_is_error_hva(hva))
return NULL;
- return page;
-}
+ mdl = IoAllocateMdl((void *)hva, PAGE_SIZE, FALSE, FALSE, NULL);
+ if (!mdl)
+ return NULL;
-static void nested_release_page(struct page *page)
-{
- kvm_release_page_dirty(page);
+ MmProbeAndLockPages(mdl, KernelMode, IoWriteAccess);
+
+ return mdl;
}
-static void nested_release_page_clean(struct page *page)
+static void nested_release_page(PMDL mdl)
{
- kvm_release_page_clean(page);
+ kvm_release_page(mdl);
}
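
The rewritten nested_get_page() pins the guest page the Windows way: it translates the gfn to a host virtual address, describes it with an MDL via IoAllocateMdl(), and locks it resident with MmProbeAndLockPages(). The matching teardown is delegated to kvm_release_page(), whose body is not part of this hunk; the sketch below is only a plausible outline of what such a release has to do, assuming nothing beyond unlock and free is required.

#include <ntddk.h>

/* Hedged sketch only: the real cleanup lives in kvm_release_page(),
 * which this diff does not show. At minimum it must undo the
 * probe/lock and free the MDL allocated in nested_get_page(). */
static void demo_release_nested_page(PMDL mdl)
{
        if (!mdl)
                return;
        MmUnlockPages(mdl);     /* undo MmProbeAndLockPages() */
        IoFreeMdl(mdl);         /* undo IoAllocateMdl() */
}

Note also that MmProbeAndLockPages() raises an exception rather than returning an error when the pages cannot be locked, so callers normally wrap it in __try/__except.
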
-static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
-static u64 construct_eptp(unsigned long root_hpa);
+static size_t nested_ept_get_cr3(struct kvm_vcpu *vcpu);
+static u64 construct_eptp(size_t root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
static bool vmx_xsaves_supported(void);
@@ -904,14 +335,10 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
static int alloc_identity_pagetable(struct kvm *kvm);
+
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
-static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
-/*
- * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
- * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
- */
-static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
+static DEFINE_PER_CPU(struct desc_ptr, host_idt);
/*
 * We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we
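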
@@ -920,16 +347,16 @@ static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
-static unsigned long *vmx_io_bitmap_a;
-static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap_legacy;
-static unsigned long *vmx_msr_bitmap_longmode;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
-static unsigned long *vmx_vmread_bitmap;
-static unsigned long *vmx_vmwrite_bitmap;
+static size_t *vmx_io_bitmap_a;
+static size_t *vmx_io_bitmap_b;
+static size_t *vmx_msr_bitmap_legacy;
+static size_t *vmx_msr_bitmap_longmode;
+static size_t *vmx_msr_bitmap_legacy_x2apic;
+static size_t *vmx_msr_bitmap_longmode_x2apic;
+static size_t *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+static size_t *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
+static size_t *vmx_vmread_bitmap;
+static size_t *vmx_vmwrite_bitmap;
static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;
@@ -982,17 +409,6 @@ static u64 host_efer;
static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
-/*
- * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
- * away by decrementing the array size.
- */
-static const u32 vmx_msr_index[] = {
-#ifdef CONFIG_X86_64
- MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
-#endif
- MSR_EFER, MSR_TSC_AUX, MSR_STAR,
-};
-
static inline bool is_exception_n(u32 intr_info, u8 vector)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -1015,11 +431,6 @@ static inline bool is_page_fault(u32 intr_info)
return is_exception_n(intr_info, PF_VECTOR);
}
-static inline bool is_no_device(u32 intr_info)
-{
- return is_exception_n(intr_info, NM_VECTOR);
-}
-
static inline bool is_invalid_opcode(u32 intr_info)
{
return is_exception_n(intr_info, UD_VECTOR);
@@ -1083,69 +494,10 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
}
-/*
- * Comment's format: document - errata name - stepping - processor name.
- * Refer from
- * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
- */
-static u32 vmx_preemption_cpu_tfms[] = {
-/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
-0x000206E6,
-/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
-/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
-/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
-0x00020652,
-/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
-0x00020655,
-/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
-/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
-/*
- * 320767.pdf - AAP86 - B1 -
- * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
- */
-0x000106E5,
-/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
-0x000106A0,
-/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
-0x000106A1,
-/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
-0x000106A4,
- /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
- /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
- /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
-0x000106A5,
-};
-
-static inline bool cpu_has_broken_vmx_preemption_timer(void)
-{
- u32 eax = cpuid_eax(0x00000001), i;
-
- /* Clear the reserved bits */
- eax &= ~(0x3U << 14 | 0xfU << 28);
- for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
- if (eax == vmx_preemption_cpu_tfms[i])
- return true;
-
- return false;
-}
-
-static inline bool cpu_has_vmx_preemption_timer(void)
-{
- return vmcs_config.pin_based_exec_ctrl &
- PIN_BASED_VMX_PREEMPTION_TIMER;
-}
-
-static inline bool cpu_has_vmx_posted_intr(void)
-{
- return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
- vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
-}
-
static inline bool cpu_has_vmx_apicv(void)
{
return cpu_has_vmx_apic_register_virt() &&
- cpu_has_vmx_virtual_intr_delivery() &&
- cpu_has_vmx_posted_intr();
+ cpu_has_vmx_virtual_intr_delivery();
}
static inline bool cpu_has_vmx_flexpriority(void)
@@ -1211,12 +563,6 @@ static inline bool cpu_has_vmx_unrestricted_guest(void)
SECONDARY_EXEC_UNRESTRICTED_GUEST;
}
-static inline bool cpu_has_vmx_ple(void)
-{
- return vmcs_config.cpu_based_2nd_exec_ctrl &
- SECONDARY_EXEC_PAUSE_LOOP_EXITING;
-}
-
static inline bool cpu_has_vmx_basic_inout(void)
{
return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
@@ -1273,12 +619,6 @@ static inline bool cpu_has_vmx_pml(void)
return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
}
-static inline bool cpu_has_vmx_tsc_scaling(void)
-{
- return vmcs_config.cpu_based_2nd_exec_ctrl &
- SECONDARY_EXEC_TSC_SCALING;
-}
-
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
@@ -1301,12 +641,6 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
-static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
-{
- return vmcs12->pin_based_vm_exec_control &
- PIN_BASED_VMX_PREEMPTION_TIMER;
-}
-
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1338,11 +672,6 @@ static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
}
-static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
-{
- return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
-}
-
static inline bool is_exception(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1351,65 +680,20 @@ static inline bool is_exception(u32 intr_info)
static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
u32 exit_intr_info,
- unsigned long exit_qualification);
+ size_t exit_qualification);
static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12,
- u32 reason, unsigned long qualification);
-
-static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
-{
- int i;
-
- for (i = 0; i < vmx->nmsrs; ++i)
- if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
- return i;
- return -1;
-}
+ u32 reason, size_t qualification);
-static inline void __invvpid(int ext, u16 vpid, gva_t gva)
-{
- struct {
- u64 vpid : 16;
- u64 rsvd : 48;
- u64 gva;
- } operand = { vpid, 0, gva };
-
- asm volatile (__ex(ASM_VMX_INVVPID)
- /* CF==1 or ZF==1 --> rc = -1 */
- "; ja 1f ; ud2 ; 1:"
- : : "a"(&operand), "c"(ext) : "cc", "memory");
-}
-
-static inline void __invept(int ext, u64 eptp, gpa_t gpa)
-{
- struct {
- u64 eptp, gpa;
- } operand = {eptp, gpa};
-
- asm volatile (__ex(ASM_VMX_INVEPT)
- /* CF==1 or ZF==1 --> rc = -1 */
- "; ja 1f ; ud2 ; 1:\n"
- : : "a" (&operand), "c" (ext) : "cc", "memory");
-}
-
-static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
-{
- int i;
-
- i = __find_msr_index(vmx, msr);
- if (i >= 0)
- return &vmx->guest_msrs[i];
- return NULL;
-}
+#define __invvpid(a, b, c)
+#define __invept(a, b, c)
static void vmcs_clear(struct vmcs *vmcs)
{
u64 phys_addr = __pa(vmcs);
u8 error;
- asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
- : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
- : "cc", "memory");
+ error = __vmx_vmclear(&phys_addr);
if (error)
printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
vmcs, phys_addr);
@@ -1429,85 +713,12 @@ static void vmcs_load(struct vmcs *vmcs)
u64 phys_addr = __pa(vmcs);
u8 error;
- asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
- : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
- : "cc", "memory");
- if (error)
+ error = __vmx_vmptrld(&phys_addr);
+ if (error) {
+ DbgBreakPoint();
printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
- vmcs, phys_addr);
-}
-
-#ifdef CONFIG_KEXEC_CORE
-/*
- * This bitmap is used to indicate whether the vmclear
- * operation is enabled on all cpus. All disabled by
- * default.
- */
-static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
-
-static inline void crash_enable_local_vmclear(int cpu)
-{
- cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
-}
-
-static inline void crash_disable_local_vmclear(int cpu)
-{
- cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
-}
-
-static inline int crash_local_vmclear_enabled(int cpu)
-{
- return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
-}
-
-static void crash_vmclear_local_loaded_vmcss(void)
-{
- int cpu = raw_smp_processor_id();
- struct loaded_vmcs *v;
-
- if (!crash_local_vmclear_enabled(cpu))
- return;
-
- list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
- loaded_vmcss_on_cpu_link)
- vmcs_clear(v->vmcs);
-}
-#else
-static inline void crash_enable_local_vmclear(int cpu) { }
-static inline void crash_disable_local_vmclear(int cpu) { }
-#endif /* CONFIG_KEXEC_CORE */
-
-static void __loaded_vmcs_clear(void *arg)
-{
- struct loaded_vmcs *loaded_vmcs = arg;
- int cpu = raw_smp_processor_id();
-
- if (loaded_vmcs->cpu != cpu)
- return; /* vcpu migration can race with cpu offline */
- if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
- per_cpu(current_vmcs, cpu) = NULL;
- crash_disable_local_vmclear(cpu);
- list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
-
- /*
- * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
- * is before setting loaded_vmcs->vcpu to -1 which is done in
- * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
- * then adds the vmcs into percpu list before it is deleted.
- */
- smp_wmb();
-
- loaded_vmcs_init(loaded_vmcs);
- crash_enable_local_vmclear(cpu);
-}
-
-static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
-{
- int cpu = loaded_vmcs->cpu;
-
- if (cpu != -1)
- smp_call_function_single(cpu,
- __loaded_vmcs_clear, loaded_vmcs, 1);
+ vmcs, phys_addr);
+ }
}
static inline void vpid_sync_vcpu_single(int vpid)
@@ -1549,154 +760,118 @@ static inline void ept_sync_context(u64 eptp)
}
}
-static __always_inline void vmcs_check16(unsigned long field)
-{
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
- "16-bit accessor invalid for 64-bit field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
- "16-bit accessor invalid for 64-bit high field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
- "16-bit accessor invalid for 32-bit high field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
- "16-bit accessor invalid for natural width field");
-}
+#define VMCS_RW_DEBUG
-static __always_inline void vmcs_check32(unsigned long field)
-{
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
- "32-bit accessor invalid for 16-bit field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
- "32-bit accessor invalid for natural width field");
-}
+static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+static void vmx_vcpu_put(struct kvm_vcpu *vcpu);
-static __always_inline void vmcs_check64(unsigned long field)
+static __forceinline size_t __vmcs_readl(struct kvm_vcpu* vcpu, size_t field)
{
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
- "64-bit accessor invalid for 16-bit field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
- "64-bit accessor invalid for 64-bit high field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
- "64-bit accessor invalid for 32-bit field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
- "64-bit accessor invalid for natural width field");
-}
+ size_t value;
-static __always_inline void vmcs_checkl(unsigned long field)
-{
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
- "Natural width accessor invalid for 16-bit field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
- "Natural width accessor invalid for 64-bit field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
- "Natural width accessor invalid for 64-bit high field");
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
- "Natural width accessor invalid for 32-bit field");
-}
+ preempt_disable();
+ vmcs_load(to_vmx(vcpu)->loaded_vmcs->vmcs);
-static __always_inline unsigned long __vmcs_readl(unsigned long field)
-{
- unsigned long value;
+ __vmx_vmread(field, &value);
+
+ vmcs_clear(to_vmx(vcpu)->loaded_vmcs->vmcs);
+ preempt_enable();
- asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0")
- : "=a"(value) : "d"(field) : "cc");
return value;
}
-static __always_inline u16 vmcs_read16(unsigned long field)
+static __forceinline u16 vmcs_read16(struct kvm_vcpu* vcpu, size_t field)
{
- vmcs_check16(field);
- return __vmcs_readl(field);
+ return __vmcs_readl(vcpu, field);
}
-static __always_inline u32 vmcs_read32(unsigned long field)
+static __forceinline u32 vmcs_read32(struct kvm_vcpu* vcpu, size_t field)
{
- vmcs_check32(field);
- return __vmcs_readl(field);
+ return __vmcs_readl(vcpu, field);
}
-static __always_inline u64 vmcs_read64(unsigned long field)
+static __forceinline u64 vmcs_read64(struct kvm_vcpu* vcpu, size_t field)
{
- vmcs_check64(field);
#ifdef CONFIG_X86_64
- return __vmcs_readl(field);
+ return __vmcs_readl(vcpu, field);
#else
return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32);
#endif
}
-static __always_inline unsigned long vmcs_readl(unsigned long field)
+static __forceinline size_t vmcs_readl(struct kvm_vcpu* vcpu, size_t field)
{
- vmcs_checkl(field);
- return __vmcs_readl(field);
+ return __vmcs_readl(vcpu, field);
}
-static noinline void vmwrite_error(unsigned long field, unsigned long value)
+static __declspec(noinline) void vmwrite_error(struct kvm_vcpu* vcpu, size_t field, size_t value)
{
printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
- field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
+ field, value, vmcs_read32(vcpu, VM_INSTRUCTION_ERROR));
+#if 0
dump_stack();
+#endif
}
-static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
+static __always_inline void __vmcs_writel(struct kvm_vcpu* vcpu, size_t field, size_t value)
{
u8 error;
- asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
- : "=q"(error) : "a"(value), "d"(field) : "cc");
- if (unlikely(error))
- vmwrite_error(field, value);
+ preempt_disable();
+ vmcs_load(to_vmx(vcpu)->loaded_vmcs->vmcs);
+
+ error = __vmx_vmwrite(field, value);
+ if (unlikely(error)) {
+ DbgBreakPoint();
+ vmwrite_error(vcpu, field, value);
+ }
+
+ vmcs_clear(to_vmx(vcpu)->loaded_vmcs->vmcs);
+ preempt_enable();
}
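
With the per-CPU current-VMCS bookkeeping gone, every accessor now makes the target VMCS current with vmcs_load(), issues the VMREAD/VMWRITE through the compiler intrinsic, and then VMCLEARs it again, all under preempt_disable(). A stripped-down sketch of that pattern against the bare MSVC intrinsics is below; the field encoding and the caller-supplied physical address are assumptions, and preemption control is omitted.

#include <stddef.h>
#include <intrin.h>

/* vmcs_pa: physical address of a VMCS that is not currently active
 * anywhere else. Returns the natural-width field value, or 0 if the
 * VMPTRLD itself fails. Illustration only. */
static size_t demo_vmcs_read(unsigned __int64 vmcs_pa, size_t field)
{
        size_t value = 0;

        if (__vmx_vmptrld(&vmcs_pa))    /* make this VMCS current */
                return 0;
        __vmx_vmread(field, &value);    /* VMREAD field -> value */
        __vmx_vmclear(&vmcs_pa);        /* flush and deactivate it again */
        return value;
}

Clearing after every access keeps the code simple and safe across CPU migration, at the cost of a VMCLEAR/VMPTRLD round trip per field; the removed per-CPU current_vmcs cache existed precisely to avoid that.
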
-static __always_inline void vmcs_write16(unsigned long field, u16 value)
+static __always_inline void vmcs_write16(struct kvm_vcpu* vcpu, size_t field, u16 value)
{
- vmcs_check16(field);
- __vmcs_writel(field, value);
+ __vmcs_writel(vcpu, field, value);
}
-static __always_inline void vmcs_write32(unsigned long field, u32 value)
+static __always_inline void vmcs_write32(struct kvm_vcpu* vcpu, size_t field, u32 value)
{
- vmcs_check32(field);
- __vmcs_writel(field, value);
+ __vmcs_writel(vcpu, field, value);
}
-static __always_inline void vmcs_write64(unsigned long field, u64 value)
+static __always_inline void vmcs_write64(struct kvm_vcpu* vcpu, size_t field, u64 value)
{
- vmcs_check64(field);
- __vmcs_writel(field, value);
+ __vmcs_writel(vcpu, field, value);
#ifndef CONFIG_X86_64
asm volatile ("");
__vmcs_writel(field+1, value >> 32);
#endif
}
-static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
+static __always_inline void vmcs_writel(struct kvm_vcpu* vcpu, size_t field, size_t value)
{
- vmcs_checkl(field);
- __vmcs_writel(field, value);
+ __vmcs_writel(vcpu, field, value);
}
-static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
+static __always_inline void vmcs_clear_bits(struct kvm_vcpu* vcpu, size_t field, u32 mask)
{
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
- "vmcs_clear_bits does not support 64-bit fields");
- __vmcs_writel(field, __vmcs_readl(field) & ~mask);
+ __vmcs_writel(vcpu, field, __vmcs_readl(vcpu, field) & ~mask);
}
-static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
+static __always_inline void vmcs_set_bits(struct kvm_vcpu* vcpu, size_t field, u32 mask)
{
- BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
- "vmcs_set_bits does not support 64-bit fields");
- __vmcs_writel(field, __vmcs_readl(field) | mask);
+ __vmcs_writel(vcpu, field, __vmcs_readl(vcpu, field) | mask);
}
static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
{
- vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
+ vmx->vm_entry_controls_shadow = vmcs_read32(&vmx->vcpu, VM_ENTRY_CONTROLS);
}
static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
{
- vmcs_write32(VM_ENTRY_CONTROLS, val);
+ vmcs_write32(&vmx->vcpu, VM_ENTRY_CONTROLS, val);
vmx->vm_entry_controls_shadow = val;
}
@@ -1724,12 +899,12 @@ static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
{
- vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
+ vmx->vm_exit_controls_shadow = vmcs_read32(&vmx->vcpu, VM_EXIT_CONTROLS);
}
static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
{
- vmcs_write32(VM_EXIT_CONTROLS, val);
+ vmcs_write32(&vmx->vcpu, VM_EXIT_CONTROLS, val);
vmx->vm_exit_controls_shadow = val;
}
@@ -1780,7 +955,7 @@ static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
u16 *p = &vmx->segment_cache.seg[seg].selector;
if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
- *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
+ *p = vmcs_read16(&vmx->vcpu, kvm_vmx_segment_fields[seg].selector);
return *p;
}
@@ -1789,7 +964,7 @@ static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
ulong *p = &vmx->segment_cache.seg[seg].base;
if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
- *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
+ *p = vmcs_readl(&vmx->vcpu, kvm_vmx_segment_fields[seg].base);
return *p;
}
@@ -1798,7 +973,7 @@ static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
u32 *p = &vmx->segment_cache.seg[seg].limit;
if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
- *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
+ *p = vmcs_read32(&vmx->vcpu, kvm_vmx_segment_fields[seg].limit);
return *p;
}
@@ -1807,7 +982,7 @@ static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
u32 *p = &vmx->segment_cache.seg[seg].ar;
if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
- *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
+ *p = vmcs_read32(&vmx->vcpu, kvm_vmx_segment_fields[seg].ar_bytes);
return *p;
}
@@ -1816,17 +991,15 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
u32 eb;
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
- (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR);
+ (1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
- (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
- (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
+ (GVM_GUESTDBG_ENABLE | GVM_GUESTDBG_USE_SW_BP)) ==
+ (GVM_GUESTDBG_ENABLE | GVM_GUESTDBG_USE_SW_BP))
eb |= 1u << BP_VECTOR;
if (to_vmx(vcpu)->rmode.vm86_active)
eb = ~0;
if (enable_ept)
eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
- if (vcpu->fpu_active)
- eb &= ~(1u << NM_VECTOR);
/* When we are running a nested L2 guest and L1 specified for it a
* certain exception bitmap, we must trap the same exceptions and pass
@@ -1836,11 +1009,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
- vmcs_write32(EXCEPTION_BITMAP, eb);
+ vmcs_write32(vcpu, EXCEPTION_BITMAP, eb);
}
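
update_exception_bitmap() composes EXCEPTION_BITMAP as an OR of the vectors the hypervisor wants to intercept, widens it to ~0 in vm86 mode, and drops #PF when EPT handles guest page faults. A tiny worked example of the same bit arithmetic follows; the vector numbers are architectural, but the chosen combination is only illustrative.

#include <stdio.h>

#define DB_VECTOR  1
#define BP_VECTOR  3
#define UD_VECTOR  6
#define PF_VECTOR 14
#define AC_VECTOR 17
#define MC_VECTOR 18

int main(void)
{
        unsigned int eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) |
                          (1u << MC_VECTOR) | (1u << DB_VECTOR) |
                          (1u << AC_VECTOR);

        eb |= 1u << BP_VECTOR;      /* guest debugging with software breakpoints */
        eb &= ~(1u << PF_VECTOR);   /* with EPT, guest #PF need not exit */
        printf("EXCEPTION_BITMAP = 0x%08x\n", eb);
        return 0;
}
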
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
- unsigned long entry, unsigned long exit)
+ size_t entry, size_t exit)
{
vm_entry_controls_clearbit(vmx, entry);
vm_exit_controls_clearbit(vmx, exit);
@@ -1879,17 +1052,17 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
--m->nr;
m->guest[i] = m->guest[m->nr];
m->host[i] = m->host[m->nr];
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
+ vmcs_write32(&vmx->vcpu, VM_ENTRY_MSR_LOAD_COUNT, m->nr);
+ vmcs_write32(&vmx->vcpu, VM_EXIT_MSR_LOAD_COUNT, m->nr);
}
static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
- unsigned long entry, unsigned long exit,
- unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
+ size_t entry, size_t exit,
+ size_t guest_val_vmcs, size_t host_val_vmcs,
u64 guest_val, u64 host_val)
{
- vmcs_write64(guest_val_vmcs, guest_val);
- vmcs_write64(host_val_vmcs, host_val);
+ vmcs_write64(&vmx->vcpu, guest_val_vmcs, guest_val);
+ vmcs_write64(&vmx->vcpu, host_val_vmcs, host_val);
vm_entry_controls_setbit(vmx, entry);
vm_exit_controls_setbit(vmx, exit);
}
@@ -1942,8 +1115,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
return;
} else if (i == m->nr) {
++m->nr;
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
+ //vmcs_write32(&vmx->vcpu, VM_ENTRY_MSR_LOAD_COUNT, m->nr);
+ //vmcs_write32(&vmx->vcpu, VM_EXIT_MSR_LOAD_COUNT, m->nr);
}
m->guest[i].index = msr;
@@ -1965,7 +1138,7 @@ static void reload_tss(void)
load_TR_desc();
}
-static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
+static bool update_transition_efer(struct vcpu_vmx *vmx)
{
u64 guest_efer = vmx->vcpu.arch.efer;
u64 ignore_bits = 0;
@@ -1995,36 +1168,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
clear_atomic_switch_msr(vmx, MSR_EFER);
- /*
- * On EPT, we can't emulate NX, so we must switch EFER atomically.
- * On CPUs that support "load IA32_EFER", always switch EFER
- * atomically, since it's faster than switching it manually.
- */
- if (cpu_has_load_ia32_efer ||
- (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
- if (!(guest_efer & EFER_LMA))
- guest_efer &= ~EFER_LME;
- if (guest_efer != host_efer)
- add_atomic_switch_msr(vmx, MSR_EFER,
- guest_efer, host_efer);
- return false;
- } else {
- guest_efer &= ~ignore_bits;
- guest_efer |= host_efer & ignore_bits;
-
- vmx->guest_msrs[efer_offset].data = guest_efer;
- vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
-
- return true;
- }
+ if (!(guest_efer & EFER_LMA))
+ guest_efer &= ~EFER_LME;
+ if (guest_efer != host_efer)
+ add_atomic_switch_msr(vmx, MSR_EFER,
+ guest_efer, host_efer);
+ return false;
}
-static unsigned long segment_base(u16 selector)
+static size_t segment_base(u16 selector)
{
struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
struct desc_struct *d;
- unsigned long table_base;
- unsigned long v;
+ size_t table_base;
+ size_t v;
if (!(selector & ~3))
return 0;
@@ -2043,47 +1200,40 @@ static unsigned long segment_base(u16 selector)
v = get_desc_base(d);
#ifdef CONFIG_X86_64
if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
- v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
+ v |= ((size_t)((struct ldttss_desc64 *)d)->base3) << 32;
#endif
return v;
}
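
segment_base() walks the GDT (or LDT) entry for the selector and stitches the base address back together from the split base fields of the descriptor; 64-bit system descriptors (LDT/TSS) contribute an extra base3 dword, as the CONFIG_X86_64 branch shows. A self-contained example of the 32-bit part of that reassembly; the sample descriptor is made up and encodes base 0x12345678.

#include <stdint.h>
#include <stdio.h>

/* Legacy 8-byte descriptor layout: base[23:0] in bits 16-39,
 * base[31:24] in bits 56-63 (limit and attribute bits in between). */
static uint32_t demo_desc_base(uint64_t d)
{
        return (uint32_t)((d >> 16) & 0xffffff) |
               (uint32_t)(((d >> 56) & 0xff) << 24);
}

int main(void)
{
        uint64_t desc = 0x12cf9a345678ffffULL;  /* base 0x12345678, limit 0xffff */

        printf("base = 0x%08x\n", demo_desc_base(desc));  /* prints 0x12345678 */
        return 0;
}
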
-static inline unsigned long kvm_read_tr_base(void)
+static inline size_t kvm_read_tr_base(void)
{
- u16 tr;
- asm("str %0" : "=g"(tr));
+	u16 tr = gvm_read_tr();
return segment_base(tr);
}
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int i;
- if (vmx->host_state.loaded)
- return;
-
- vmx->host_state.loaded = 1;
/*
- * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
+ * Set host fs and gs selectors. Unfortunately, 26.2.3 does not
* allow segment selectors with cpl > 0 or ti == 1.
*/
- vmx->host_state.ldt_sel = kvm_read_ldt();
- vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
savesegment(fs, vmx->host_state.fs_sel);
if (!(vmx->host_state.fs_sel & 7)) {
- vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
+ vmcs_write16(vcpu, HOST_FS_SELECTOR, vmx->host_state.fs_sel);
vmx->host_state.fs_reload_needed = 0;
} else {
- vmcs_write16(HOST_FS_SELECTOR, 0);
+ vmcs_write16(vcpu, HOST_FS_SELECTOR, 0);
vmx->host_state.fs_reload_needed = 1;
}
savesegment(gs, vmx->host_state.gs_sel);
if (!(vmx->host_state.gs_sel & 7))
- vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
+ vmcs_write16(vcpu, HOST_GS_SELECTOR, vmx->host_state.gs_sel);
else {
- vmcs_write16(HOST_GS_SELECTOR, 0);
- vmx->host_state.gs_ldt_reload_needed = 1;
+ vmcs_write16(vcpu, HOST_GS_SELECTOR, 0);
+ vmx->host_state.gs_reload_needed = 1;
}
#ifdef CONFIG_X86_64
@@ -2092,8 +1242,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#endif
#ifdef CONFIG_X86_64
- vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
- vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
+ vmcs_writel(vcpu, HOST_FS_BASE, read_msr(MSR_FS_BASE));
+ vmcs_writel(vcpu, HOST_GS_BASE, read_msr(MSR_GS_BASE));
#else
vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
@@ -2106,25 +1256,16 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#endif
if (boot_cpu_has(X86_FEATURE_MPX))
rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
- for (i = 0; i < vmx->save_nmsrs; ++i)
- kvm_set_shared_msr(vmx->guest_msrs[i].index,
- vmx->guest_msrs[i].data,
- vmx->guest_msrs[i].mask);
}
static void __vmx_load_host_state(struct vcpu_vmx *vmx)
{
- if (!vmx->host_state.loaded)
- return;
-
++vmx->vcpu.stat.host_state_reload;
- vmx->host_state.loaded = 0;
#ifdef CONFIG_X86_64
if (is_long_mode(&vmx->vcpu))
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
- if (vmx->host_state.gs_ldt_reload_needed) {
- kvm_load_ldt(vmx->host_state.ldt_sel);
+ if (vmx->host_state.gs_reload_needed) {
#ifdef CONFIG_X86_64
load_gs_index(vmx->host_state.gs_sel);
#else
@@ -2145,74 +1286,14 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#endif
if (vmx->host_state.msr_host_bndcfgs)
wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
- /*
- * If the FPU is not active (through the host task or
- * the guest vcpu), then restore the cr0.TS bit.
- */
- if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded)
- stts();
load_gdt(this_cpu_ptr(&host_gdt));
+ load_idt(this_cpu_ptr(&host_idt));
}
-static void vmx_load_host_state(struct vcpu_vmx *vmx)
+static void vmx_load_host_state(struct kvm_vcpu *vcpu)
{
- preempt_disable();
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
__vmx_load_host_state(vmx);
- preempt_enable();
-}
-
-static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
- return;
-
- do {
- old.control = new.control = pi_desc->control;
-
- /*
- * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
- * are two possible cases:
- * 1. After running 'pre_block', context switch
- * happened. For this case, 'sn' was set in
- * vmx_vcpu_put(), so we need to clear it here.
- * 2. After running 'pre_block', we were blocked,
- * and woken up by some other guy. For this case,
- * we don't need to do anything, 'pi_post_block'
- * will do everything for us. However, we cannot
- * check whether it is case #1 or case #2 here
- * (maybe, not needed), so we also clear sn here,
- * I think it is not a big deal.
- */
- if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
- if (vcpu->cpu != cpu) {
- dest = cpu_physical_id(cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
- }
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- }
-
- /* Allow posting non-urgent interrupts */
- new.sn = 0;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
-}
-
-static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
-{
- vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
- vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
}
/*
@@ -2227,104 +1308,26 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (!vmm_exclusive)
kvm_cpu_vmxon(phys_addr);
- else if (!already_loaded)
- loaded_vmcs_clear(vmx->loaded_vmcs);
-
- if (!already_loaded) {
- local_irq_disable();
- crash_disable_local_vmclear(cpu);
-
- /*
- * Read loaded_vmcs->cpu should be before fetching
- * loaded_vmcs->loaded_vmcss_on_cpu_link.
- * See the comments in __loaded_vmcs_clear().
- */
- smp_rmb();
-
- list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
- &per_cpu(loaded_vmcss_on_cpu, cpu));
- crash_enable_local_vmclear(cpu);
- local_irq_enable();
- }
-
- if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
- per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
- vmcs_load(vmx->loaded_vmcs->vmcs);
- }
if (!already_loaded) {
- struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
- unsigned long sysenter_esp;
-
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(GVM_REQ_TLB_FLUSH, vcpu);
/*
* Linux uses per-cpu TSS and GDT, so set these when switching
* processors.
*/
- vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
- vmcs_writel(HOST_GDTR_BASE, gdt->address); /* 22.2.4 */
-
- rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
- vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx->loaded_vmcs->cpu = cpu;
}
-
- /* Setup TSC multiplier */
- if (kvm_has_tsc_control &&
- vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
- decache_tsc_multiplier(vmx);
-
- vmx_vcpu_pi_load(vcpu, cpu);
- vmx->host_pkru = read_pkru();
-}
-
-static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
- return;
-
- /* Set SN when the vCPU is preempted */
- if (vcpu->preempted)
- pi_set_sn(pi_desc);
}
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
- vmx_vcpu_pi_put(vcpu);
-
- __vmx_load_host_state(to_vmx(vcpu));
if (!vmm_exclusive) {
- __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
- vcpu->cpu = -1;
kvm_cpu_vmxoff();
}
}
-static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
-{
- ulong cr0;
-
- if (vcpu->fpu_active)
- return;
- vcpu->fpu_active = 1;
- cr0 = vmcs_readl(GUEST_CR0);
- cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
- cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
- vmcs_writel(GUEST_CR0, cr0);
- update_exception_bitmap(vcpu);
- vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
- if (is_guest_mode(vcpu))
- vcpu->arch.cr0_guest_owned_bits &=
- ~get_vmcs12(vcpu)->cr0_guest_host_mask;
- vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
-}
-
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
/*
@@ -2332,51 +1335,24 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
* of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
* its hypervisor (cr0_read_shadow).
*/
-static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
+static inline size_t nested_read_cr0(struct vmcs12 *fields)
{
return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) |
(fields->cr0_read_shadow & fields->cr0_guest_host_mask);
}
-static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
+static inline size_t nested_read_cr4(struct vmcs12 *fields)
{
return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) |
(fields->cr4_read_shadow & fields->cr4_guest_host_mask);
}
-static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
+static size_t vmx_get_rflags(struct kvm_vcpu *vcpu)
{
- /* Note that there is no vcpu->fpu_active = 0 here. The caller must
- * set this *before* calling this function.
- */
- vmx_decache_cr0_guest_bits(vcpu);
- vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
- update_exception_bitmap(vcpu);
- vcpu->arch.cr0_guest_owned_bits = 0;
- vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
- if (is_guest_mode(vcpu)) {
- /*
- * L1's specified read shadow might not contain the TS bit,
- * so now that we turned on shadowing of this bit, we need to
- * set this bit of the shadow. Like in nested_vmx_run we need
- * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet
- * up-to-date here because we just decached cr0.TS (and we'll
- * only update vmcs12->guest_cr0 on nested exit).
- */
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) |
- (vcpu->arch.cr0 & X86_CR0_TS);
- vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
- } else
- vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
-}
-
-static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
-{
- unsigned long rflags, save_rflags;
+ size_t rflags, save_rflags;
if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
- rflags = vmcs_readl(GUEST_RFLAGS);
+ rflags = vmcs_readl(vcpu, GUEST_RFLAGS);
if (to_vmx(vcpu)->rmode.vm86_active) {
rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
save_rflags = to_vmx(vcpu)->rmode.save_rflags;
@@ -2387,7 +1363,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
return to_vmx(vcpu)->rflags;
}
-static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+static void vmx_set_rflags(struct kvm_vcpu *vcpu, size_t rflags)
{
__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
to_vmx(vcpu)->rflags = rflags;
@@ -2395,49 +1371,44 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_vmx(vcpu)->rmode.save_rflags = rflags;
rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
}
- vmcs_writel(GUEST_RFLAGS, rflags);
-}
-
-static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
-{
- return to_vmx(vcpu)->guest_pkru;
+ vmcs_writel(vcpu, GUEST_RFLAGS, rflags);
}
static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
- u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ u32 interruptibility = vmcs_read32(vcpu, GUEST_INTERRUPTIBILITY_INFO);
int ret = 0;
if (interruptibility & GUEST_INTR_STATE_STI)
- ret |= KVM_X86_SHADOW_INT_STI;
+ ret |= GVM_X86_SHADOW_INT_STI;
if (interruptibility & GUEST_INTR_STATE_MOV_SS)
- ret |= KVM_X86_SHADOW_INT_MOV_SS;
+ ret |= GVM_X86_SHADOW_INT_MOV_SS;
return ret;
}
static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
- u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ u32 interruptibility_old = vmcs_read32(vcpu, GUEST_INTERRUPTIBILITY_INFO);
u32 interruptibility = interruptibility_old;
interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
- if (mask & KVM_X86_SHADOW_INT_MOV_SS)
+ if (mask & GVM_X86_SHADOW_INT_MOV_SS)
interruptibility |= GUEST_INTR_STATE_MOV_SS;
- else if (mask & KVM_X86_SHADOW_INT_STI)
+ else if (mask & GVM_X86_SHADOW_INT_STI)
interruptibility |= GUEST_INTR_STATE_STI;
if ((interruptibility != interruptibility_old))
- vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
+ vmcs_write32(vcpu, GUEST_INTERRUPTIBILITY_INFO, interruptibility);
}
static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
- unsigned long rip;
+ size_t rip;
rip = kvm_rip_read(vcpu);
- rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ rip += vmcs_read32(vcpu, VM_EXIT_INSTRUCTION_LEN);
kvm_rip_write(vcpu, rip);
/* skipping an emulated instruction also counts */
@@ -2445,7 +1416,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
}
/*
- * KVM wants to inject page-faults which it got to the guest. This function
+ * kvm wants to inject page faults that it intercepted back into the guest. This function
* checks whether in a nested guest, we need to inject them to L1 or L2.
*/
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
@@ -2456,8 +1427,8 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
return 0;
nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
- vmcs_read32(VM_EXIT_INTR_INFO),
- vmcs_readl(EXIT_QUALIFICATION));
+ vmcs_read32(vcpu, VM_EXIT_INTR_INFO),
+ vmcs_readl(vcpu, EXIT_QUALIFICATION));
return 1;
}
@@ -2473,7 +1444,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
return;
if (has_error_code) {
- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+ vmcs_write32(vcpu, VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
}
@@ -2482,18 +1453,18 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
if (kvm_exception_is_soft(nr))
inc_eip = vcpu->arch.event_exit_inst_len;
if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_make_request(GVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
if (kvm_exception_is_soft(nr)) {
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+ vmcs_write32(vcpu, VM_ENTRY_INSTRUCTION_LEN,
vmx->vcpu.arch.event_exit_inst_len);
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
} else
intr_info |= INTR_TYPE_HARD_EXCEPTION;
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+ vmcs_write32(vcpu, VM_ENTRY_INTR_INFO_FIELD, intr_info);
}
static bool vmx_rdtscp_supported(void)
@@ -2506,26 +1477,15 @@ static bool vmx_invpcid_supported(void)
return cpu_has_vmx_invpcid() && enable_ept;
}
-/*
- * Swap MSR entry in host/guest MSR entry array.
- */
-static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
-{
- struct shared_msr_entry tmp;
-
- tmp = vmx->guest_msrs[to];
- vmx->guest_msrs[to] = vmx->guest_msrs[from];
- vmx->guest_msrs[from] = tmp;
-}
-
static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
{
- unsigned long *msr_bitmap;
+ size_t *msr_bitmap;
if (is_guest_mode(vcpu))
msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
- else if (cpu_has_secondary_exec_ctrls() &&
- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+ else
+ if (cpu_has_secondary_exec_ctrls() &&
+ (vmcs_read32(vcpu, SECONDARY_VM_EXEC_CONTROL) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
if (is_long_mode(vcpu))
@@ -2545,7 +1505,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
msr_bitmap = vmx_msr_bitmap_legacy;
}
- vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+ vmcs_write64(vcpu, MSR_BITMAP, __pa(msr_bitmap));
}
/*
@@ -2555,37 +1515,33 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
*/
static void setup_msrs(struct vcpu_vmx *vmx)
{
- int save_nmsrs, index;
+ u64 value;
- save_nmsrs = 0;
#ifdef CONFIG_X86_64
if (is_long_mode(&vmx->vcpu)) {
- index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
- if (index >= 0)
- move_msr_up(vmx, index, save_nmsrs++);
- index = __find_msr_index(vmx, MSR_LSTAR);
- if (index >= 0)
- move_msr_up(vmx, index, save_nmsrs++);
- index = __find_msr_index(vmx, MSR_CSTAR);
- if (index >= 0)
- move_msr_up(vmx, index, save_nmsrs++);
- index = __find_msr_index(vmx, MSR_TSC_AUX);
- if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu))
- move_msr_up(vmx, index, save_nmsrs++);
+ if (!rdmsrl_safe(MSR_SYSCALL_MASK, &value) &&
+ !wrmsrl_safe(MSR_SYSCALL_MASK, value))
+ add_atomic_switch_msr(vmx, MSR_SYSCALL_MASK, 0, value);
+ if (!rdmsrl_safe(MSR_LSTAR, &value) &&
+ !wrmsrl_safe(MSR_LSTAR, value))
+ add_atomic_switch_msr(vmx, MSR_LSTAR, 0, value);
+ if (!rdmsrl_safe(MSR_CSTAR, &value) &&
+ !wrmsrl_safe(MSR_CSTAR, value))
+ add_atomic_switch_msr(vmx, MSR_CSTAR, 0, value);
+ if (!rdmsrl_safe(MSR_GS_BASE, &value) &&
+ !wrmsrl_safe(MSR_GS_BASE, value))
+ add_atomic_switch_msr(vmx, MSR_GS_BASE, 0, value);
/*
* MSR_STAR is only needed on long mode guests, and only
* if efer.sce is enabled.
*/
- index = __find_msr_index(vmx, MSR_STAR);
- if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
- move_msr_up(vmx, index, save_nmsrs++);
+ if (vmx->vcpu.arch.efer & EFER_SCE)
+ if (!rdmsrl_safe(MSR_STAR, &value) &&
+ !wrmsrl_safe(MSR_STAR, value))
+ add_atomic_switch_msr(vmx, MSR_STAR, 0, value);
}
#endif
- index = __find_msr_index(vmx, MSR_EFER);
- if (index >= 0 && update_transition_efer(vmx, index))
- move_msr_up(vmx, index, save_nmsrs++);
-
- vmx->save_nmsrs = save_nmsrs;
+ update_transition_efer(vmx);
if (cpu_has_vmx_msr_bitmap())
vmx_set_msr_bitmap(&vmx->vcpu);
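
setup_msrs() no longer shuffles a shared-MSR array; instead it snapshots the host values of the SYSCALL-related MSRs and hands each guest/host pair to add_atomic_switch_msr(), which maintains the VM-entry/VM-exit MSR-load tables that the CPU walks automatically on every transition. A minimal, self-contained sketch of that table bookkeeping follows; the structure names and the table size are illustrative, and the VMCS programming of the table addresses and counts is left out.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the hardware MSR-load entry format: index, reserved, value. */
struct msr_entry { uint32_t index; uint32_t reserved; uint64_t value; };

#define NR_AUTOLOAD 8

struct autoload {
        unsigned nr;
        struct msr_entry guest[NR_AUTOLOAD];
        struct msr_entry host[NR_AUTOLOAD];
};

/* Add or update one MSR in both tables, the way add_atomic_switch_msr()
 * does before the table address and count reach the VMCS. */
static int add_switch_msr(struct autoload *m, uint32_t msr,
                          uint64_t guest_val, uint64_t host_val)
{
        unsigned i;

        for (i = 0; i < m->nr; i++)
                if (m->guest[i].index == msr)
                        break;
        if (i == NR_AUTOLOAD)
                return -1;              /* table full */
        if (i == m->nr)
                m->nr++;
        m->guest[i].index = msr;
        m->guest[i].value = guest_val;
        m->host[i].index  = msr;
        m->host[i].value  = host_val;
        return 0;
}

int main(void)
{
        struct autoload m = { 0 };

        add_switch_msr(&m, 0xC0000084 /* MSR_SYSCALL_MASK */, 0, 0x47700);
        printf("%u MSR(s) will be swapped on VM entry/exit\n", m.nr);
        return 0;
}
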
@@ -2600,9 +1556,10 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
{
u64 host_tsc, tsc_offset;
- host_tsc = rdtsc();
- tsc_offset = vmcs_read64(TSC_OFFSET);
- return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
+ host_tsc = __rdtsc();
+ tsc_offset = vmcs_read64(vcpu, TSC_OFFSET);
+ //return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
+ return host_tsc + tsc_offset;
}
/*
@@ -2610,6 +1567,7 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
*/
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
+ vmcs_write64(vcpu, TSC_OFFSET, offset);
if (is_guest_mode(vcpu)) {
/*
* We're here if L1 chose not to trap WRMSR to TSC. According
@@ -2620,19 +1578,17 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
struct vmcs12 *vmcs12;
/* recalculate vmcs02.TSC_OFFSET: */
vmcs12 = get_vmcs12(vcpu);
- vmcs_write64(TSC_OFFSET, offset +
+ vmcs_write64(vcpu, TSC_OFFSET, offset +
(nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
vmcs12->tsc_offset : 0));
} else {
- trace_kvm_write_tsc_offset(vcpu->vcpu_id,
- vmcs_read64(TSC_OFFSET), offset);
- vmcs_write64(TSC_OFFSET, offset);
+ vmcs_write64(vcpu, TSC_OFFSET, offset);
}
}
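
With TSC scaling dropped, the guest-visible counter is simply the host TSC plus the offset in the current VMCS, and when L2 runs the L1 and L2 offsets add, which is exactly the vmcs02.TSC_OFFSET recalculation above. A trivial worked example of that arithmetic; all numbers are invented.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t host_tsc  = 1000000;  /* made-up host RDTSC value */
        uint64_t l1_offset = 500;      /* offset L0 programmed for L1 */
        uint64_t l2_offset = 30;       /* vmcs12->tsc_offset chosen by L1 */

        printf("L1 reads %llu\n", (unsigned long long)(host_tsc + l1_offset));
        printf("L2 reads %llu\n",
               (unsigned long long)(host_tsc + l1_offset + l2_offset));
        return 0;
}
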
static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ struct kvm_cpuid_entry *best = kvm_find_cpuid_entry(vcpu, 1, 0);
return best && (best->ecx & (1 << (X86_FEATURE_VMX & 31)));
}
@@ -2685,11 +1641,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS;
vmx->nested.nested_vmx_pinbased_ctls_high |=
- PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
- PIN_BASED_VMX_PREEMPTION_TIMER;
- if (kvm_vcpu_apicv_active(&vmx->vcpu))
- vmx->nested.nested_vmx_pinbased_ctls_high |=
- PIN_BASED_POSTED_INTR;
+ PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
/* exit controls */
rdmsr(MSR_IA32_VMX_EXIT_CTLS,
@@ -2804,7 +1756,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_ept_caps = 0;
/*
- * Old versions of KVM use the single-context version without
+ * Old versions of kvm use the single-context version without
* checking for support, so declare that it is supported even
* though it is treated as global context. The alternative is
* not failing the single-context invvpid, and it is worse.
@@ -2957,18 +1909,15 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
*/
static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
- struct shared_msr_entry *msr;
-
switch (msr_info->index) {
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
- msr_info->data = vmcs_readl(GUEST_FS_BASE);
+ msr_info->data = vmcs_readl(vcpu, GUEST_FS_BASE);
break;
case MSR_GS_BASE:
- msr_info->data = vmcs_readl(GUEST_GS_BASE);
+ msr_info->data = vmcs_readl(vcpu, GUEST_GS_BASE);
break;
case MSR_KERNEL_GS_BASE:
- vmx_load_host_state(to_vmx(vcpu));
msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
break;
#endif
@@ -2978,33 +1927,22 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = guest_read_tsc(vcpu);
break;
case MSR_IA32_SYSENTER_CS:
- msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
+ msr_info->data = vmcs_read32(vcpu, GUEST_SYSENTER_CS);
break;
case MSR_IA32_SYSENTER_EIP:
- msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
+ msr_info->data = vmcs_readl(vcpu, GUEST_SYSENTER_EIP);
break;
case MSR_IA32_SYSENTER_ESP:
- msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
+ msr_info->data = vmcs_readl(vcpu, GUEST_SYSENTER_ESP);
break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported())
return 1;
- msr_info->data = vmcs_read64(GUEST_BNDCFGS);
- break;
- case MSR_IA32_MCG_EXT_CTL:
- if (!msr_info->host_initiated &&
- !(to_vmx(vcpu)->msr_ia32_feature_control &
- FEATURE_CONTROL_LMCE))
- return 1;
- msr_info->data = vcpu->arch.mcg_ext_ctl;
+ msr_info->data = vmcs_read64(vcpu, GUEST_BNDCFGS);
break;
case MSR_IA32_FEATURE_CONTROL:
msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
break;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
- if (!nested_vmx_allowed(vcpu))
- return 1;
- return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
return 1;
@@ -3013,12 +1951,24 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_TSC_AUX:
if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
return 1;
+ case MSR_SYSCALL_MASK:
+ case MSR_LSTAR:
+ case MSR_CSTAR:
+	case MSR_STAR: {
+		struct vcpu_vmx *vmx = to_vmx(vcpu);
+		int i;
+		for (i = 0; i < vmx->msr_autoload.nr; i++)
+			if (vmx->msr_autoload.guest[i].index == msr_info->index)
+				msr_info->data = vmx->msr_autoload.guest[i].value;
+		break;
+	}
/* Otherwise falls through */
default:
- msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
- if (msr) {
- msr_info->data = msr->data;
- break;
+ if (msr_info->index >= MSR_IA32_VMX_BASIC
+ && msr_info->index <= MSR_IA32_VMX_VMFUNC) {
+ if (!nested_vmx_allowed(vcpu))
+ return 1;
+ return vmx_get_vmx_msr(vcpu, msr_info->index,
+ &msr_info->data);
}
return kvm_get_msr_common(vcpu, msr_info);
}
@@ -3036,10 +1986,10 @@ static void vmx_leave_nested(struct kvm_vcpu *vcpu);
static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct shared_msr_entry *msr;
int ret = 0;
u32 msr_index = msr_info->index;
u64 data = msr_info->data;
+ u64 host_value = 0;
switch (msr_index) {
case MSR_EFER:
@@ -3048,30 +1998,29 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
vmx_segment_cache_clear(vmx);
- vmcs_writel(GUEST_FS_BASE, data);
+ vmcs_writel(vcpu, GUEST_FS_BASE, data);
break;
case MSR_GS_BASE:
vmx_segment_cache_clear(vmx);
- vmcs_writel(GUEST_GS_BASE, data);
+ vmcs_writel(vcpu, GUEST_GS_BASE, data);
break;
case MSR_KERNEL_GS_BASE:
- vmx_load_host_state(vmx);
vmx->msr_guest_kernel_gs_base = data;
break;
#endif
case MSR_IA32_SYSENTER_CS:
- vmcs_write32(GUEST_SYSENTER_CS, data);
+ vmcs_write32(vcpu, GUEST_SYSENTER_CS, data);
break;
case MSR_IA32_SYSENTER_EIP:
- vmcs_writel(GUEST_SYSENTER_EIP, data);
+ vmcs_writel(vcpu, GUEST_SYSENTER_EIP, data);
break;
case MSR_IA32_SYSENTER_ESP:
- vmcs_writel(GUEST_SYSENTER_ESP, data);
+ vmcs_writel(vcpu, GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported())
return 1;
- vmcs_write64(GUEST_BNDCFGS, data);
+ vmcs_write64(vcpu, GUEST_BNDCFGS, data);
break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
@@ -3080,7 +2029,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
return 1;
- vmcs_write64(GUEST_IA32_PAT, data);
+ vmcs_write64(vcpu, GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
}
@@ -3089,14 +2038,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC_ADJUST:
ret = kvm_set_msr_common(vcpu, msr_info);
break;
- case MSR_IA32_MCG_EXT_CTL:
- if ((!msr_info->host_initiated &&
- !(to_vmx(vcpu)->msr_ia32_feature_control &
- FEATURE_CONTROL_LMCE)) ||
- (data & ~MCG_EXT_CTL_LMCE_EN))
- return 1;
- vcpu->arch.mcg_ext_ctl = data;
- break;
case MSR_IA32_FEATURE_CONTROL:
if (!vmx_feature_control_msr_valid(vcpu, data) ||
(to_vmx(vcpu)->msr_ia32_feature_control &
@@ -3106,14 +2047,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (msr_info->host_initiated && data == 0)
vmx_leave_nested(vcpu);
break;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
- return 1; /* they are read-only */
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
return 1;
/*
* The only supported bit as of Skylake is bit 8, but
- * it is not supported on KVM.
+ * it is not supported on kvm.
*/
if (data != 0)
return 1;
@@ -3130,22 +2069,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Check reserved bit, higher 32 bits should be zero */
if ((data >> 32) != 0)
return 1;
+ case MSR_SYSCALL_MASK:
+ case MSR_LSTAR:
+ case MSR_CSTAR:
+ case MSR_STAR:
+ if (!rdmsrl_safe(msr_index, &host_value))
+ add_atomic_switch_msr(vmx, msr_index, data, host_value);
+ break;
/* Otherwise falls through */
default:
- msr = find_msr_entry(vmx, msr_index);
- if (msr) {
- u64 old_msr_data = msr->data;
- msr->data = data;
- if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
- preempt_disable();
- ret = kvm_set_shared_msr(msr->index, msr->data,
- msr->mask);
- preempt_enable();
- if (ret)
- msr->data = old_msr_data;
- }
- break;
- }
+ if (msr_index >= MSR_IA32_VMX_BASIC
+ && msr_index <= MSR_IA32_VMX_VMFUNC)
+ return 1; /* they are read-only */
ret = kvm_set_msr_common(vcpu, msr_info);
}
@@ -3154,13 +2089,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ __set_bit(reg, (size_t *)&vcpu->arch.regs_avail);
switch (reg) {
case VCPU_REGS_RSP:
- vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
+ vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(vcpu, GUEST_RSP);
break;
case VCPU_REGS_RIP:
- vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
+ vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(vcpu, GUEST_RIP);
break;
case VCPU_EXREG_PDPTR:
if (enable_ept)
@@ -3171,32 +2106,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
-static __init int cpu_has_kvm_support(void)
+static int cpu_has_kvm_support(void)
{
return cpu_has_vmx();
}
-static __init int vmx_disabled_by_bios(void)
+static int vmx_disabled_by_bios(void)
{
u64 msr;
- rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+ if (rdmsrl_safe(MSR_IA32_FEATURE_CONTROL, &msr))
+ return 0;
+
if (msr & FEATURE_CONTROL_LOCKED) {
- /* launched w/ TXT and VMX disabled */
- if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
- && tboot_enabled())
- return 1;
/* launched w/o TXT and VMX only enabled w/ TXT */
if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
- && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
- && !tboot_enabled()) {
+ && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)) {
printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
- "activate TXT before enabling KVM\n");
+ "activate TXT before enabling kvm\n");
return 1;
}
/* launched w/o TXT and VMX disabled */
- if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
- && !tboot_enabled())
+ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX))
return 1;
}
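vmx_disabled_by_bios() now reads IA32_FEATURE_CONTROL with rdmsrl_safe() and drops the tboot/TXT special cases, so the decision reduces to the lock bit and the VMXON-outside-SMX enable bit. An annotated reminder of the bit layout this relies on; the example values are common BIOS settings, shown for illustration:

    /* IA32_FEATURE_CONTROL (MSR 0x3A) bits used here:
     *   bit 0: FEATURE_CONTROL_LOCKED
     *   bit 1: FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX
     *   bit 2: FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX
     *
     *   msr == 0x1  locked, VMX disabled        -> return 1 (disabled by BIOS)
     *   msr == 0x3  locked, SMX-only enable     -> return 1, warn about TXT
     *   msr == 0x5  locked, outside-SMX enable  -> return 0, usable as-is
     *   bit 0 clear (unlocked)                  -> return 0; hardware_enable()
     *                                              sets and locks the bits itself
     */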
@@ -3205,11 +2136,9 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
- intel_pt_handle_vmx(1);
-
- asm volatile (ASM_VMX_VMXON_RAX
- : : "a"(&addr), "m"(addr)
- : "memory", "cc");
+ u8 rc = __vmx_on(&addr);
+ if (rc)
+ printk(KERN_CRIT "kvm: vmxon failed, error %d\n", rc);
}
static int hardware_enable(void)
@@ -3221,27 +2150,13 @@ static int hardware_enable(void)
if (cr4_read_shadow() & X86_CR4_VMXE)
return -EBUSY;
- INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
- /*
- * Now we can enable the vmclear operation in kdump
- * since the loaded_vmcss_on_cpu list on this cpu
- * has been initialized.
- *
- * Though the cpu is not in VMX operation now, there
- * is no problem to enable the vmclear operation
- * for the loaded_vmcss_on_cpu list is empty!
- */
- crash_enable_local_vmclear(cpu);
-
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
test_bits = FEATURE_CONTROL_LOCKED;
test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
- if (tboot_enabled())
- test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
if ((old & test_bits) != test_bits) {
/* enable and lock */
@@ -3255,44 +2170,31 @@ static int hardware_enable(void)
}
native_store_gdt(this_cpu_ptr(&host_gdt));
+ native_store_idt(this_cpu_ptr(&host_idt));
return 0;
}
-static void vmclear_local_loaded_vmcss(void)
-{
- int cpu = raw_smp_processor_id();
- struct loaded_vmcs *v, *n;
-
- list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
- loaded_vmcss_on_cpu_link)
- __loaded_vmcs_clear(v);
-}
-
-
/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
* tricks.
*/
static void kvm_cpu_vmxoff(void)
{
- asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
-
- intel_pt_handle_vmx(0);
+ __vmx_off();
}
static void hardware_disable(void)
{
if (vmm_exclusive) {
- vmclear_local_loaded_vmcss();
kvm_cpu_vmxoff();
}
cr4_clear_bits(X86_CR4_VMXE);
}
-static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
+static int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
u32 msr, u32 *result)
{
- u32 vmx_msr_low, vmx_msr_high;
+ u32 vmx_msr_low = 0, vmx_msr_high = 0;
u32 ctl = ctl_min | ctl_opt;
rdmsr(msr, vmx_msr_low, vmx_msr_high);
@@ -3308,17 +2210,17 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
return 0;
}
-static __init bool allow_1_setting(u32 msr, u32 ctl)
+static bool allow_1_setting(u32 msr, u32 ctl)
{
- u32 vmx_msr_low, vmx_msr_high;
+ u32 vmx_msr_low = 0, vmx_msr_high = 0;
rdmsr(msr, vmx_msr_low, vmx_msr_high);
return vmx_msr_high & ctl;
}
-static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
+static int setup_vmcs_config(struct vmcs_config *vmcs_conf)
{
- u32 vmx_msr_low, vmx_msr_high;
+ u32 vmx_msr_low = 0, vmx_msr_high = 0;
u32 min, opt, min2, opt2;
u32 _pin_based_exec_control = 0;
u32 _cpu_based_exec_control = 0;
@@ -3360,15 +2262,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_ENABLE_EPT |
SECONDARY_EXEC_UNRESTRICTED_GUEST |
- SECONDARY_EXEC_PAUSE_LOOP_EXITING |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES |
- SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_TSC_SCALING;
+ SECONDARY_EXEC_ENABLE_PML;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -3407,18 +2307,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
return -EIO;
min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
- PIN_BASED_VMX_PREEMPTION_TIMER;
+ opt = PIN_BASED_VIRTUAL_NMIS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
- if (cpu_has_broken_vmx_preemption_timer())
- _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
- if (!(_cpu_based_2nd_exec_control &
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
- _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
-
min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
@@ -3442,7 +2335,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
return -EIO;
vmcs_conf->size = vmx_msr_high & 0x1fff;
- vmcs_conf->order = get_order(vmcs_conf->size);
+ /* should always be 0 */
+ vmcs_conf->order = 0;
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
vmcs_conf->revision_id = vmx_msr_low;
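The size decoded just above comes from bits 44:32 of IA32_VMX_BASIC, which the SDM caps at 4096 bytes, so get_order() of it is always 0; that is why the order field can be hard-wired instead of computed, and why a single non-paged-pool block in alloc_vmcs_cpu() further down is always enough. A small worked fragment under that assumption:

    u32 basic_high;                          /* upper 32 bits of IA32_VMX_BASIC */
    u32 vmcs_size = basic_high & 0x1fff;     /* bits 44:32, at most 0x1000      */
    /* get_order(vmcs_size) == 0 for any size <= PAGE_SIZE, hence order = 0 */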
@@ -3478,8 +2372,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
* BA97 (model 46)
*
*/
- if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) {
- switch (boot_cpu_data.x86_model) {
+ if (cpu_has_load_perf_global_ctrl && x86_cpuid_family() == 0x6) {
+ switch (x86_cpuid_model()) {
case 26:
case 30:
case 37:
@@ -3500,16 +2394,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
return 0;
}
-static struct vmcs *alloc_vmcs_cpu(int cpu)
+static struct vmcs *alloc_vmcs_cpu(void)
{
- int node = cpu_to_node(cpu);
- struct page *pages;
struct vmcs *vmcs;
- pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
- if (!pages)
+ vmcs = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, GVM_POOL_TAG);
+ if (!vmcs)
return NULL;
- vmcs = page_address(pages);
memset(vmcs, 0, vmcs_config.size);
vmcs->revision_id = vmcs_config.revision_id; /* vmcs revision id */
return vmcs;
@@ -3517,12 +2408,12 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
static struct vmcs *alloc_vmcs(void)
{
- return alloc_vmcs_cpu(raw_smp_processor_id());
+ return alloc_vmcs_cpu();
}
static void free_vmcs(struct vmcs *vmcs)
{
- free_pages((unsigned long)vmcs, vmcs_config.order);
+ ExFreePoolWithTag(vmcs, GVM_POOL_TAG);
}
/*
@@ -3532,7 +2423,6 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
{
if (!loaded_vmcs->vmcs)
return;
- loaded_vmcs_clear(loaded_vmcs);
free_vmcs(loaded_vmcs->vmcs);
loaded_vmcs->vmcs = NULL;
WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
@@ -3583,14 +2473,14 @@ static void init_vmcs_shadow_fields(void)
vmx_vmread_bitmap);
}
-static __init int alloc_kvm_area(void)
+static int alloc_kvm_area(void)
{
int cpu;
for_each_possible_cpu(cpu) {
struct vmcs *vmcs;
- vmcs = alloc_vmcs_cpu(cpu);
+ vmcs = alloc_vmcs_cpu();
if (!vmcs) {
free_kvm_area();
return -ENOMEM;
@@ -3627,7 +2517,7 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
static void enter_pmode(struct kvm_vcpu *vcpu)
{
- unsigned long flags;
+ size_t flags;
struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
@@ -3647,13 +2537,13 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
- flags = vmcs_readl(GUEST_RFLAGS);
+ flags = vmcs_readl(vcpu, GUEST_RFLAGS);
flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
- vmcs_writel(GUEST_RFLAGS, flags);
+ vmcs_writel(vcpu, GUEST_RFLAGS, flags);
- vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
- (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
+ vmcs_writel(vcpu, GUEST_CR4, (vmcs_readl(vcpu, GUEST_CR4) & ~X86_CR4_VME) |
+ (vmcs_readl(vcpu, CR4_READ_SHADOW) & X86_CR4_VME));
update_exception_bitmap(vcpu);
@@ -3665,7 +2555,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
}
-static void fix_rmode_seg(int seg, struct kvm_segment *save)
+static void fix_rmode_seg(struct kvm_vcpu* vcpu, int seg, struct kvm_segment *save)
{
const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
struct kvm_segment var = *save;
@@ -3692,15 +2582,15 @@ static void fix_rmode_seg(int seg, struct kvm_segment *save)
"protected mode (seg=%d)", seg);
}
- vmcs_write16(sf->selector, var.selector);
- vmcs_write32(sf->base, var.base);
- vmcs_write32(sf->limit, var.limit);
- vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
+ vmcs_write16(vcpu, sf->selector, var.selector);
+ vmcs_write32(vcpu, sf->base, var.base);
+ vmcs_write32(vcpu, sf->limit, var.limit);
+ vmcs_write32(vcpu, sf->ar_bytes, vmx_segment_access_rights(&var));
}
static void enter_rmode(struct kvm_vcpu *vcpu)
{
- unsigned long flags;
+ size_t flags;
struct vcpu_vmx *vmx = to_vmx(vcpu);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
@@ -3714,34 +2604,34 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmx->rmode.vm86_active = 1;
/*
- * Very old userspace does not call KVM_SET_TSS_ADDR before entering
+ * Very old userspace does not call GVM_SET_TSS_ADDR before entering
* vcpu. Warn the user that an update is overdue.
*/
if (!vcpu->kvm->arch.tss_addr)
- printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
+ printk_once(KERN_WARNING "kvm: GVM_SET_TSS_ADDR needs to be "
"called before entering vcpu\n");
vmx_segment_cache_clear(vmx);
- vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
- vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
- vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
+ vmcs_writel(vcpu, GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
+ vmcs_write32(vcpu, GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
+ vmcs_write32(vcpu, GUEST_TR_AR_BYTES, 0x008b);
- flags = vmcs_readl(GUEST_RFLAGS);
+ flags = vmcs_readl(vcpu, GUEST_RFLAGS);
vmx->rmode.save_rflags = flags;
flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
- vmcs_writel(GUEST_RFLAGS, flags);
- vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
+ vmcs_writel(vcpu, GUEST_RFLAGS, flags);
+ vmcs_writel(vcpu, GUEST_CR4, vmcs_readl(vcpu, GUEST_CR4) | X86_CR4_VME);
update_exception_bitmap(vcpu);
- fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
- fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
- fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
- fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
- fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
- fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
+ fix_rmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
+ fix_rmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
+ fix_rmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+ fix_rmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+ fix_rmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
+ fix_rmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
kvm_mmu_reset_context(vcpu);
}
@@ -3749,41 +2639,31 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
-
- if (!msr)
- return;
- /*
- * Force kernel_gs_base reloading before EFER changes, as control
- * of this msr depends on is_long_mode().
- */
- vmx_load_host_state(to_vmx(vcpu));
vcpu->arch.efer = efer;
- if (efer & EFER_LMA) {
+ if (efer & EFER_LMA)
vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
- msr->data = efer;
- } else {
+ else
vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
- msr->data = efer & ~EFER_LME;
- }
setup_msrs(vmx);
}
#ifdef CONFIG_X86_64
+#define pr_debug_ratelimited DbgPrint
+
static void enter_lmode(struct kvm_vcpu *vcpu)
{
u32 guest_tr_ar;
vmx_segment_cache_clear(to_vmx(vcpu));
- guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
+ guest_tr_ar = vmcs_read32(vcpu, GUEST_TR_AR_BYTES);
if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
pr_debug_ratelimited("%s: tss fixup for long mode. \n",
__func__);
- vmcs_write32(GUEST_TR_AR_BYTES,
+ vmcs_write32(vcpu, GUEST_TR_AR_BYTES,
(guest_tr_ar & ~VMX_AR_TYPE_MASK)
| VMX_AR_TYPE_BUSY_64_TSS);
}
@@ -3818,13 +2698,13 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
- vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
+ vcpu->arch.cr0 |= vmcs_readl(vcpu, GUEST_CR0) & cr0_guest_owned_bits;
}
static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
{
if (enable_ept && is_paging(vcpu))
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ vcpu->arch.cr3 = vmcs_readl(vcpu, GUEST_CR3);
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
}
@@ -3833,7 +2713,7 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
- vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
+ vcpu->arch.cr4 |= vmcs_readl(vcpu, GUEST_CR4) & cr4_guest_owned_bits;
}
static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -3841,14 +2721,14 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
if (!test_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_dirty))
+ (size_t *)&vcpu->arch.regs_dirty))
return;
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
- vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
- vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
- vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
- vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
+ vmcs_write64(vcpu, GUEST_PDPTR0, mmu->pdptrs[0]);
+ vmcs_write64(vcpu, GUEST_PDPTR1, mmu->pdptrs[1]);
+ vmcs_write64(vcpu, GUEST_PDPTR2, mmu->pdptrs[2]);
+ vmcs_write64(vcpu, GUEST_PDPTR3, mmu->pdptrs[3]);
}
}
@@ -3857,38 +2737,38 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
- mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
- mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
- mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
- mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+ mmu->pdptrs[0] = vmcs_read64(vcpu, GUEST_PDPTR0);
+ mmu->pdptrs[1] = vmcs_read64(vcpu, GUEST_PDPTR1);
+ mmu->pdptrs[2] = vmcs_read64(vcpu, GUEST_PDPTR2);
+ mmu->pdptrs[3] = vmcs_read64(vcpu, GUEST_PDPTR3);
}
__set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail);
+ (size_t *)&vcpu->arch.regs_avail);
__set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_dirty);
+ (size_t *)&vcpu->arch.regs_dirty);
}
-static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+static int vmx_set_cr4(struct kvm_vcpu *vcpu, size_t cr4);
-static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
- unsigned long cr0,
+static void ept_update_paging_mode_cr0(size_t *hw_cr0,
+ size_t cr0,
struct kvm_vcpu *vcpu)
{
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
vmx_decache_cr3(vcpu);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
+ vmcs_write32(vcpu, CPU_BASED_VM_EXEC_CONTROL,
+ vmcs_read32(vcpu, CPU_BASED_VM_EXEC_CONTROL) |
(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
} else if (!is_paging(vcpu)) {
/* From nonpaging to paging */
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
+ vmcs_write32(vcpu, CPU_BASED_VM_EXEC_CONTROL,
+ vmcs_read32(vcpu, CPU_BASED_VM_EXEC_CONTROL) &
~(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
vcpu->arch.cr0 = cr0;
@@ -3899,16 +2779,16 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
*hw_cr0 &= ~X86_CR0_WP;
}
-static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+static void vmx_set_cr0(struct kvm_vcpu *vcpu, size_t cr0)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long hw_cr0;
+ size_t hw_cr0;
- hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
+ hw_cr0 = (cr0 & ~GVM_GUEST_CR0_MASK);
if (enable_unrestricted_guest)
- hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
+ hw_cr0 |= GVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
else {
- hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
+ hw_cr0 |= GVM_VM_CR0_ALWAYS_ON;
if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
enter_pmode(vcpu);
@@ -3929,18 +2809,15 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (enable_ept)
ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
- if (!vcpu->fpu_active)
- hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
-
- vmcs_writel(CR0_READ_SHADOW, cr0);
- vmcs_writel(GUEST_CR0, hw_cr0);
+ vmcs_writel(vcpu, CR0_READ_SHADOW, cr0);
+ vmcs_writel(vcpu, GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;
/* depends on vcpu->arch.cr0 to be set to a new value */
vmx->emulation_required = emulation_required(vcpu);
}
-static u64 construct_eptp(unsigned long root_hpa)
+static u64 construct_eptp(size_t root_hpa)
{
u64 eptp;
@@ -3954,15 +2831,15 @@ static u64 construct_eptp(unsigned long root_hpa)
return eptp;
}
-static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+static void vmx_set_cr3(struct kvm_vcpu *vcpu, size_t cr3)
{
- unsigned long guest_cr3;
+ size_t guest_cr3;
u64 eptp;
guest_cr3 = cr3;
if (enable_ept) {
eptp = construct_eptp(cr3);
- vmcs_write64(EPT_POINTER, eptp);
+ vmcs_write64(vcpu, EPT_POINTER, eptp);
if (is_paging(vcpu) || is_guest_mode(vcpu))
guest_cr3 = kvm_read_cr3(vcpu);
else
@@ -3971,21 +2848,21 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
}
vmx_flush_tlb(vcpu);
- vmcs_writel(GUEST_CR3, guest_cr3);
+ vmcs_writel(vcpu, GUEST_CR3, guest_cr3);
}
-static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static int vmx_set_cr4(struct kvm_vcpu *vcpu, size_t cr4)
{
/*
* Pass through host's Machine Check Enable value to hw_cr4, which
* is in force while we are in guest mode. Do not let guests control
* this bit, even if host CR4.MCE == 0.
*/
- unsigned long hw_cr4 =
+ size_t hw_cr4 =
(cr4_read_shadow() & X86_CR4_MCE) |
(cr4 & ~X86_CR4_MCE) |
(to_vmx(vcpu)->rmode.vm86_active ?
- KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+ GVM_RMODE_VM_CR4_ALWAYS_ON : GVM_PMODE_VM_CR4_ALWAYS_ON);
if (cr4 & X86_CR4_VMXE) {
/*
@@ -3994,7 +2871,7 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* So basically the check on whether to allow nested VMX
* is here.
*/
- if (!nested_vmx_allowed(vcpu))
+ //if (!nested_vmx_allowed(vcpu))
return 1;
}
if (to_vmx(vcpu)->nested.vmxon &&
@@ -4025,8 +2902,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
*/
hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
- vmcs_writel(CR4_READ_SHADOW, cr4);
- vmcs_writel(GUEST_CR4, hw_cr4);
+ vmcs_writel(vcpu, CR4_READ_SHADOW, cr4);
+ vmcs_writel(vcpu, GUEST_CR4, hw_cr4);
return 0;
}
@@ -4121,15 +2998,15 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
vmx->rmode.segs[seg] = *var;
if (seg == VCPU_SREG_TR)
- vmcs_write16(sf->selector, var->selector);
+ vmcs_write16(vcpu, sf->selector, var->selector);
else if (var->s)
- fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
+ fix_rmode_seg(vcpu, seg, &vmx->rmode.segs[seg]);
goto out;
}
- vmcs_writel(sf->base, var->base);
- vmcs_write32(sf->limit, var->limit);
- vmcs_write16(sf->selector, var->selector);
+ vmcs_writel(vcpu, sf->base, var->base);
+ vmcs_write32(vcpu, sf->limit, var->limit);
+ vmcs_write16(vcpu, sf->selector, var->selector);
/*
* Fix the "Accessed" bit in AR field of segment registers for older
@@ -4145,7 +3022,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
var->type |= 0x1; /* Accessed */
- vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
+ vmcs_write32(vcpu, sf->ar_bytes, vmx_segment_access_rights(var));
out:
vmx->emulation_required = emulation_required(vcpu);
@@ -4161,26 +3038,26 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
- dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
- dt->address = vmcs_readl(GUEST_IDTR_BASE);
+ dt->size = vmcs_read32(vcpu, GUEST_IDTR_LIMIT);
+ dt->address = vmcs_readl(vcpu, GUEST_IDTR_BASE);
}
static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
- vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
- vmcs_writel(GUEST_IDTR_BASE, dt->address);
+ vmcs_write32(vcpu, GUEST_IDTR_LIMIT, dt->size);
+ vmcs_writel(vcpu, GUEST_IDTR_BASE, dt->address);
}
static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
- dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
- dt->address = vmcs_readl(GUEST_GDTR_BASE);
+ dt->size = vmcs_read32(vcpu, GUEST_GDTR_LIMIT);
+ dt->address = vmcs_readl(vcpu, GUEST_GDTR_BASE);
}
static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
- vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
- vmcs_writel(GUEST_GDTR_BASE, dt->address);
+ vmcs_write32(vcpu, GUEST_GDTR_LIMIT, dt->size);
+ vmcs_writel(vcpu, GUEST_GDTR_BASE, dt->address);
}
static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
@@ -4454,24 +3331,24 @@ out2:
return r;
}
-static void seg_setup(int seg)
+static void seg_setup(struct kvm_vcpu *vcpu, int seg)
{
const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
unsigned int ar;
- vmcs_write16(sf->selector, 0);
- vmcs_writel(sf->base, 0);
- vmcs_write32(sf->limit, 0xffff);
+ vmcs_write16(vcpu, sf->selector, 0);
+ vmcs_writel(vcpu, sf->base, 0);
+ vmcs_write32(vcpu, sf->limit, 0xffff);
ar = 0x93;
if (seg == VCPU_SREG_CS)
ar |= 0x08; /* code segment */
- vmcs_write32(sf->ar_bytes, ar);
+ vmcs_write32(vcpu, sf->ar_bytes, ar);
}
static int alloc_apic_access_page(struct kvm *kvm)
{
- struct page *page;
+ pfn_t pfn;
int r = 0;
mutex_lock(&kvm->slots_lock);
@@ -4482,17 +3359,12 @@ static int alloc_apic_access_page(struct kvm *kvm)
if (r)
goto out;
- page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
- if (is_error_page(page)) {
+ pfn = gfn_to_pfn(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (is_error_noslot_pfn(pfn)) {
r = -EFAULT;
goto out;
}
- /*
- * Do not pin the page in memory, so that memory hot-unplug
- * is able to migrate it.
- */
- put_page(page);
kvm->arch.apic_access_page_done = true;
out:
mutex_unlock(&kvm->slots_lock);
@@ -4540,10 +3412,10 @@ static void free_vpid(int vpid)
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static void __vmx_disable_intercept_for_msr(size_t *msr_bitmap,
u32 msr, int type)
{
- int f = sizeof(unsigned long);
+ int f = sizeof(size_t);
if (!cpu_has_vmx_msr_bitmap())
return;
@@ -4575,10 +3447,10 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
}
}
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+static void __vmx_enable_intercept_for_msr(size_t *msr_bitmap,
u32 msr, int type)
{
- int f = sizeof(unsigned long);
+ int f = sizeof(size_t);
if (!cpu_has_vmx_msr_bitmap())
return;
@@ -4614,11 +3486,11 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
* If a msr is allowed by L0, we should check whether it is allowed by L1.
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
*/
-static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
- unsigned long *msr_bitmap_nested,
+static void nested_vmx_disable_intercept_for_msr(size_t *msr_bitmap_l1,
+ size_t *msr_bitmap_nested,
u32 msr, int type)
{
- int f = sizeof(unsigned long);
+ int f = sizeof(size_t);
if (!cpu_has_vmx_msr_bitmap()) {
WARN_ON(1);
@@ -4715,125 +3587,6 @@ static bool vmx_get_enable_apicv(void)
return enable_apicv;
}
-static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- int max_irr;
- void *vapic_page;
- u16 status;
-
- if (vmx->nested.pi_desc &&
- vmx->nested.pi_pending) {
- vmx->nested.pi_pending = false;
- if (!pi_test_and_clear_on(vmx->nested.pi_desc))
- return 0;
-
- max_irr = find_last_bit(
- (unsigned long *)vmx->nested.pi_desc->pir, 256);
-
- if (max_irr == 256)
- return 0;
-
- vapic_page = kmap(vmx->nested.virtual_apic_page);
- if (!vapic_page) {
- WARN_ON(1);
- return -ENOMEM;
- }
- __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
- kunmap(vmx->nested.virtual_apic_page);
-
- status = vmcs_read16(GUEST_INTR_STATUS);
- if ((u8)max_irr > ((u8)status & 0xff)) {
- status &= ~0xff;
- status |= (u8)max_irr;
- vmcs_write16(GUEST_INTR_STATUS, status);
- }
- }
- return 0;
-}
-
-static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_SMP
- if (vcpu->mode == IN_GUEST_MODE) {
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- /*
- * Currently, we don't support urgent interrupt,
- * all interrupts are recognized as non-urgent
- * interrupt, so we cannot post interrupts when
- * 'SN' is set.
- *
- * If the vcpu is in guest mode, it means it is
- * running instead of being scheduled out and
- * waiting in the run queue, and that's the only
- * case when 'SN' is set currently, warning if
- * 'SN' is set.
- */
- WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
-
- apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
- POSTED_INTR_VECTOR);
- return true;
- }
-#endif
- return false;
-}
-
-static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
- int vector)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (is_guest_mode(vcpu) &&
- vector == vmx->nested.posted_intr_nv) {
- /* the PIR and ON have been set by L1. */
- kvm_vcpu_trigger_posted_interrupt(vcpu);
- /*
- * If a posted intr is not recognized by hardware,
- * we will accomplish it in the next vmentry.
- */
- vmx->nested.pi_pending = true;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- return 0;
- }
- return -1;
-}
-/*
- * Send interrupt to vcpu via posted interrupt way.
- * 1. If target vcpu is running(non-root mode), send posted interrupt
- * notification to vcpu and hardware will sync PIR to vIRR atomically.
- * 2. If target vcpu isn't running(root mode), kick it to pick up the
- * interrupt from PIR in next vmentry.
- */
-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- int r;
-
- r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
- if (!r)
- return;
-
- if (pi_test_and_set_pir(vector, &vmx->pi_desc))
- return;
-
- r = pi_test_and_set_on(&vmx->pi_desc);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- if (r || !kvm_vcpu_trigger_posted_interrupt(vcpu))
- kvm_vcpu_kick(vcpu);
-}
-
-static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (!pi_test_and_clear_on(&vmx->pi_desc))
- return;
-
- kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
-}
-
/*
* Set up the vmcs's constant host-state fields, i.e., host-state fields that
* will not change in the lifetime of the guest.
@@ -4842,71 +3595,63 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
*/
static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
{
- u32 low32, high32;
- unsigned long tmpl;
- struct desc_ptr dt;
- unsigned long cr4;
+ u32 low32 = 0, high32 = 0;
+ size_t tmpl;
+ size_t cr4;
+ struct kvm_vcpu *vcpu = &vmx->vcpu;
- vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */
- vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
+ vmcs_writel(vcpu, HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */
+ vmcs_writel(vcpu, HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
/* Save the most likely value for this task's CR4 in the VMCS. */
cr4 = cr4_read_shadow();
- vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
+ vmcs_writel(vcpu, HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
vmx->host_state.vmcs_host_cr4 = cr4;
- vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
+ vmcs_write16(vcpu, HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
#ifdef CONFIG_X86_64
/*
* Load null selectors, so we can avoid reloading them in
* __vmx_load_host_state(), in case userspace uses the null selectors
* too (the expected case).
*/
- vmcs_write16(HOST_DS_SELECTOR, 0);
- vmcs_write16(HOST_ES_SELECTOR, 0);
+ vmcs_write16(vcpu, HOST_DS_SELECTOR, 0);
+ vmcs_write16(vcpu, HOST_ES_SELECTOR, 0);
#else
- vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
- vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
+ vmcs_write16(vcpu, HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
+ vmcs_write16(vcpu, HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
#endif
- vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
- vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
-
- native_store_idt(&dt);
- vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
- vmx->host_idt_base = dt.address;
+ vmcs_write16(vcpu, HOST_SS_SELECTOR, __KERNEL_SS); /* 22.2.4 */
+ vmcs_write16(vcpu, HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
- vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
+ vmcs_writel(vcpu, HOST_RIP, vmx_return); /* 22.2.5 */
rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
- vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
+ vmcs_write32(vcpu, HOST_IA32_SYSENTER_CS, low32);
rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
- vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
+ vmcs_writel(vcpu, HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
rdmsr(MSR_IA32_CR_PAT, low32, high32);
- vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
+ vmcs_write64(vcpu, HOST_IA32_PAT, low32 | ((u64) high32 << 32));
}
}
static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
{
- vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
+ vmx->vcpu.arch.cr4_guest_owned_bits = GVM_CR4_GUEST_OWNED_BITS;
if (enable_ept)
vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
if (is_guest_mode(&vmx->vcpu))
vmx->vcpu.arch.cr4_guest_owned_bits &=
~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
- vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
+ vmcs_writel(&vmx->vcpu, CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
}
static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
{
u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
- if (!kvm_vcpu_apicv_active(&vmx->vcpu))
- pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
- /* Enable the preemption timer dynamically */
- pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
}
@@ -4914,14 +3659,14 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+ vmcs_write32(vcpu, PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
if (cpu_has_secondary_exec_ctrls()) {
if (kvm_vcpu_apicv_active(vcpu))
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ vmcs_set_bits(vcpu, SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
else
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ vmcs_clear_bits(vcpu, SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
}
@@ -4934,7 +3679,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
{
u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
- if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
+ if (vmx->vcpu.arch.switch_db_regs & GVM_DEBUGREG_WONT_EXIT)
exec_control &= ~CPU_BASED_MOV_DR_EXITING;
if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
@@ -4966,8 +3711,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
}
if (!enable_unrestricted_guest)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
- if (!ple_gap)
- exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
@@ -5003,114 +3746,90 @@ static void ept_set_mmio_spte_mask(void)
static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
#ifdef CONFIG_X86_64
- unsigned long a;
+ size_t a;
#endif
- int i;
+ struct kvm_vcpu *vcpu = &vmx->vcpu;
/* I/O */
- vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
- vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
+ vmcs_write64(vcpu, IO_BITMAP_A, __pa(vmx_io_bitmap_a));
+ vmcs_write64(vcpu, IO_BITMAP_B, __pa(vmx_io_bitmap_b));
if (enable_shadow_vmcs) {
- vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
- vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+ vmcs_write64(vcpu, VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
+ vmcs_write64(vcpu, VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
}
if (cpu_has_vmx_msr_bitmap())
- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
+ vmcs_write64(vcpu, MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
- vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
+ vmcs_write64(vcpu, VMCS_LINK_POINTER, (u64)-1); /* 22.3.1.5 */
/* Control */
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
- vmx->hv_deadline_tsc = -1;
+ vmcs_write32(vcpu, PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
+ vmcs_write32(vcpu, CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
if (cpu_has_secondary_exec_ctrls()) {
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+ vmcs_write32(vcpu, SECONDARY_VM_EXEC_CONTROL,
vmx_secondary_exec_control(vmx));
}
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
- vmcs_write64(EOI_EXIT_BITMAP0, 0);
- vmcs_write64(EOI_EXIT_BITMAP1, 0);
- vmcs_write64(EOI_EXIT_BITMAP2, 0);
- vmcs_write64(EOI_EXIT_BITMAP3, 0);
-
- vmcs_write16(GUEST_INTR_STATUS, 0);
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP0, 0);
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP1, 0);
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP2, 0);
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP3, 0);
- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
- vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
- }
+ vmcs_write16(vcpu, GUEST_INTR_STATUS, 0);
- if (ple_gap) {
- vmcs_write32(PLE_GAP, ple_gap);
- vmx->ple_window = ple_window;
- vmx->ple_window_dirty = true;
+ //vmcs_write16(vcpu, POSTED_INTR_NV, POSTED_INTR_VECTOR);
+ //vmcs_write64(vcpu, POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
}
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
- vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
+ vmcs_write32(vcpu, PAGE_FAULT_ERROR_CODE_MASK, 0);
+ vmcs_write32(vcpu, PAGE_FAULT_ERROR_CODE_MATCH, 0);
+ vmcs_write32(vcpu, CR3_TARGET_COUNT, 0); /* 22.2.1 */
- vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
- vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
+ vmcs_write16(vcpu, HOST_FS_SELECTOR, 0); /* 22.2.4 */
+ vmcs_write16(vcpu, HOST_GS_SELECTOR, 0); /* 22.2.4 */
vmx_set_constant_host_state(vmx);
#ifdef CONFIG_X86_64
rdmsrl(MSR_FS_BASE, a);
- vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
+ vmcs_writel(vcpu, HOST_FS_BASE, a); /* 22.2.4 */
rdmsrl(MSR_GS_BASE, a);
- vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */
+ vmcs_writel(vcpu, HOST_GS_BASE, a); /* 22.2.4 */
#else
- vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
- vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
+ vmcs_writel(vcpu, HOST_FS_BASE, 0); /* 22.2.4 */
+ vmcs_writel(vcpu, HOST_GS_BASE, 0); /* 22.2.4 */
#endif
- vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
- vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
- vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+ vmcs_write32(vcpu, VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(vcpu, VM_EXIT_MSR_LOAD_COUNT, 0);
+ vmcs_write64(vcpu, VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+ vmcs_write32(vcpu, VM_ENTRY_MSR_LOAD_COUNT, 0);
+ vmcs_write64(vcpu, VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
- vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
-
- for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
- u32 index = vmx_msr_index[i];
- u32 data_low, data_high;
- int j = vmx->nmsrs;
-
- if (rdmsr_safe(index, &data_low, &data_high) < 0)
- continue;
- if (wrmsr_safe(index, data_low, data_high) < 0)
- continue;
- vmx->guest_msrs[j].index = i;
- vmx->guest_msrs[j].data = 0;
- vmx->guest_msrs[j].mask = -1ull;
- ++vmx->nmsrs;
- }
-
+ vmcs_write64(vcpu, GUEST_IA32_PAT, vmx->vcpu.arch.pat);
vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
/* 22.2.1, 20.8.1 */
vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
- vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
+ vmcs_writel(vcpu, CR0_GUEST_HOST_MASK, ~0UL);
set_cr4_guest_host_mask(vmx);
if (vmx_xsaves_supported())
- vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+ vmcs_write64(vcpu, XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
if (enable_pml) {
ASSERT(vmx->pml_pg);
- vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ vmcs_write64(vcpu, PML_ADDRESS, page_to_phys(vmx->pml_pg));
+ vmcs_write16(vcpu, GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
return 0;
}
-
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5135,72 +3854,68 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx_segment_cache_clear(vmx);
- seg_setup(VCPU_SREG_CS);
- vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
- vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
+ seg_setup(vcpu, VCPU_SREG_CS);
+ vmcs_write16(vcpu, GUEST_CS_SELECTOR, 0xf000);
+ vmcs_writel(vcpu, GUEST_CS_BASE, 0xffff0000ul);
- seg_setup(VCPU_SREG_DS);
- seg_setup(VCPU_SREG_ES);
- seg_setup(VCPU_SREG_FS);
- seg_setup(VCPU_SREG_GS);
- seg_setup(VCPU_SREG_SS);
+ seg_setup(vcpu, VCPU_SREG_DS);
+ seg_setup(vcpu, VCPU_SREG_ES);
+ seg_setup(vcpu, VCPU_SREG_FS);
+ seg_setup(vcpu, VCPU_SREG_GS);
+ seg_setup(vcpu, VCPU_SREG_SS);
- vmcs_write16(GUEST_TR_SELECTOR, 0);
- vmcs_writel(GUEST_TR_BASE, 0);
- vmcs_write32(GUEST_TR_LIMIT, 0xffff);
- vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
+ vmcs_write16(vcpu, GUEST_TR_SELECTOR, 0);
+ vmcs_writel(vcpu, GUEST_TR_BASE, 0);
+ vmcs_write32(vcpu, GUEST_TR_LIMIT, 0xffff);
+ vmcs_write32(vcpu, GUEST_TR_AR_BYTES, 0x008b);
- vmcs_write16(GUEST_LDTR_SELECTOR, 0);
- vmcs_writel(GUEST_LDTR_BASE, 0);
- vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
- vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
+ vmcs_write16(vcpu, GUEST_LDTR_SELECTOR, 0);
+ vmcs_writel(vcpu, GUEST_LDTR_BASE, 0);
+ vmcs_write32(vcpu, GUEST_LDTR_LIMIT, 0xffff);
+ vmcs_write32(vcpu, GUEST_LDTR_AR_BYTES, 0x00082);
if (!init_event) {
- vmcs_write32(GUEST_SYSENTER_CS, 0);
- vmcs_writel(GUEST_SYSENTER_ESP, 0);
- vmcs_writel(GUEST_SYSENTER_EIP, 0);
- vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+ vmcs_write32(vcpu, GUEST_SYSENTER_CS, 0);
+ vmcs_writel(vcpu, GUEST_SYSENTER_ESP, 0);
+ vmcs_writel(vcpu, GUEST_SYSENTER_EIP, 0);
+ vmcs_write64(vcpu, GUEST_IA32_DEBUGCTL, 0);
}
- vmcs_writel(GUEST_RFLAGS, 0x02);
+ vmcs_writel(vcpu, GUEST_RFLAGS, 0x02);
kvm_rip_write(vcpu, 0xfff0);
- vmcs_writel(GUEST_GDTR_BASE, 0);
- vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
+ vmcs_writel(vcpu, GUEST_GDTR_BASE, 0);
+ vmcs_write32(vcpu, GUEST_GDTR_LIMIT, 0xffff);
- vmcs_writel(GUEST_IDTR_BASE, 0);
- vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
+ vmcs_writel(vcpu, GUEST_IDTR_BASE, 0);
+ vmcs_write32(vcpu, GUEST_IDTR_LIMIT, 0xffff);
- vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
- vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+ vmcs_write32(vcpu, GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+ vmcs_write32(vcpu, GUEST_INTERRUPTIBILITY_INFO, 0);
+ vmcs_writel(vcpu, GUEST_PENDING_DBG_EXCEPTIONS, 0);
setup_msrs(vmx);
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
+ vmcs_write32(vcpu, VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
if (cpu_has_vmx_tpr_shadow() && !init_event) {
- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
+ vmcs_write64(vcpu, VIRTUAL_APIC_PAGE_ADDR, 0);
if (cpu_need_tpr_shadow(vcpu))
- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
+ vmcs_write64(vcpu, VIRTUAL_APIC_PAGE_ADDR,
__pa(vcpu->arch.apic->regs));
- vmcs_write32(TPR_THRESHOLD, 0);
+ vmcs_write32(vcpu, TPR_THRESHOLD, 0);
}
- kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
-
- if (kvm_vcpu_apicv_active(vcpu))
- memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
+ kvm_make_request(GVM_REQ_APIC_PAGE_RELOAD, vcpu);
if (vmx->vpid != 0)
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+ vmcs_write16(vcpu, VIRTUAL_PROCESSOR_ID, vmx->vpid);
cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
vmx->vcpu.arch.cr0 = cr0;
vmx_set_cr0(vcpu, cr0); /* enter rmode */
vmx_set_cr4(vcpu, 0);
vmx_set_efer(vcpu, 0);
- vmx_fpu_activate(vcpu);
update_exception_bitmap(vcpu);
vpid_sync_context(vmx->vpid);
@@ -5236,9 +3951,9 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control = vmcs_read32(vcpu, CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ vmcs_write32(vcpu, CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
static void enable_nmi_window(struct kvm_vcpu *vcpu)
@@ -5246,14 +3961,14 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
u32 cpu_based_vm_exec_control;
if (!cpu_has_virtual_nmis() ||
- vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ vmcs_read32(vcpu, GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control = vmcs_read32(vcpu, CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ vmcs_write32(vcpu, CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -5262,25 +3977,23 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
uint32_t intr;
int irq = vcpu->arch.interrupt.nr;
- trace_kvm_inj_virq(irq);
-
++vcpu->stat.irq_injections;
if (vmx->rmode.vm86_active) {
int inc_eip = 0;
if (vcpu->arch.interrupt.soft)
inc_eip = vcpu->arch.event_exit_inst_len;
if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_make_request(GVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
intr = irq | INTR_INFO_VALID_MASK;
if (vcpu->arch.interrupt.soft) {
intr |= INTR_TYPE_SOFT_INTR;
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+ vmcs_write32(vcpu, VM_ENTRY_INSTRUCTION_LEN,
vmx->vcpu.arch.event_exit_inst_len);
} else
intr |= INTR_TYPE_EXT_INTR;
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+ vmcs_write32(vcpu, VM_ENTRY_INTR_INFO_FIELD, intr);
}
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -5307,11 +4020,11 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
if (vmx->rmode.vm86_active) {
if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_make_request(GVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ vmcs_write32(vcpu, VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
}
@@ -5321,7 +4034,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
return to_vmx(vcpu)->soft_vnmi_blocked;
if (to_vmx(vcpu)->nmi_known_unmasked)
return false;
- return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
+ return vmcs_read32(vcpu, GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
}
static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
@@ -5336,10 +4049,10 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
} else {
vmx->nmi_known_unmasked = !masked;
if (masked)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ vmcs_set_bits(vcpu, GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ vmcs_clear_bits(vcpu, GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
}
}
@@ -5352,7 +4065,7 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
return 0;
- return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ return !(vmcs_read32(vcpu, GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
}
@@ -5360,8 +4073,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
{
return (!to_vmx(vcpu)->nested.nested_run_pending &&
- vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ vmcs_readl(vcpu, GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ !(vmcs_read32(vcpu, GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}
@@ -5386,13 +4099,13 @@ static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
* from user space while in guest debugging mode.
*/
to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
- vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ vmcs_read32(vcpu, VM_EXIT_INSTRUCTION_LEN);
+ if (vcpu->guest_debug & GVM_GUESTDBG_USE_SW_BP)
return false;
/* fall through */
case DB_VECTOR:
if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ (GVM_GUESTDBG_SINGLESTEP | GVM_GUESTDBG_USE_HW_BP))
return false;
/* fall through */
case DE_VECTOR:
@@ -5445,14 +4158,14 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
*/
static void kvm_machine_check(void)
{
-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
- struct pt_regs regs = {
- .cs = 3, /* Fake ring 3 no matter what the guest ran on */
- .flags = X86_EFLAGS_IF,
- };
+ /*
+ * On an #MC intercept the MCE handler is not called automatically in
+ * the host. So do it by hand here.
+ */
+ __int12();
+ /* not sure if we ever come back to this point */
- do_machine_check(&regs, 0);
-#endif
+ return;
}
static int handle_machine_check(struct kvm_vcpu *vcpu)
@@ -5466,7 +4179,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_run *kvm_run = vcpu->run;
u32 intr_info, ex_no, error_code;
- unsigned long cr2, rip, dr6;
+ size_t cr2, rip, dr6;
u32 vect_info;
enum emulation_result er;
@@ -5479,11 +4192,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
return 1; /* already handled by vmx_vcpu_run() */
- if (is_no_device(intr_info)) {
- vmx_fpu_activate(vcpu);
- return 1;
- }
-
if (is_invalid_opcode(intr_info)) {
if (is_guest_mode(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5497,7 +4205,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
- error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ error_code = vmcs_read32(vcpu, VM_EXIT_INTR_ERROR_CODE);
/*
* The #PF with PFEC.RSVD = 1 indicates the guest is accessing
@@ -5506,8 +4214,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
*/
if ((vect_info & VECTORING_INFO_VALID_MASK) &&
!(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
+ vcpu->run->exit_reason = GVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = GVM_INTERNAL_ERROR_SIMUL_EX;
vcpu->run->internal.ndata = 3;
vcpu->run->internal.data[0] = vect_info;
vcpu->run->internal.data[1] = intr_info;
@@ -5518,8 +4226,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (is_page_fault(intr_info)) {
/* EPT won't cause page fault directly */
BUG_ON(enable_ept);
- cr2 = vmcs_readl(EXIT_QUALIFICATION);
- trace_kvm_page_fault(cr2, error_code);
+ cr2 = vmcs_readl(vcpu, EXIT_QUALIFICATION);
if (kvm_event_needs_reinjection(vcpu))
kvm_mmu_unprotect_page_virt(vcpu, cr2);
@@ -5536,9 +4243,9 @@ static int handle_exception(struct kvm_vcpu *vcpu)
kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
return 1;
case DB_VECTOR:
- dr6 = vmcs_readl(EXIT_QUALIFICATION);
+ dr6 = vmcs_readl(vcpu, EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ (GVM_GUESTDBG_SINGLESTEP | GVM_GUESTDBG_USE_HW_BP))) {
vcpu->arch.dr6 &= ~15;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
if (!(dr6 & ~DR6_RESERVED)) /* icebp */
@@ -5548,7 +4255,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return 1;
}
kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
- kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
+ kvm_run->debug.arch.dr7 = vmcs_readl(vcpu, GUEST_DR7);
/* fall through */
case BP_VECTOR:
/*
@@ -5557,14 +4264,14 @@ static int handle_exception(struct kvm_vcpu *vcpu)
* #DB as well causes no harm, it is not used in that case.
*/
vmx->vcpu.arch.event_exit_inst_len =
- vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ vmcs_read32(vcpu, VM_EXIT_INSTRUCTION_LEN);
+ kvm_run->exit_reason = GVM_EXIT_DEBUG;
rip = kvm_rip_read(vcpu);
- kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+ kvm_run->debug.arch.pc = vmcs_readl(vcpu, GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no;
break;
default:
- kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+ kvm_run->exit_reason = GVM_EXIT_EXCEPTION;
kvm_run->ex.exception = ex_no;
kvm_run->ex.error_code = error_code;
break;
@@ -5580,17 +4287,17 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
static int handle_triple_fault(struct kvm_vcpu *vcpu)
{
- vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+ vcpu->run->exit_reason = GVM_EXIT_SHUTDOWN;
return 0;
}
static int handle_io(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification;
+ size_t exit_qualification;
int size, in, string;
unsigned port;
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
string = (exit_qualification & 16) != 0;
in = (exit_qualification & 8) != 0;
@@ -5606,20 +4313,9 @@ static int handle_io(struct kvm_vcpu *vcpu)
return kvm_fast_pio_out(vcpu, size, port);
}
-static void
-vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+static bool nested_cr0_valid(struct kvm_vcpu *vcpu, size_t val)
{
- /*
- * Patch in the VMCALL instruction:
- */
- hypercall[0] = 0x0f;
- hypercall[1] = 0x01;
- hypercall[2] = 0xc1;
-}
-
-static bool nested_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
-{
- unsigned long always_on = VMXON_CR0_ALWAYSON;
+ size_t always_on = VMXON_CR0_ALWAYSON;
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
@@ -5630,11 +4326,11 @@ static bool nested_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
}
/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
-static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
+static int handle_set_cr0(struct kvm_vcpu *vcpu, size_t val)
{
if (is_guest_mode(vcpu)) {
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- unsigned long orig_val = val;
+ size_t orig_val = val;
/*
* We get here when L2 changed cr0 in a way that did not change
@@ -5652,7 +4348,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
if (kvm_set_cr0(vcpu, val))
return 1;
- vmcs_writel(CR0_READ_SHADOW, orig_val);
+ vmcs_writel(vcpu, CR0_READ_SHADOW, orig_val);
return 0;
} else {
if (to_vmx(vcpu)->nested.vmxon &&
@@ -5662,53 +4358,36 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
}
}
-static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
+static int handle_set_cr4(struct kvm_vcpu *vcpu, size_t val)
{
if (is_guest_mode(vcpu)) {
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- unsigned long orig_val = val;
+ size_t orig_val = val;
/* analogously to handle_set_cr0 */
val = (val & ~vmcs12->cr4_guest_host_mask) |
(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
if (kvm_set_cr4(vcpu, val))
return 1;
- vmcs_writel(CR4_READ_SHADOW, orig_val);
+ vmcs_writel(vcpu, CR4_READ_SHADOW, orig_val);
return 0;
} else
return kvm_set_cr4(vcpu, val);
}
-/* called to set cr0 as appropriate for clts instruction exit. */
-static void handle_clts(struct kvm_vcpu *vcpu)
-{
- if (is_guest_mode(vcpu)) {
- /*
- * We get here when L2 did CLTS, and L1 didn't shadow CR0.TS
- * but we did (!fpu_active). We need to keep GUEST_CR0.TS on,
- * just pretend it's off (also in arch.cr0 for fpu_activate).
- */
- vmcs_writel(CR0_READ_SHADOW,
- vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS);
- vcpu->arch.cr0 &= ~X86_CR0_TS;
- } else
- vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
-}
-
static int handle_cr(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification, val;
+ size_t exit_qualification, val;
int cr;
int reg;
int err;
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
cr = exit_qualification & 15;
reg = (exit_qualification >> 8) & 15;
switch ((exit_qualification >> 4) & 3) {
case 0: /* mov to cr */
val = kvm_register_readl(vcpu, reg);
- trace_kvm_cr_write(cr, val);
switch (cr) {
case 0:
err = handle_set_cr0(vcpu, val);
@@ -5731,36 +4410,31 @@ static int handle_cr(struct kvm_vcpu *vcpu)
return 1;
if (cr8_prev <= cr8)
return 1;
- vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
+ vcpu->run->exit_reason = GVM_EXIT_SET_TPR;
return 0;
}
}
break;
case 2: /* clts */
- handle_clts(vcpu);
- trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
+ vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
skip_emulated_instruction(vcpu);
- vmx_fpu_activate(vcpu);
return 1;
case 1: /*mov from cr*/
switch (cr) {
case 3:
val = kvm_read_cr3(vcpu);
kvm_register_write(vcpu, reg, val);
- trace_kvm_cr_read(cr, val);
skip_emulated_instruction(vcpu);
return 1;
case 8:
val = kvm_get_cr8(vcpu);
kvm_register_write(vcpu, reg, val);
- trace_kvm_cr_read(cr, val);
skip_emulated_instruction(vcpu);
return 1;
}
break;
case 3: /* lmsw */
val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
- trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
kvm_lmsw(vcpu, val);
skip_emulated_instruction(vcpu);
@@ -5776,10 +4450,10 @@ static int handle_cr(struct kvm_vcpu *vcpu)
static int handle_dr(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification;
+ size_t exit_qualification;
int dr, dr7, reg;
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
/* First, if DR does not exist, trigger UD */
@@ -5789,19 +4463,19 @@ static int handle_dr(struct kvm_vcpu *vcpu)
/* Do not handle if the CPL > 0, will trigger GP on re-entry */
if (!kvm_require_cpl(vcpu, 0))
return 1;
- dr7 = vmcs_readl(GUEST_DR7);
+ dr7 = vmcs_readl(vcpu, GUEST_DR7);
if (dr7 & DR7_GD) {
/*
* As the vm-exit takes precedence over the debug trap, we
* need to emulate the latter, either for the host or the
* guest debugging itself.
*/
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+ if (vcpu->guest_debug & GVM_GUESTDBG_USE_HW_BP) {
vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
vcpu->run->debug.arch.dr7 = dr7;
vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
vcpu->run->debug.arch.exception = DB_VECTOR;
- vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->run->exit_reason = GVM_EXIT_DEBUG;
return 0;
} else {
vcpu->arch.dr6 &= ~15;
@@ -5812,7 +4486,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
}
if (vcpu->guest_debug == 0) {
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
+ vmcs_clear_bits(vcpu, CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_MOV_DR_EXITING);
/*
@@ -5820,13 +4494,13 @@ static int handle_dr(struct kvm_vcpu *vcpu)
* and reenter on this instruction. The next vmexit will
* retrieve the full state of the debug registers.
*/
- vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+ vcpu->arch.switch_db_regs |= GVM_DEBUGREG_WONT_EXIT;
return 1;
}
reg = DEBUG_REG_ACCESS_REG(exit_qualification);
if (exit_qualification & TYPE_MOV_FROM_DR) {
- unsigned long val;
+ size_t val;
if (kvm_get_dr(vcpu, dr, &val))
return 1;
@@ -5844,7 +4518,7 @@ static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
return vcpu->arch.dr6;
}
-static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
+static void vmx_set_dr6(struct kvm_vcpu *vcpu, size_t val)
{
}
@@ -5855,15 +4529,15 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
get_debugreg(vcpu->arch.db[2], 2);
get_debugreg(vcpu->arch.db[3], 3);
get_debugreg(vcpu->arch.dr6, 6);
- vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
+ vcpu->arch.dr7 = vmcs_readl(vcpu, GUEST_DR7);
- vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
- vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
+ vcpu->arch.switch_db_regs &= ~GVM_DEBUGREG_WONT_EXIT;
+ vmcs_set_bits(vcpu, CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
}
-static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
+static void vmx_set_dr7(struct kvm_vcpu *vcpu, size_t val)
{
- vmcs_writel(GUEST_DR7, val);
+ vmcs_writel(vcpu, GUEST_DR7, val);
}
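
A similar sketch for the MOV-DR exit qualification consumed by handle_dr() above (again per the Intel SDM; the helper is illustrative and not part of this patch):

#include <stddef.h>
#include <stdbool.h>

/* Sketch: MOV-DR exit qualification fields read by handle_dr(). */
static void decode_dr_exit(size_t qual, int *dr, bool *is_read, int *reg)
{
	*dr      = qual & 7;         /* bits 2:0  - debug register number    */
	*is_read = (qual >> 4) & 1;  /* bit 4     - 1 = mov from DR (read)   */
	*reg     = (qual >> 8) & 15; /* bits 11:8 - general-purpose register */
}
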
static int handle_cpuid(struct kvm_vcpu *vcpu)
@@ -5880,16 +4554,13 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
msr_info.index = ecx;
msr_info.host_initiated = false;
if (vmx_get_msr(vcpu, &msr_info)) {
- trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(vcpu, 0);
return 1;
}
- trace_kvm_msr_read(ecx, msr_info.data);
-
/* FIXME: handling of bits 32:63 of rax, rdx */
- vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
- vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
+ vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & (unsigned)-1;
+ vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & (unsigned)-1;
skip_emulated_instruction(vcpu);
return 1;
}
@@ -5898,26 +4569,24 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
{
struct msr_data msr;
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
- u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
- | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+ u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & (unsigned)-1)
+ | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & (unsigned)-1) << 32);
msr.data = data;
msr.index = ecx;
msr.host_initiated = false;
if (kvm_set_msr(vcpu, &msr) != 0) {
- trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
}
- trace_kvm_msr_write(ecx, data);
skip_emulated_instruction(vcpu);
return 1;
}
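
The RDMSR/WRMSR handlers above split or combine the 64-bit MSR value across EDX:EAX; a self-contained sketch of that convention (helper names are illustrative):

#include <stdint.h>

/* Sketch: RDMSR returns the value in EDX:EAX, WRMSR consumes it from there. */
static uint64_t msr_from_edx_eax(uint64_t rax, uint64_t rdx)
{
	return (rax & 0xffffffffu) | ((rdx & 0xffffffffu) << 32);
}

static void msr_to_edx_eax(uint64_t data, uint64_t *rax, uint64_t *rdx)
{
	*rax = data & 0xffffffffu;         /* low 32 bits -> EAX  */
	*rdx = (data >> 32) & 0xffffffffu; /* high 32 bits -> EDX */
}
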
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
return 1;
}
@@ -5926,11 +4595,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
u32 cpu_based_vm_exec_control;
/* clear pending irq */
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control = vmcs_read32(vcpu, CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ vmcs_write32(vcpu, CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
++vcpu->stat.irq_window_exits;
return 1;
@@ -5941,11 +4610,6 @@ static int handle_halt(struct kvm_vcpu *vcpu)
return kvm_emulate_halt(vcpu);
}
-static int handle_vmcall(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_hypercall(vcpu);
-}
-
static int handle_invd(struct kvm_vcpu *vcpu)
{
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
@@ -5953,7 +4617,7 @@ static int handle_invd(struct kvm_vcpu *vcpu)
static int handle_invlpg(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ size_t exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
kvm_mmu_invlpg(vcpu, exit_qualification);
skip_emulated_instruction(vcpu);
@@ -5962,10 +4626,12 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
static int handle_rdpmc(struct kvm_vcpu *vcpu)
{
+#if 0
int err;
err = kvm_rdpmc(vcpu);
kvm_complete_insn_gp(vcpu, err);
+#endif
return 1;
}
@@ -5989,21 +4655,21 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
static int handle_xsaves(struct kvm_vcpu *vcpu)
{
skip_emulated_instruction(vcpu);
- WARN(1, "this should never happen\n");
+ //WARN(1, "this should never happen\n");
return 1;
}
static int handle_xrstors(struct kvm_vcpu *vcpu)
{
skip_emulated_instruction(vcpu);
- WARN(1, "this should never happen\n");
+ //WARN(1, "this should never happen\n");
return 1;
}
static int handle_apic_access(struct kvm_vcpu *vcpu)
{
if (likely(fasteoi)) {
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ size_t exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
int access_type, offset;
access_type = exit_qualification & APIC_ACCESS_TYPE;
@@ -6025,7 +4691,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ size_t exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
int vector = exit_qualification & 0xff;
/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
@@ -6035,7 +4701,7 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
static int handle_apic_write(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ size_t exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
u32 offset = exit_qualification & 0xfff;
/* APIC-write VM exit is trap-like and thus no need to adjust IP */
@@ -6046,7 +4712,7 @@ static int handle_apic_write(struct kvm_vcpu *vcpu)
static int handle_task_switch(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long exit_qualification;
+ size_t exit_qualification;
bool has_error_code = false;
u32 error_code = 0;
u16 tss_selector;
@@ -6056,7 +4722,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
reason = (u32)exit_qualification >> 30;
if (reason == TASK_SWITCH_GATE && idt_v) {
@@ -6074,7 +4740,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
VECTORING_INFO_DELIVER_CODE_MASK) {
has_error_code = true;
error_code =
- vmcs_read32(IDT_VECTORING_ERROR_CODE);
+ vmcs_read32(vcpu, IDT_VECTORING_ERROR_CODE);
}
/* fall through */
case INTR_TYPE_SOFT_EXCEPTION:
@@ -6094,8 +4760,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
if (kvm_task_switch(vcpu, tss_selector,
type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
has_error_code, error_code) == EMULATE_FAIL) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->exit_reason = GVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = GVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
return 0;
}
@@ -6110,22 +4776,22 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
static int handle_ept_violation(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification;
+ size_t exit_qualification;
gpa_t gpa;
u32 error_code;
int gla_validity;
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
gla_validity = (exit_qualification >> 7) & 0x3;
if (gla_validity == 0x2) {
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
- (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
- vmcs_readl(GUEST_LINEAR_ADDRESS));
+ (long unsigned int)vmcs_read64(vcpu, GUEST_PHYSICAL_ADDRESS),
+ vmcs_readl(vcpu, GUEST_LINEAR_ADDRESS));
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
(long unsigned int)exit_qualification);
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
+ vcpu->run->exit_reason = GVM_EXIT_UNKNOWN;
vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
return 0;
}
@@ -6139,10 +4805,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
+ vmcs_set_bits(vcpu, GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
- gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- trace_kvm_page_fault(gpa, exit_qualification);
+ gpa = vmcs_read64(vcpu, GUEST_PHYSICAL_ADDRESS);
/* it is a read fault? */
error_code = (exit_qualification << 2) & PFERR_USER_MASK;
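
For reference, the EPT-violation exit-qualification bits that handle_ept_violation() folds into a page-fault error code, as defined in the Intel SDM (the macro names below are illustrative, not the kernel's):

/* Sketch: low bits of the EPT-violation exit qualification (Intel SDM). */
#define EPTV_READ        (1u << 0)   /* access was a data read              */
#define EPTV_WRITE       (1u << 1)   /* access was a data write             */
#define EPTV_FETCH       (1u << 2)   /* access was an instruction fetch     */
#define EPTV_READABLE    (1u << 3)   /* EPT entry allowed reads             */
#define EPTV_WRITABLE    (1u << 4)   /* EPT entry allowed writes            */
#define EPTV_EXECUTABLE  (1u << 5)   /* EPT entry allowed execution         */
#define EPTV_GLA_VALID   (1u << 7)   /* guest linear address field is valid */
#define EPTV_NMI_UNBLOCK (1u << 12)  /* exit unblocked NMIs via IRET        */
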
@@ -6163,10 +4828,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
int ret;
gpa_t gpa;
- gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+ gpa = vmcs_read64(vcpu, GUEST_PHYSICAL_ADDRESS);
+ if (!kvm_io_bus_write(vcpu, GVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
skip_emulated_instruction(vcpu);
- trace_kvm_fast_mmio(gpa);
return 1;
}
@@ -6184,7 +4848,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
/* It is the real ept misconfig */
WARN_ON(1);
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
+ vcpu->run->exit_reason = GVM_EXIT_UNKNOWN;
vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
return 0;
@@ -6195,11 +4859,11 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
u32 cpu_based_vm_exec_control;
/* clear pending NMI */
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control = vmcs_read32(vcpu, CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ vmcs_write32(vcpu, CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
++vcpu->stat.nmi_window_exits;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
return 1;
}
@@ -6213,14 +4877,14 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
bool intr_window_requested;
unsigned count = 130;
- cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_exec_ctrl = vmcs_read32(vcpu, CPU_BASED_VM_EXEC_CONTROL);
intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
while (vmx->emulation_required && count-- != 0) {
if (intr_window_requested && vmx_interrupt_allowed(vcpu))
return handle_interrupt_window(&vmx->vcpu);
- if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
+ if (test_bit(GVM_REQ_EVENT, &vcpu->requests))
return 1;
err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
@@ -6232,8 +4896,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
}
if (err != EMULATE_DONE) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->exit_reason = GVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = GVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
return 0;
}
@@ -6244,155 +4908,65 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
goto out;
}
+#if 0
if (signal_pending(current))
goto out;
if (need_resched())
schedule();
+#endif
}
out:
return ret;
}
-static int __grow_ple_window(int val)
-{
- if (ple_window_grow < 1)
- return ple_window;
-
- val = min(val, ple_window_actual_max);
-
- if (ple_window_grow < ple_window)
- val *= ple_window_grow;
- else
- val += ple_window_grow;
-
- return val;
-}
-
-static int __shrink_ple_window(int val, int modifier, int minimum)
-{
- if (modifier < 1)
- return ple_window;
-
- if (modifier < ple_window)
- val /= modifier;
- else
- val -= modifier;
-
- return max(val, minimum);
-}
-
-static void grow_ple_window(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- int old = vmx->ple_window;
-
- vmx->ple_window = __grow_ple_window(old);
-
- if (vmx->ple_window != old)
- vmx->ple_window_dirty = true;
-
- trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
-}
-
-static void shrink_ple_window(struct kvm_vcpu *vcpu)
+static int hardware_setup(void)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- int old = vmx->ple_window;
-
- vmx->ple_window = __shrink_ple_window(old,
- ple_window_shrink, ple_window);
-
- if (vmx->ple_window != old)
- vmx->ple_window_dirty = true;
-
- trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
-}
-
-/*
- * ple_window_actual_max is computed to be one grow_ple_window() below
- * ple_window_max. (See __grow_ple_window for the reason.)
- * This prevents overflows, because ple_window_max is int.
- * ple_window_max effectively rounded down to a multiple of ple_window_grow in
- * this process.
- * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
- */
-static void update_ple_window_actual_max(void)
-{
- ple_window_actual_max =
- __shrink_ple_window(max(ple_window_max, ple_window),
- ple_window_grow, INT_MIN);
-}
-
-/*
- * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
- */
-static void wakeup_handler(void)
-{
- struct kvm_vcpu *vcpu;
- int cpu = smp_processor_id();
-
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
- list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
- blocked_vcpu_list) {
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
- if (pi_test_on(pi_desc) == 1)
- kvm_vcpu_kick(vcpu);
- }
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
-}
-
-static __init int hardware_setup(void)
-{
- int r = -ENOMEM, i, msr;
+ int r = -ENOMEM, msr;
rdmsrl_safe(MSR_EFER, &host_efer);
- for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
- kvm_define_shared_msr(i, vmx_msr_index[i]);
-
- vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+ vmx_io_bitmap_a = (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_io_bitmap_a)
return r;
- vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+ vmx_io_bitmap_b = (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_io_bitmap_b)
goto out;
- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+ vmx_msr_bitmap_legacy = (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_legacy)
goto out1;
vmx_msr_bitmap_legacy_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
+ (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_legacy_x2apic)
goto out2;
vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
+ (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
goto out3;
- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+ vmx_msr_bitmap_longmode = (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode)
goto out4;
vmx_msr_bitmap_longmode_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
+ (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode_x2apic)
goto out5;
vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
+ (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
goto out6;
- vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ vmx_vmread_bitmap = (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap)
goto out7;
- vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ vmx_vmwrite_bitmap = (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx_vmwrite_bitmap)
goto out8;
@@ -6421,10 +4995,6 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_vpid())
enable_vpid = 0;
- if (!cpu_has_vmx_shadow_vmcs())
- enable_shadow_vmcs = 0;
- if (enable_shadow_vmcs)
- init_vmcs_shadow_fields();
if (!cpu_has_vmx_ept() ||
!cpu_has_vmx_ept_4levels()) {
@@ -6453,28 +5023,15 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_tpr_shadow())
kvm_x86_ops->update_cr8_intercept = NULL;
- if (enable_ept && !cpu_has_vmx_ept_2m_page())
- kvm_disable_largepages();
-
- if (!cpu_has_vmx_ple())
- ple_gap = 0;
-
if (!cpu_has_vmx_apicv())
enable_apicv = 0;
- if (cpu_has_vmx_tsc_scaling()) {
- kvm_has_tsc_control = true;
- kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
- kvm_tsc_scaling_ratio_frac_bits = 48;
- }
-
vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
@@ -6522,8 +5079,6 @@ static __init int hardware_setup(void)
} else
kvm_disable_tdp();
- update_ple_window_actual_max();
-
/*
* Only enable PML when hardware supports PML feature, and both EPT
* and EPT A/D bit features are enabled -- PML depends on them to work.
@@ -6538,78 +5093,50 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}
- if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
- u64 vmx_msr;
-
- rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
- cpu_preemption_timer_multi =
- vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
- } else {
- kvm_x86_ops->set_hv_timer = NULL;
- kvm_x86_ops->cancel_hv_timer = NULL;
- }
-
- kvm_set_posted_intr_wakeup_handler(wakeup_handler);
-
- kvm_mce_cap_supported |= MCG_LMCE_P;
+ //kvm_set_posted_intr_wakeup_handler(wakeup_handler);
return alloc_kvm_area();
out9:
- free_page((unsigned long)vmx_vmwrite_bitmap);
+ free_page((size_t)vmx_vmwrite_bitmap);
out8:
- free_page((unsigned long)vmx_vmread_bitmap);
+ free_page((size_t)vmx_vmread_bitmap);
out7:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
+ free_page((size_t)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
out6:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+ free_page((size_t)vmx_msr_bitmap_longmode_x2apic);
out5:
- free_page((unsigned long)vmx_msr_bitmap_longmode);
+ free_page((size_t)vmx_msr_bitmap_longmode);
out4:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
+ free_page((size_t)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
out3:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+ free_page((size_t)vmx_msr_bitmap_legacy_x2apic);
out2:
- free_page((unsigned long)vmx_msr_bitmap_legacy);
+ free_page((size_t)vmx_msr_bitmap_legacy);
out1:
- free_page((unsigned long)vmx_io_bitmap_b);
+ free_page((size_t)vmx_io_bitmap_b);
out:
- free_page((unsigned long)vmx_io_bitmap_a);
+ free_page((size_t)vmx_io_bitmap_a);
return r;
}
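
hardware_setup() above uses the usual cascading-goto unwind for its bitmap pages; a minimal generic sketch of that error-handling pattern (plain malloc stands in for __get_free_page):

#include <errno.h>
#include <stdlib.h>

/* Sketch: cascading-goto cleanup - each failure frees what was allocated
 * before it, in reverse order, through a chain of labels. */
static int setup_buffers(void **a, void **b, void **c)
{
	int r = -ENOMEM;

	*a = malloc(4096);
	if (!*a)
		return r;
	*b = malloc(4096);
	if (!*b)
		goto out_a;
	*c = malloc(4096);
	if (!*c)
		goto out_b;
	return 0;

out_b:
	free(*b);
out_a:
	free(*a);
	return r;
}
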
-static __exit void hardware_unsetup(void)
+static void hardware_unsetup(void)
{
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_legacy);
- free_page((unsigned long)vmx_msr_bitmap_longmode);
- free_page((unsigned long)vmx_io_bitmap_b);
- free_page((unsigned long)vmx_io_bitmap_a);
- free_page((unsigned long)vmx_vmwrite_bitmap);
- free_page((unsigned long)vmx_vmread_bitmap);
+ free_page((size_t)vmx_msr_bitmap_legacy_x2apic);
+ free_page((size_t)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
+ free_page((size_t)vmx_msr_bitmap_longmode_x2apic);
+ free_page((size_t)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
+ free_page((size_t)vmx_msr_bitmap_legacy);
+ free_page((size_t)vmx_msr_bitmap_longmode);
+ free_page((size_t)vmx_io_bitmap_b);
+ free_page((size_t)vmx_io_bitmap_a);
+ free_page((size_t)vmx_vmwrite_bitmap);
+ free_page((size_t)vmx_vmread_bitmap);
free_kvm_area();
}
-/*
- * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
- * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
- */
-static int handle_pause(struct kvm_vcpu *vcpu)
-{
- if (ple_gap)
- grow_ple_window(vcpu);
-
- skip_emulated_instruction(vcpu);
- kvm_vcpu_on_spin(vcpu);
-
- return 1;
-}
-
static int handle_nop(struct kvm_vcpu *vcpu)
{
skip_emulated_instruction(vcpu);
@@ -6650,11 +5177,13 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
{
struct vmcs02_list *item;
+#define LIST_ENTRY_TYPE_INFO struct vmcs02_list
list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
if (item->vmptr == vmx->nested.current_vmptr) {
list_move(&item->list, &vmx->nested.vmcs02_pool);
return &item->vmcs02;
}
+#undef LIST_ENTRY_TYPE_INFO
if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
/* Recycle the least recently used VMCS. */
@@ -6686,6 +5215,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
{
struct vmcs02_list *item;
+#define LIST_ENTRY_TYPE_INFO struct vmcs02_list
list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
if (item->vmptr == vmptr) {
free_loaded_vmcs(&item->vmcs02);
@@ -6694,6 +5224,7 @@ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
vmx->nested.vmcs02_num--;
return;
}
+#undef LIST_ENTRY_TYPE_INFO
}
/*
@@ -6706,6 +5237,7 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
struct vmcs02_list *item, *n;
WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
+#define LIST_ENTRY_TYPE_INFO struct vmcs02_list
list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
/*
* Something will leak if the above WARN triggers. Better than
@@ -6719,6 +5251,7 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
kfree(item);
vmx->nested.vmcs02_num--;
}
+#undef LIST_ENTRY_TYPE_INFO
}
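
The #define/#undef LIST_ENTRY_TYPE_INFO bracketing added around list_for_each_entry() in the hunks above presumably feeds the element type to a portability shim that cannot rely on GCC's typeof(). Purely as a hypothetical illustration of how such a macro could be consumed; none of these names are confirmed by this patch:

#include <stddef.h>

/* Hypothetical shim only: an iteration macro that takes its element type
 * from a caller-defined LIST_ENTRY_TYPE_INFO instead of typeof(). */
#define container_of_type(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

#define list_for_each_entry(pos, head, member)                                     \
	for (pos = container_of_type((head)->next, LIST_ENTRY_TYPE_INFO, member);  \
	     &pos->member != (head);                                                \
	     pos = container_of_type(pos->member.next, LIST_ENTRY_TYPE_INFO, member))
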
/*
@@ -6766,22 +5299,10 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
/* TODO: not to reset guest simply here. */
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_make_request(GVM_REQ_TRIPLE_FAULT, vcpu);
pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
}
-static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
-{
- struct vcpu_vmx *vmx =
- container_of(timer, struct vcpu_vmx, nested.preemption_timer);
-
- vmx->nested.preemption_timer_expired = true;
- kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
- kvm_vcpu_kick(&vmx->vcpu);
-
- return HRTIMER_NORESTART;
-}
-
/*
* Decode the memory-address operand of a vmx instruction, as recorded on an
* exit caused by such an instruction (run by a guest hypervisor).
@@ -6789,7 +5310,7 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
* #UD or #GP.
*/
static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
- unsigned long exit_qualification,
+ size_t exit_qualification,
u32 vmx_instruction_info, bool wr, gva_t *ret)
{
gva_t off;
@@ -6892,12 +5413,12 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
gva_t gva;
gpa_t vmptr;
struct x86_exception e;
- struct page *page;
+ PMDL kmap_mdl;
struct vcpu_vmx *vmx = to_vmx(vcpu);
int maxphyaddr = cpuid_maxphyaddr(vcpu);
- if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
+ if (get_vmx_mem_address(vcpu, vmcs_readl(vcpu, EXIT_QUALIFICATION),
+ vmcs_read32(vcpu, VMX_INSTRUCTION_INFO), false, &gva))
return 1;
if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
@@ -6924,15 +5445,15 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
return 1;
}
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL ||
- *(u32 *)kmap(page) != VMCS12_REVISION) {
+ kmap_mdl = nested_get_page(vcpu, vmptr);
+ if (kmap_mdl == NULL ||
+ *(u32 *)kmap(kmap_mdl) != VMCS12_REVISION) {
nested_vmx_failInvalid(vcpu);
- kunmap(page);
+ kunmap(kmap_mdl);
skip_emulated_instruction(vcpu);
return 1;
}
- kunmap(page);
+ kunmap(kmap_mdl);
vmx->nested.vmxon_ptr = vmptr;
break;
case EXIT_REASON_VMCLEAR:
@@ -7030,7 +5551,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
if (cpu_has_vmx_msr_bitmap()) {
vmx->nested.msr_bitmap =
- (unsigned long *)__get_free_page(GFP_KERNEL);
+ (size_t *)__get_free_page(GFP_KERNEL);
if (!vmx->nested.msr_bitmap)
goto out_msr_bitmap;
}
@@ -7053,10 +5574,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
vmx->nested.vmcs02_num = 0;
- hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL_PINNED);
- vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
-
vmx->nested.vmxon = true;
skip_emulated_instruction(vcpu);
@@ -7067,7 +5584,7 @@ out_shadow_vmcs:
kfree(vmx->nested.cached_vmcs12);
out_cached_vmcs12:
- free_page((unsigned long)vmx->nested.msr_bitmap);
+ free_page((size_t)vmx->nested.msr_bitmap);
out_msr_bitmap:
return -ENOMEM;
@@ -7105,6 +5622,8 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
{
+ struct kvm_vcpu *vcpu = &vmx->vcpu;
+
if (vmx->nested.current_vmptr == -1ull)
return;
@@ -7117,18 +5636,17 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
they were modified */
copy_shadow_to_vmcs12(vmx);
vmx->nested.sync_shadow_vmcs = false;
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ vmcs_clear_bits(vcpu, SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_SHADOW_VMCS);
- vmcs_write64(VMCS_LINK_POINTER, -1ull);
+ vmcs_write64(vcpu, VMCS_LINK_POINTER, -1ull);
}
- vmx->nested.posted_intr_nv = -1;
/* Flush VMCS12 to guest memory */
memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
VMCS12_SIZE);
- kunmap(vmx->nested.current_vmcs12_page);
- nested_release_page(vmx->nested.current_vmcs12_page);
+ kunmap(vmx->nested.current_vmcs12_mdl);
+ nested_release_page(vmx->nested.current_vmcs12_mdl);
vmx->nested.current_vmptr = -1ull;
vmx->nested.current_vmcs12 = NULL;
}
@@ -7146,7 +5664,7 @@ static void free_nested(struct vcpu_vmx *vmx)
free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
if (vmx->nested.msr_bitmap) {
- free_page((unsigned long)vmx->nested.msr_bitmap);
+ free_page((size_t)vmx->nested.msr_bitmap);
vmx->nested.msr_bitmap = NULL;
}
if (enable_shadow_vmcs) {
@@ -7156,19 +5674,13 @@ static void free_nested(struct vcpu_vmx *vmx)
}
kfree(vmx->nested.cached_vmcs12);
/* Unpin physical memory we referred to in current vmcs02 */
- if (vmx->nested.apic_access_page) {
- nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page = NULL;
- }
- if (vmx->nested.virtual_apic_page) {
- nested_release_page(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page = NULL;
+ if (vmx->nested.apic_access_mdl) {
+ nested_release_page(vmx->nested.apic_access_mdl);
+ vmx->nested.apic_access_mdl = NULL;
}
- if (vmx->nested.pi_desc_page) {
- kunmap(vmx->nested.pi_desc_page);
- nested_release_page(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc_page = NULL;
- vmx->nested.pi_desc = NULL;
+ if (vmx->nested.virtual_apic_mdl) {
+ nested_release_page(vmx->nested.virtual_apic_mdl);
+ vmx->nested.virtual_apic_mdl = NULL;
}
nested_free_all_saved_vmcss(vmx);
@@ -7191,7 +5703,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
gpa_t vmptr;
struct vmcs12 *vmcs12;
- struct page *page;
+ PMDL kmap_mdl;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -7202,8 +5714,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
+ kmap_mdl = nested_get_page(vcpu, vmptr);
+ if (kmap_mdl == NULL) {
/*
* For accurate processor emulation, VMCLEAR beyond available
* physical memory should do nothing at all. However, it is
@@ -7211,13 +5723,13 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
* resulted in this case, so let's shut down before doing any
* more damage:
*/
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_make_request(GVM_REQ_TRIPLE_FAULT, vcpu);
return 1;
}
- vmcs12 = kmap(page);
+ vmcs12 = kmap(kmap_mdl);
vmcs12->launch_state = 0;
- kunmap(page);
- nested_release_page(page);
+ kunmap(kmap_mdl);
+ nested_release_page(kmap_mdl);
nested_free_vmcs02(vmx, vmptr);
@@ -7248,14 +5760,14 @@ enum vmcs_field_type {
VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
};
-static inline int vmcs_field_type(unsigned long field)
+static inline int vmcs_field_type(size_t field)
{
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
return VMCS_FIELD_TYPE_U32;
return (field >> 13) & 0x3 ;
}
-static inline int vmcs_field_readonly(unsigned long field)
+static inline int vmcs_field_readonly(size_t field)
{
return (((field >> 10) & 0x3) == 1);
}
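
vmcs_field_type() and vmcs_field_readonly() above rely on the VMCS field-encoding layout from the Intel SDM; a short sketch of that encoding (the helper is illustrative):

/* Sketch: VMCS field encoding (Intel SDM).
 *   bit  0     - access type: 1 selects the high half of a 64-bit field
 *   bits 11:10 - field type: 1 means VM-exit information, i.e. read-only
 *   bits 14:13 - width: 0=16-bit, 1=64-bit, 2=32-bit, 3=natural width
 */
static int vmcs_field_width_bits(unsigned long field)
{
	if (field & 1)                      /* *_HIGH alias of a 64-bit field */
		return 32;
	switch ((field >> 13) & 3) {
	case 0:  return 16;
	case 1:  return 64;
	case 2:  return 32;
	default: return (int)(8 * sizeof(void *)); /* natural width */
	}
}
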
@@ -7268,7 +5780,7 @@ static inline int vmcs_field_readonly(unsigned long field)
* 64-bit fields are to be returned).
*/
static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
- unsigned long field, u64 *ret)
+ size_t field, u64 *ret)
{
short offset = vmcs_field_to_offset(field);
char *p;
@@ -7299,7 +5811,7 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
- unsigned long field, u64 field_value){
+ size_t field, u64 field_value){
short offset = vmcs_field_to_offset(field);
char *p = ((char *) get_vmcs12(vcpu)) + offset;
if (offset < 0)
@@ -7328,11 +5840,12 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
{
int i;
- unsigned long field;
+ size_t field;
u64 field_value;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
- const unsigned long *fields = shadow_read_write_fields;
+ const size_t *fields = shadow_read_write_fields;
const int num_fields = max_shadow_read_write_fields;
+ struct kvm_vcpu *vcpu = &vmx->vcpu;
preempt_disable();
@@ -7342,16 +5855,16 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
field = fields[i];
switch (vmcs_field_type(field)) {
case VMCS_FIELD_TYPE_U16:
- field_value = vmcs_read16(field);
+ field_value = vmcs_read16(vcpu, field);
break;
case VMCS_FIELD_TYPE_U32:
- field_value = vmcs_read32(field);
+ field_value = vmcs_read32(vcpu, field);
break;
case VMCS_FIELD_TYPE_U64:
- field_value = vmcs_read64(field);
+ field_value = vmcs_read64(vcpu, field);
break;
case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- field_value = vmcs_readl(field);
+ field_value = vmcs_readl(vcpu, field);
break;
default:
WARN_ON(1);
@@ -7368,7 +5881,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
- const unsigned long *fields[] = {
+ const size_t *fields[] = {
shadow_read_write_fields,
shadow_read_only_fields
};
@@ -7377,9 +5890,10 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
max_shadow_read_only_fields
};
int i, q;
- unsigned long field;
+ size_t field;
u64 field_value = 0;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
+ struct kvm_vcpu *vcpu = &vmx->vcpu;
vmcs_load(shadow_vmcs);
@@ -7390,16 +5904,16 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
switch (vmcs_field_type(field)) {
case VMCS_FIELD_TYPE_U16:
- vmcs_write16(field, (u16)field_value);
+ vmcs_write16(vcpu, field, (u16)field_value);
break;
case VMCS_FIELD_TYPE_U32:
- vmcs_write32(field, (u32)field_value);
+ vmcs_write32(vcpu, field, (u32)field_value);
break;
case VMCS_FIELD_TYPE_U64:
- vmcs_write64(field, (u64)field_value);
+ vmcs_write64(vcpu, field, (u64)field_value);
break;
case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- vmcs_writel(field, (long)field_value);
+ vmcs_writel(vcpu, field, (long)field_value);
break;
default:
WARN_ON(1);
@@ -7429,10 +5943,10 @@ static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
static int handle_vmread(struct kvm_vcpu *vcpu)
{
- unsigned long field;
+ size_t field;
u64 field_value;
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ size_t exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
+ u32 vmx_instruction_info = vmcs_read32(vcpu, VMX_INSTRUCTION_INFO);
gva_t gva = 0;
if (!nested_vmx_check_permission(vcpu) ||
@@ -7472,10 +5986,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
static int handle_vmwrite(struct kvm_vcpu *vcpu)
{
- unsigned long field;
+ size_t field;
gva_t gva;
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ size_t exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
+ u32 vmx_instruction_info = vmcs_read32(vcpu, VMX_INSTRUCTION_INFO);
/* The value to write might be 32 or 64 bits, depending on L1's long
* mode, and eventually we need to write that into a field of several
* possible lengths. The code below first zero-extends the value to 64
@@ -7537,17 +6051,17 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
if (vmx->nested.current_vmptr != vmptr) {
struct vmcs12 *new_vmcs12;
- struct page *page;
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
+ PMDL kmap_mdl;
+ kmap_mdl = nested_get_page(vcpu, vmptr);
+ if (kmap_mdl == NULL) {
nested_vmx_failInvalid(vcpu);
skip_emulated_instruction(vcpu);
return 1;
}
- new_vmcs12 = kmap(page);
+ new_vmcs12 = kmap(kmap_mdl);
if (new_vmcs12->revision_id != VMCS12_REVISION) {
- kunmap(page);
- nested_release_page_clean(page);
+ kunmap(kmap_mdl);
+ nested_release_page(kmap_mdl);
nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
skip_emulated_instruction(vcpu);
@@ -7557,7 +6071,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
nested_release_vmcs12(vmx);
vmx->nested.current_vmptr = vmptr;
vmx->nested.current_vmcs12 = new_vmcs12;
- vmx->nested.current_vmcs12_page = page;
+ vmx->nested.current_vmcs12_mdl = kmap_mdl;
/*
* Load VMCS12 from guest memory since it is not already
* cached.
@@ -7566,9 +6080,9 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
vmx->nested.current_vmcs12, VMCS12_SIZE);
if (enable_shadow_vmcs) {
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ vmcs_set_bits(vcpu, SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_SHADOW_VMCS);
- vmcs_write64(VMCS_LINK_POINTER,
+ vmcs_write64(vcpu, VMCS_LINK_POINTER,
__pa(vmx->vmcs01.shadow_vmcs));
vmx->nested.sync_shadow_vmcs = true;
}
@@ -7582,8 +6096,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
/* Emulate the VMPTRST instruction */
static int handle_vmptrst(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ size_t exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
+ u32 vmx_instruction_info = vmcs_read32(vcpu, VMX_INSTRUCTION_INFO);
gva_t vmcs_gva;
struct x86_exception e;
@@ -7610,7 +6124,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 vmx_instruction_info, types;
- unsigned long type;
+ size_t type;
gva_t gva;
struct x86_exception e;
struct {
@@ -7632,7 +6146,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
return 1;
}
- vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ vmx_instruction_info = vmcs_read32(vcpu, VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
@@ -7647,7 +6161,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
/* According to the Intel VMX instruction reference, the memory
* operand is read even if it isn't needed (e.g., for type==global)
*/
- if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+ if (get_vmx_mem_address(vcpu, vmcs_readl(vcpu, EXIT_QUALIFICATION),
vmx_instruction_info, false, &gva))
return 1;
if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
@@ -7664,7 +6178,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
*/
case VMX_EPT_EXTENT_CONTEXT:
kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(GVM_REQ_TLB_FLUSH, vcpu);
nested_vmx_succeed(vcpu);
break;
default:
@@ -7680,7 +6194,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 vmx_instruction_info;
- unsigned long type, types;
+ size_t type, types;
gva_t gva;
struct x86_exception e;
int vpid;
@@ -7695,7 +6209,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ vmx_instruction_info = vmcs_read32(vcpu, VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
@@ -7710,7 +6224,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
/* according to the intel vmx instruction reference, the memory
* operand is read even if it isn't needed (e.g., for type==global)
*/
- if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+ if (get_vmx_mem_address(vcpu, vmcs_readl(vcpu, EXIT_QUALIFICATION),
vmx_instruction_info, false, &gva))
return 1;
if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
@@ -7722,7 +6236,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
/*
- * Old versions of KVM use the single-context version so we
+ * Old versions of kvm use the single-context version so we
* have to support it; just treat it the same as all-context.
*/
case VMX_VPID_EXTENT_ALL_CONTEXT:
@@ -7741,11 +6255,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
static int handle_pml_full(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification;
-
- trace_kvm_pml_full(vcpu->vcpu_id);
+ size_t exit_qualification;
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
/*
* PML buffer FULL happened while executing iret from NMI,
@@ -7754,7 +6266,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ vmcs_set_bits(vcpu, GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
/*
@@ -7764,12 +6276,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
return 1;
}
-static int handle_preemption_timer(struct kvm_vcpu *vcpu)
-{
- kvm_lapic_expired_hv_timer(vcpu);
- return 1;
-}
-
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7791,7 +6297,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_INVD] = handle_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_RDPMC] = handle_rdpmc,
- [EXIT_REASON_VMCALL] = handle_vmcall,
[EXIT_REASON_VMCLEAR] = handle_vmclear,
[EXIT_REASON_VMLAUNCH] = handle_vmlaunch,
[EXIT_REASON_VMPTRLD] = handle_vmptrld,
@@ -7811,7 +6316,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
- [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
[EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
@@ -7820,7 +6324,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
- [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
};
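
The handler table above is indexed directly by the basic exit reason; a minimal generic sketch of that dispatch pattern (types and names here are illustrative):

#include <stddef.h>

/* Sketch: dispatch by indexing a handler table with the basic exit reason,
 * falling back when the slot is empty or out of range. */
typedef int (*exit_handler_t)(void *vcpu);

static int dispatch_exit(exit_handler_t *handlers, size_t nr_handlers,
			 unsigned int exit_reason, void *vcpu)
{
	if (exit_reason < nr_handlers && handlers[exit_reason])
		return handlers[exit_reason](vcpu);
	return -1;   /* unexpected exit reason */
}
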
static const int kvm_vmx_max_exit_handlers =
@@ -7829,7 +6332,7 @@ static const int kvm_vmx_max_exit_handlers =
static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- unsigned long exit_qualification;
+ size_t exit_qualification;
gpa_t bitmap, last_bitmap;
unsigned int port;
int size;
@@ -7838,7 +6341,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
@@ -7915,10 +6418,10 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ size_t exit_qualification = vmcs_readl(vcpu, EXIT_QUALIFICATION);
int cr = exit_qualification & 15;
int reg = (exit_qualification >> 8) & 15;
- unsigned long val = kvm_register_readl(vcpu, reg);
+ size_t val = kvm_register_readl(vcpu, reg);
switch ((exit_qualification >> 4) & 3) {
case 0: /* mov to cr */
@@ -7995,24 +6498,17 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
*/
static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
{
- u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ u32 intr_info = vmcs_read32(vcpu, VM_EXIT_INTR_INFO);
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
u32 exit_reason = vmx->exit_reason;
- trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
- vmcs_readl(EXIT_QUALIFICATION),
- vmx->idt_vectoring_info,
- intr_info,
- vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
- KVM_ISA_VMX);
-
if (vmx->nested.nested_run_pending)
return false;
if (unlikely(vmx->fail)) {
pr_info_ratelimited("%s failed vm entry %x\n", __func__,
- vmcs_read32(VM_INSTRUCTION_ERROR));
+ vmcs_read32(vcpu, VM_INSTRUCTION_ERROR));
return true;
}
@@ -8022,15 +6518,12 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return false;
else if (is_page_fault(intr_info))
return enable_ept;
- else if (is_no_device(intr_info) &&
- !(vmcs12->guest_cr0 & X86_CR0_TS))
- return false;
else if (is_debug(intr_info) &&
vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ (GVM_GUESTDBG_SINGLESTEP | GVM_GUESTDBG_USE_HW_BP))
return false;
else if (is_breakpoint(intr_info) &&
- vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ vcpu->guest_debug & GVM_GUESTDBG_USE_SW_BP)
return false;
return vmcs12->exception_bitmap &
(1u << (intr_info & INTR_INFO_VECTOR_MASK));
@@ -8129,8 +6622,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* the XSS exit bitmap in vmcs12.
*/
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
- case EXIT_REASON_PREEMPTION_TIMER:
- return false;
default:
return true;
}
@@ -8138,8 +6629,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
{
- *info1 = vmcs_readl(EXIT_QUALIFICATION);
- *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
+ *info1 = vmcs_readl(vcpu, EXIT_QUALIFICATION);
+ *info2 = vmcs_read32(vcpu, VM_EXIT_INTR_INFO);
}
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
@@ -8156,7 +6647,7 @@ static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
u64 *pml_buf;
u16 pml_idx;
- pml_idx = vmcs_read16(GUEST_PML_INDEX);
+ pml_idx = vmcs_read16(vcpu, GUEST_PML_INDEX);
/* Do nothing if PML buffer is empty */
if (pml_idx == (PML_ENTITY_NUM - 1))
@@ -8178,7 +6669,7 @@ static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
}
/* reset PML index */
- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ vmcs_write16(vcpu, GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
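
A sketch of the drain step performed by vmx_flush_pml_buffer() above, assuming the SDM semantics of the page-modification log: the CPU fills the 512-entry buffer from the top down and GUEST_PML_INDEX names the next free slot, so logged GPAs sit in entries pml_idx+1 through 511 (the callback is a stand-in, not a helper from this patch):

#include <stdint.h>

#define PML_ENTITY_NUM 512   /* one 4K page of 64-bit guest-physical addresses */

static void drain_pml_sketch(const uint64_t *pml_buf, uint16_t pml_idx,
			     void (*mark_dirty)(uint64_t gpa))
{
	uint16_t i;

	if (pml_idx == PML_ENTITY_NUM - 1)
		return;                       /* buffer is still empty */

	/* A completely full buffer reports index 0xffff; the uint16_t
	 * increment below wraps that back to entry 0. */
	for (i = (uint16_t)(pml_idx + 1); i < PML_ENTITY_NUM; i++)
		mark_dirty(pml_buf[i]);
}
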
/*
@@ -8199,160 +6690,156 @@ static void kvm_flush_pml_buffers(struct kvm *kvm)
kvm_vcpu_kick(vcpu);
}
-static void vmx_dump_sel(char *name, uint32_t sel)
+static void vmx_dump_sel(struct kvm_vcpu *vcpu, char *name, uint32_t sel)
{
- pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
- name, vmcs_read32(sel),
- vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
- vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
- vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
+ pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016llx\n",
+ name, vmcs_read32(vcpu, sel),
+ vmcs_read32(vcpu, sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
+ vmcs_read32(vcpu, sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
+ vmcs_readl(vcpu, sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
}
-static void vmx_dump_dtsel(char *name, uint32_t limit)
+static void vmx_dump_dtsel(struct kvm_vcpu *vcpu, char *name, uint32_t limit)
{
- pr_err("%s limit=0x%08x, base=0x%016lx\n",
- name, vmcs_read32(limit),
- vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
+ pr_err("%s limit=0x%08x, base=0x%016llx\n",
+ name, vmcs_read32(vcpu, limit),
+ vmcs_readl(vcpu, limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
}
-static void dump_vmcs(void)
+static void dump_vmcs(struct kvm_vcpu *vcpu)
{
- u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
- u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
- u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+ u32 vmentry_ctl = vmcs_read32(vcpu, VM_ENTRY_CONTROLS);
+ u32 vmexit_ctl = vmcs_read32(vcpu, VM_EXIT_CONTROLS);
+ u32 cpu_based_exec_ctrl = vmcs_read32(vcpu, CPU_BASED_VM_EXEC_CONTROL);
+ u32 pin_based_exec_ctrl = vmcs_read32(vcpu, PIN_BASED_VM_EXEC_CONTROL);
u32 secondary_exec_control = 0;
- unsigned long cr4 = vmcs_readl(GUEST_CR4);
- u64 efer = vmcs_read64(GUEST_IA32_EFER);
+ size_t cr4 = vmcs_readl(vcpu, GUEST_CR4);
+ u64 efer = vmcs_read64(vcpu, GUEST_IA32_EFER);
int i, n;
if (cpu_has_secondary_exec_ctrls())
- secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ secondary_exec_control = vmcs_read32(vcpu, SECONDARY_VM_EXEC_CONTROL);
pr_err("*** Guest State ***\n");
- pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
- vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
- vmcs_readl(CR0_GUEST_HOST_MASK));
- pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
- cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
- pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
+ pr_err("CR0: actual=0x%016llx, shadow=0x%016llx, gh_mask=%016llx\n",
+ vmcs_readl(vcpu, GUEST_CR0), vmcs_readl(vcpu, CR0_READ_SHADOW),
+ vmcs_readl(vcpu, CR0_GUEST_HOST_MASK));
+ pr_err("CR4: actual=0x%016llx, shadow=0x%016llx, gh_mask=%016llx\n",
+ cr4, vmcs_readl(vcpu, CR4_READ_SHADOW), vmcs_readl(vcpu, CR4_GUEST_HOST_MASK));
+ pr_err("CR3 = 0x%016llx\n", vmcs_readl(vcpu, GUEST_CR3));
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
(cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
{
pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
- vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
+ vmcs_read64(vcpu, GUEST_PDPTR0), vmcs_read64(vcpu, GUEST_PDPTR1));
pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
- vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
- }
- pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
- vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
- pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
- vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
- pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
- vmcs_readl(GUEST_SYSENTER_ESP),
- vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
- vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
- vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
- vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
- vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
- vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
- vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
- vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
- vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
- vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
- vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
+ vmcs_read64(vcpu, GUEST_PDPTR2), vmcs_read64(vcpu, GUEST_PDPTR3));
+ }
+ pr_err("RSP = 0x%016llx RIP = 0x%016llx\n",
+ vmcs_readl(vcpu, GUEST_RSP), vmcs_readl(vcpu, GUEST_RIP));
+ pr_err("RFLAGS=0x%08lx DR7 = 0x%016llx\n",
+ vmcs_readl(vcpu, GUEST_RFLAGS), vmcs_readl(vcpu, GUEST_DR7));
+ pr_err("Sysenter RSP=%016llx CS:RIP=%04x:%016llx\n",
+ vmcs_readl(vcpu, GUEST_SYSENTER_ESP),
+ vmcs_read32(vcpu, GUEST_SYSENTER_CS), vmcs_readl(vcpu, GUEST_SYSENTER_EIP));
+ vmx_dump_sel(vcpu, "CS: ", GUEST_CS_SELECTOR);
+ vmx_dump_sel(vcpu, "DS: ", GUEST_DS_SELECTOR);
+ vmx_dump_sel(vcpu, "SS: ", GUEST_SS_SELECTOR);
+ vmx_dump_sel(vcpu, "ES: ", GUEST_ES_SELECTOR);
+ vmx_dump_sel(vcpu, "FS: ", GUEST_FS_SELECTOR);
+ vmx_dump_sel(vcpu, "GS: ", GUEST_GS_SELECTOR);
+ vmx_dump_dtsel(vcpu, "GDTR:", GUEST_GDTR_LIMIT);
+ vmx_dump_sel(vcpu, "LDTR:", GUEST_LDTR_SELECTOR);
+ vmx_dump_dtsel(vcpu, "IDTR:", GUEST_IDTR_LIMIT);
+ vmx_dump_sel(vcpu, "TR: ", GUEST_TR_SELECTOR);
if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
(vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
- efer, vmcs_read64(GUEST_IA32_PAT));
- pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
- vmcs_read64(GUEST_IA32_DEBUGCTL),
- vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
+ efer, vmcs_read64(vcpu, GUEST_IA32_PAT));
+ pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016llx\n",
+ vmcs_read64(vcpu, GUEST_IA32_DEBUGCTL),
+ vmcs_readl(vcpu, GUEST_PENDING_DBG_EXCEPTIONS));
if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016llx\n",
- vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
+ vmcs_read64(vcpu, GUEST_IA32_PERF_GLOBAL_CTRL));
if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
- pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
+ pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(vcpu, GUEST_BNDCFGS));
pr_err("Interruptibility = %08x ActivityState = %08x\n",
- vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
- vmcs_read32(GUEST_ACTIVITY_STATE));
+ vmcs_read32(vcpu, GUEST_INTERRUPTIBILITY_INFO),
+ vmcs_read32(vcpu, GUEST_ACTIVITY_STATE));
if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
pr_err("InterruptStatus = %04x\n",
- vmcs_read16(GUEST_INTR_STATUS));
+ vmcs_read16(vcpu, GUEST_INTR_STATUS));
pr_err("*** Host State ***\n");
- pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
- vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
+ pr_err("RIP = 0x%016llx RSP = 0x%016llx\n",
+ vmcs_readl(vcpu, HOST_RIP), vmcs_readl(vcpu, HOST_RSP));
pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
- vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
- vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
- vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
- vmcs_read16(HOST_TR_SELECTOR));
- pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
- vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
- vmcs_readl(HOST_TR_BASE));
- pr_err("GDTBase=%016lx IDTBase=%016lx\n",
- vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
- pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
- vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
- vmcs_readl(HOST_CR4));
- pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
- vmcs_readl(HOST_IA32_SYSENTER_ESP),
- vmcs_read32(HOST_IA32_SYSENTER_CS),
- vmcs_readl(HOST_IA32_SYSENTER_EIP));
+ vmcs_read16(vcpu, HOST_CS_SELECTOR), vmcs_read16(vcpu, HOST_SS_SELECTOR),
+ vmcs_read16(vcpu, HOST_DS_SELECTOR), vmcs_read16(vcpu, HOST_ES_SELECTOR),
+ vmcs_read16(vcpu, HOST_FS_SELECTOR), vmcs_read16(vcpu, HOST_GS_SELECTOR),
+ vmcs_read16(vcpu, HOST_TR_SELECTOR));
+ pr_err("FSBase=%016llx GSBase=%016llx TRBase=%016llx\n",
+ vmcs_readl(vcpu, HOST_FS_BASE), vmcs_readl(vcpu, HOST_GS_BASE),
+ vmcs_readl(vcpu, HOST_TR_BASE));
+ pr_err("GDTBase=%016llx IDTBase=%016llx\n",
+ vmcs_readl(vcpu, HOST_GDTR_BASE), vmcs_readl(vcpu, HOST_IDTR_BASE));
+ pr_err("CR0=%016llx CR3=%016llx CR4=%016llx\n",
+ vmcs_readl(vcpu, HOST_CR0), vmcs_readl(vcpu, HOST_CR3),
+ vmcs_readl(vcpu, HOST_CR4));
+ pr_err("Sysenter RSP=%016llx CS:RIP=%04x:%016llx\n",
+ vmcs_readl(vcpu, HOST_IA32_SYSENTER_ESP),
+ vmcs_read32(vcpu, HOST_IA32_SYSENTER_CS),
+ vmcs_readl(vcpu, HOST_IA32_SYSENTER_EIP));
if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
- vmcs_read64(HOST_IA32_EFER),
- vmcs_read64(HOST_IA32_PAT));
+ vmcs_read64(vcpu, HOST_IA32_EFER),
+ vmcs_read64(vcpu, HOST_IA32_PAT));
if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016llx\n",
- vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
+ vmcs_read64(vcpu, HOST_IA32_PERF_GLOBAL_CTRL));
pr_err("*** Control State ***\n");
pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
- vmcs_read32(EXCEPTION_BITMAP),
- vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
- vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
+ vmcs_read32(vcpu, EXCEPTION_BITMAP),
+ vmcs_read32(vcpu, PAGE_FAULT_ERROR_CODE_MASK),
+ vmcs_read32(vcpu, PAGE_FAULT_ERROR_CODE_MATCH));
pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
- vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
- vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
- vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
+ vmcs_read32(vcpu, VM_ENTRY_INTR_INFO_FIELD),
+ vmcs_read32(vcpu, VM_ENTRY_EXCEPTION_ERROR_CODE),
+ vmcs_read32(vcpu, VM_ENTRY_INSTRUCTION_LEN));
pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
- vmcs_read32(VM_EXIT_INTR_INFO),
- vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
- vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
- pr_err(" reason=%08x qualification=%016lx\n",
- vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
+ vmcs_read32(vcpu, VM_EXIT_INTR_INFO),
+ vmcs_read32(vcpu, VM_EXIT_INTR_ERROR_CODE),
+ vmcs_read32(vcpu, VM_EXIT_INSTRUCTION_LEN));
+ pr_err(" reason=%08x qualification=%016llx\n",
+ vmcs_read32(vcpu, VM_EXIT_REASON), vmcs_readl(vcpu, EXIT_QUALIFICATION));
+ pr_err(" gpa=%016llx\n", vmcs_read64(vcpu, GUEST_PHYSICAL_ADDRESS));
pr_err("IDTVectoring: info=%08x errcode=%08x\n",
- vmcs_read32(IDT_VECTORING_INFO_FIELD),
- vmcs_read32(IDT_VECTORING_ERROR_CODE));
- pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
- if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
- pr_err("TSC Multiplier = 0x%016llx\n",
- vmcs_read64(TSC_MULTIPLIER));
+ vmcs_read32(vcpu, IDT_VECTORING_INFO_FIELD),
+ vmcs_read32(vcpu, IDT_VECTORING_ERROR_CODE));
+ pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(vcpu, TSC_OFFSET));
if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
- pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
- if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
- pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
+ pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(vcpu, TPR_THRESHOLD));
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
- pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
- n = vmcs_read32(CR3_TARGET_COUNT);
+ pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(vcpu, EPT_POINTER));
+ n = vmcs_read32(vcpu, CR3_TARGET_COUNT);
for (i = 0; i + 1 < n; i += 4)
- pr_err("CR3 target%u=%016lx target%u=%016lx\n",
- i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
- i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
+ pr_err("CR3 target%u=%016llx target%u=%016llx\n",
+ i, vmcs_readl(vcpu, CR3_TARGET_VALUE0 + i * 2),
+ i + 1, vmcs_readl(vcpu, CR3_TARGET_VALUE0 + i * 2 + 2));
if (i < n)
- pr_err("CR3 target%u=%016lx\n",
- i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
+ pr_err("CR3 target%u=%016llx\n",
+ i, vmcs_readl(vcpu, CR3_TARGET_VALUE0 + i * 2));
if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
pr_err("PLE Gap=%08x Window=%08x\n",
- vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
+ vmcs_read32(vcpu, PLE_GAP), vmcs_read32(vcpu, PLE_WINDOW));
if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
pr_err("Virtual processor ID = 0x%04x\n",
- vmcs_read16(VIRTUAL_PROCESSOR_ID));
+ vmcs_read16(vcpu, VIRTUAL_PROCESSOR_ID));
}
/*
@@ -8365,8 +6852,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 exit_reason = vmx->exit_reason;
u32 vectoring_info = vmx->idt_vectoring_info;
- trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
-
/*
* Flush logged GPAs PML buffer, this will make dirty_bitmap more
* updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
@@ -8383,23 +6868,23 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
nested_vmx_vmexit(vcpu, exit_reason,
- vmcs_read32(VM_EXIT_INTR_INFO),
- vmcs_readl(EXIT_QUALIFICATION));
+ vmcs_read32(vcpu, VM_EXIT_INTR_INFO),
+ vmcs_readl(vcpu, EXIT_QUALIFICATION));
return 1;
}
if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
- dump_vmcs();
- vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ dump_vmcs(vcpu);
+ vcpu->run->exit_reason = GVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= exit_reason;
return 0;
}
if (unlikely(vmx->fail)) {
- vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ vcpu->run->exit_reason = GVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
- = vmcs_read32(VM_INSTRUCTION_ERROR);
+ = vmcs_read32(vcpu, VM_INSTRUCTION_ERROR);
return 0;
}
@@ -8415,8 +6900,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
exit_reason != EXIT_REASON_EPT_VIOLATION &&
exit_reason != EXIT_REASON_PML_FULL &&
exit_reason != EXIT_REASON_TASK_SWITCH)) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
+ vcpu->run->exit_reason = GVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = GVM_INTERNAL_ERROR_DELIVERY_EV;
vcpu->run->internal.ndata = 2;
vcpu->run->internal.data[0] = vectoring_info;
vcpu->run->internal.data[1] = exit_reason;
@@ -8447,7 +6932,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
else {
- WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+ //WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -8462,11 +6947,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
return;
if (irr == -1 || tpr < irr) {
- vmcs_write32(TPR_THRESHOLD, 0);
+ vmcs_write32(vcpu, TPR_THRESHOLD, 0);
return;
}
- vmcs_write32(TPR_THRESHOLD, irr);
+ vmcs_write32(vcpu, TPR_THRESHOLD, irr);
}
static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
@@ -8485,7 +6970,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
if (!cpu_need_tpr_shadow(vcpu))
return;
- sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ sec_exec_control = vmcs_read32(vcpu, SECONDARY_VM_EXEC_CONTROL);
if (set) {
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
@@ -8494,14 +6979,14 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
}
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+ vmcs_write32(vcpu, SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
vmx_set_msr_bitmap(vcpu);
}
static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
+ //struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
* Currently we do not handle the nested case where L2 has an
@@ -8516,10 +7001,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
* prepare_vmcs02. If the latter, the vmcs01 will be updated in
* the next L2->L1 exit.
*/
- if (!is_guest_mode(vcpu) ||
- !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
- vmcs_write64(APIC_ACCESS_ADDR, hpa);
+ //if (!is_guest_mode(vcpu) ||
+ //!nested_cpu_has2(get_vmcs12(&vmx->vcpu),
+ //SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ vmcs_write64(vcpu, APIC_ACCESS_ADDR, hpa);
}
static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
@@ -8530,16 +7015,16 @@ static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
if (max_isr == -1)
max_isr = 0;
- status = vmcs_read16(GUEST_INTR_STATUS);
+ status = vmcs_read16(vcpu, GUEST_INTR_STATUS);
old = status >> 8;
if (max_isr != old) {
status &= 0xff;
status |= max_isr << 8;
- vmcs_write16(GUEST_INTR_STATUS, status);
+ vmcs_write16(vcpu, GUEST_INTR_STATUS, status);
}
}
-static void vmx_set_rvi(int vector)
+static void vmx_set_rvi(struct kvm_vcpu *vcpu, int vector)
{
u16 status;
u8 old;
@@ -8547,19 +7032,19 @@ static void vmx_set_rvi(int vector)
if (vector == -1)
vector = 0;
- status = vmcs_read16(GUEST_INTR_STATUS);
+ status = vmcs_read16(vcpu, GUEST_INTR_STATUS);
old = (u8)status & 0xff;
if ((u8)vector != old) {
status &= ~0xff;
status |= (u8)vector;
- vmcs_write16(GUEST_INTR_STATUS, status);
+ vmcs_write16(vcpu, GUEST_INTR_STATUS, status);
}
}
static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
if (!is_guest_mode(vcpu)) {
- vmx_set_rvi(max_irr);
+ vmx_set_rvi(vcpu, max_irr);
return;
}
@@ -8589,12 +7074,13 @@ static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
if (!kvm_vcpu_apicv_active(vcpu))
return;
- vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
- vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
- vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
- vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
}
+static u64 nmi_count = 0;
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -8603,7 +7089,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
|| vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
return;
- vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ vmx->exit_intr_info = vmcs_read32(&vmx->vcpu, VM_EXIT_INTR_INFO);
exit_intr_info = vmx->exit_intr_info;
/* Handle machine checks before interrupts are enabled */
@@ -8614,15 +7100,15 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
(exit_intr_info & INTR_INFO_VALID_MASK)) {
kvm_before_handle_nmi(&vmx->vcpu);
- asm("int $2");
+ __int2();
+ nmi_count++;
kvm_after_handle_nmi(&vmx->vcpu);
}
}
static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
{
- u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- register void *__sp asm(_ASM_SP);
+ u32 exit_intr_info = vmcs_read32(vcpu, VM_EXIT_INTR_INFO);
/*
* If external interrupt exists, IF bit is set in rflags/eflags on the
@@ -8632,36 +7118,13 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
unsigned int vector;
- unsigned long entry;
+ size_t entry;
gate_desc *desc;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-#ifdef CONFIG_X86_64
- unsigned long tmp;
-#endif
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- desc = (gate_desc *)vmx->host_idt_base + vector;
+ desc = (gate_desc *)(this_cpu_ptr(&host_idt))->address + vector;
entry = gate_offset(*desc);
- asm volatile(
-#ifdef CONFIG_X86_64
- "mov %%" _ASM_SP ", %[sp]\n\t"
- "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
- "push $%c[ss]\n\t"
- "push %[sp]\n\t"
-#endif
- "pushf\n\t"
- __ASM_SIZE(push) " $%c[cs]\n\t"
- "call *%[entry]\n\t"
- :
-#ifdef CONFIG_X86_64
- [sp]"=&r"(tmp),
-#endif
- "+r"(__sp)
- :
- [entry]"r"(entry),
- [ss]"i"(__KERNEL_DS),
- [cs]"i"(__KERNEL_CS)
- );
+ __asm_vmx_handle_external_intr(entry);
}
}
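The deleted inline assembly above hand-builds an interrupt frame (SS and the saved RSP on 64-bit, then RFLAGS and CS) and calls through the host IDT gate so the handler's IRET unwinds cleanly; the port moves that work into a separate assembly routine. The contract the replacement has to honour, with the prototype inferred from the call site (the body itself is not shown in this diff):

    /*
     * Assumed prototype for the external helper: given the gate's handler
     * address, it must (1) align RSP and push SS plus the saved RSP on
     * 64-bit, (2) push RFLAGS and the kernel CS, and (3) call the handler,
     * reproducing what the removed GCC inline asm did.
     */
    void __asm_vmx_handle_external_intr(size_t entry);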
@@ -8698,7 +7161,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
* Can't use vmx->exit_intr_info since we're not sure what
* the exit reason is.
*/
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ exit_intr_info = vmcs_read32(&vmx->vcpu, VM_EXIT_INTR_INFO);
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
@@ -8713,11 +7176,11 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
*/
if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
vector != DF_VECTOR && !idtv_info_valid)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ vmcs_set_bits(&vmx->vcpu, GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmx->nmi_known_unmasked =
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ !(vmcs_read32(&vmx->vcpu, GUEST_INTERRUPTIBILITY_INFO)
& GUEST_INTR_STATE_NMI);
} else if (unlikely(vmx->soft_vnmi_blocked))
vmx->vnmi_blocked_time +=
@@ -8742,7 +7205,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
if (!idtv_info_valid)
return;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
@@ -8758,17 +7221,17 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
vmx_set_nmi_mask(vcpu, false);
break;
case INTR_TYPE_SOFT_EXCEPTION:
- vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
+ vcpu->arch.event_exit_inst_len = vmcs_read32(vcpu, instr_len_field);
/* fall through */
case INTR_TYPE_HARD_EXCEPTION:
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
- u32 err = vmcs_read32(error_code_field);
+ u32 err = vmcs_read32(vcpu, error_code_field);
kvm_requeue_exception_e(vcpu, vector, err);
} else
kvm_requeue_exception(vcpu, vector);
break;
case INTR_TYPE_SOFT_INTR:
- vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
+ vcpu->arch.event_exit_inst_len = vmcs_read32(vcpu, instr_len_field);
/* fall through */
case INTR_TYPE_EXT_INTR:
kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
@@ -8788,15 +7251,16 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
{
__vmx_complete_interrupts(vcpu,
- vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
+ vmcs_read32(vcpu, VM_ENTRY_INTR_INFO_FIELD),
VM_ENTRY_INSTRUCTION_LEN,
VM_ENTRY_EXCEPTION_ERROR_CODE);
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
+ vmcs_write32(vcpu, VM_ENTRY_INTR_INFO_FIELD, 0);
}
static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
{
+#if 0
int i, nr_msrs;
struct perf_guest_switch_msr *msrs;
@@ -8811,32 +7275,26 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
else
add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
msrs[i].host);
+#endif
}
-void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 tscl;
- u32 delta_tsc;
-
- if (vmx->hv_deadline_tsc == -1)
- return;
-
- tscl = rdtsc();
- if (vmx->hv_deadline_tsc > tscl)
- /* sure to be 32 bit only because checked on set_hv_timer */
- delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
- cpu_preemption_timer_multi);
- else
- delta_tsc = 0;
-
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
-}
+u64 last_vmexit_rip = 0;
+u64 last_vmexit_rsp = 0;
+u64 rip = 0xffffffffffffffff;
+u8 do_print = 1;
+u8 do_print1 = 1;
-static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static void __declspec(noinline) vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long debugctlmsr, cr4;
+ //size_t debugctlmsr, cr4;
+ size_t cr4;
+ struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+ struct desc_ptr *idt = this_cpu_ptr(&host_idt);
+ size_t sysenter_esp;
+ unsigned int i;
+ struct msr_autoload *m = &vmx->msr_autoload;
+
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
@@ -8847,24 +7305,19 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vmx->emulation_required)
return;
- if (vmx->ple_window_dirty) {
- vmx->ple_window_dirty = false;
- vmcs_write32(PLE_WINDOW, vmx->ple_window);
- }
-
if (vmx->nested.sync_shadow_vmcs) {
copy_vmcs12_to_shadow(vmx);
vmx->nested.sync_shadow_vmcs = false;
}
- if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
- vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
- if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
- vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+ if (test_bit(VCPU_REGS_RSP, (size_t *)&vcpu->arch.regs_dirty))
+ vmcs_writel(vcpu, GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
+ if (test_bit(VCPU_REGS_RIP, (size_t *)&vcpu->arch.regs_dirty))
+ vmcs_writel(vcpu, GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
- vmcs_writel(HOST_CR4, cr4);
+ vmcs_writel(vcpu, HOST_CR4, cr4);
vmx->host_state.vmcs_host_cr4 = cr4;
}
@@ -8873,126 +7326,50 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* vmentry fails as it then expects bit 14 (BS) in pending debug
* exceptions being set, but that's not correct for the guest debugging
* case. */
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ if (vcpu->guest_debug & GVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
- if (vmx->guest_pkru_valid)
- __write_pkru(vmx->guest_pkru);
+ vmcs_writel(vcpu, HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
+ vmcs_writel(vcpu, HOST_GDTR_BASE, gdt->address); /* 22.2.4 */
+ vmcs_writel(vcpu, HOST_IDTR_BASE, idt->address); /* 22.2.4 */
+ rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
+ vmcs_writel(vcpu, HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+#if 0
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
-
- vmx_arm_hv_timer(vcpu);
-
- vmx->__launched = vmx->loaded_vmcs->launched;
- asm(
- /* Store host registers */
- "push %%" _ASM_DX "; push %%" _ASM_BP ";"
- "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
- "push %%" _ASM_CX " \n\t"
- "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
- "je 1f \n\t"
- "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
- __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
- "1: \n\t"
- /* Reload cr2 if changed */
- "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
- "mov %%cr2, %%" _ASM_DX " \n\t"
- "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
- "je 2f \n\t"
- "mov %%" _ASM_AX", %%cr2 \n\t"
- "2: \n\t"
- /* Check if vmlaunch of vmresume is needed */
- "cmpl $0, %c[launched](%0) \n\t"
- /* Load guest registers. Don't clobber flags. */
- "mov %c[rax](%0), %%" _ASM_AX " \n\t"
- "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
- "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
- "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
- "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
- "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
-#ifdef CONFIG_X86_64
- "mov %c[r8](%0), %%r8 \n\t"
- "mov %c[r9](%0), %%r9 \n\t"
- "mov %c[r10](%0), %%r10 \n\t"
- "mov %c[r11](%0), %%r11 \n\t"
- "mov %c[r12](%0), %%r12 \n\t"
- "mov %c[r13](%0), %%r13 \n\t"
- "mov %c[r14](%0), %%r14 \n\t"
- "mov %c[r15](%0), %%r15 \n\t"
-#endif
- "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
-
- /* Enter guest mode */
- "jne 1f \n\t"
- __ex(ASM_VMX_VMLAUNCH) "\n\t"
- "jmp 2f \n\t"
- "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
- "2: "
- /* Save guest registers, load host registers, keep flags */
- "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
- "pop %0 \n\t"
- "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
- "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
- __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
- "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
- "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
- "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
- "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
-#ifdef CONFIG_X86_64
- "mov %%r8, %c[r8](%0) \n\t"
- "mov %%r9, %c[r9](%0) \n\t"
- "mov %%r10, %c[r10](%0) \n\t"
- "mov %%r11, %c[r11](%0) \n\t"
- "mov %%r12, %c[r12](%0) \n\t"
- "mov %%r13, %c[r13](%0) \n\t"
- "mov %%r14, %c[r14](%0) \n\t"
- "mov %%r15, %c[r15](%0) \n\t"
-#endif
- "mov %%cr2, %%" _ASM_AX " \n\t"
- "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
-
- "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
- "setbe %c[fail](%0) \n\t"
- ".pushsection .rodata \n\t"
- ".global vmx_return \n\t"
- "vmx_return: " _ASM_PTR " 2b \n\t"
- ".popsection"
- : : "c"(vmx), "d"((unsigned long)HOST_RSP),
- [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
- [fail]"i"(offsetof(struct vcpu_vmx, fail)),
- [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
- [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
- [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
- [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
- [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
- [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
- [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
- [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
-#ifdef CONFIG_X86_64
- [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
- [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
- [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
- [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
- [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
- [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
- [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
- [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
#endif
- [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
- [wordsize]"i"(sizeof(ulong))
- : "cc", "memory"
-#ifdef CONFIG_X86_64
- , "rax", "rbx", "rdi", "rsi"
- , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
-#else
- , "eax", "ebx", "edi", "esi"
-#endif
- );
+ if (do_print1) {
+ dump_vmcs(vcpu);
+ do_print1 = 0;
+ }
+ vmcs_load(vmx->loaded_vmcs->vmcs);
+
+ for (i = 0; i < m->nr; i++)
+ wrmsrl(m->guest[i].index, m->guest[i].value);
+	/* Call into the low-level assembly VM entry/exit routine */
+ __asm_vmx_vcpu_run(vmx);
+ for (i = 0; i < m->nr; i++)
+ wrmsrl(m->host[i].index, m->host[i].value);
+ vmcs_clear(vmx->loaded_vmcs->vmcs);
+
+ if (vcpu->vcpu_id == 0) {
+ last_vmexit_rip = vmcs_read64(vcpu, GUEST_RIP);
+ last_vmexit_rsp = vmcs_read64(vcpu, GUEST_RSP);
+ }
+ if (do_print && (vcpu->vcpu_id == 0)) {
+ DbgPrint("-------------------vcpu 0-----------------------------------------------------------\n");
+ dump_vmcs(vcpu);
+ do_print = 0;
+ }
+ if (last_vmexit_rip == rip)
+ DbgBreakPoint();
+#if 0
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (debugctlmsr)
update_debugctlmsr(debugctlmsr);
+#endif
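The loops a few lines above write the guest-side MSR list by hand before __asm_vmx_vcpu_run() and restore the host values afterwards, in place of the VMCS entry/exit autoload machinery. A sketch of the bookkeeping this implies — the layout follows the upstream struct msr_autoload and both the field names and NR_AUTOLOAD_MSRS value are assumptions here:

    #define NR_AUTOLOAD_MSRS 8              /* assumed, matches upstream */

    /* Paired guest/host MSR values switched by software around VM entry. */
    struct vmx_msr_entry {
            u32 index;                      /* MSR number */
            u32 reserved;
            u64 value;                      /* value to load */
    };

    struct msr_autoload {
            unsigned nr;                    /* entries in use */
            struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
            struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
    };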
#ifndef CONFIG_X86_64
/*
@@ -9014,33 +7391,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
| (1 << VCPU_EXREG_CR3));
vcpu->arch.regs_dirty = 0;
- vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
- vmx->loaded_vmcs->launched = 1;
-
- vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
+ vmx->idt_vectoring_info = vmcs_read32(vcpu, IDT_VECTORING_INFO_FIELD);
- /*
- * eager fpu is enabled if PKEY is supported and CR4 is switched
- * back on host, so it is safe to read guest PKRU from current
- * XSAVE.
- */
- if (boot_cpu_has(X86_FEATURE_OSPKE)) {
- vmx->guest_pkru = __read_pkru();
- if (vmx->guest_pkru != vmx->host_pkru) {
- vmx->guest_pkru_valid = true;
- __write_pkru(vmx->host_pkru);
- } else
- vmx->guest_pkru_valid = false;
- }
+ vmx->exit_reason = vmcs_read32(vcpu, VM_EXIT_REASON);
/*
- * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
+ * the GVM_REQ_EVENT optimization bit is only on for one entry, and if
* we did not inject a still-pending event to L1 now because of
* nested_run_pending, we need to re-enable this bit.
*/
if (vmx->nested.nested_run_pending)
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
vmx->nested.nested_run_pending = 0;
@@ -9072,13 +7433,9 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int r;
- r = vcpu_load(vcpu);
- BUG_ON(r);
vmx_load_vmcs01(vcpu);
free_nested(vmx);
- vcpu_put(vcpu);
}
static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
@@ -9091,16 +7448,14 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
leave_guest_mode(vcpu);
vmx_free_vcpu_nested(vcpu);
free_loaded_vmcs(vmx->loaded_vmcs);
- kfree(vmx->guest_msrs);
kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vmx);
+ kfree(vmx);
}
static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
int err;
- struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- int cpu;
+ struct vcpu_vmx *vmx = kzalloc_fast(sizeof(struct vcpu_vmx), GFP_KERNEL);
if (!vmx)
return ERR_PTR(-ENOMEM);
@@ -9125,30 +7480,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto uninit_vcpu;
}
- vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
- BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
- > PAGE_SIZE);
-
- if (!vmx->guest_msrs)
- goto free_pml;
-
vmx->loaded_vmcs = &vmx->vmcs01;
vmx->loaded_vmcs->vmcs = alloc_vmcs();
+ DbgPrint("vmcs allocated with phys %llx on cpu %d\n", __pa(vmx->loaded_vmcs->vmcs), smp_processor_id());
vmx->loaded_vmcs->shadow_vmcs = NULL;
if (!vmx->loaded_vmcs->vmcs)
- goto free_msrs;
+ goto free_pml;
if (!vmm_exclusive)
kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
loaded_vmcs_init(vmx->loaded_vmcs);
if (!vmm_exclusive)
kvm_cpu_vmxoff();
- cpu = get_cpu();
- vmx_vcpu_load(&vmx->vcpu, cpu);
- vmx->vcpu.cpu = cpu;
err = vmx_vcpu_setup(vmx);
- vmx_vcpu_put(&vmx->vcpu);
- put_cpu();
if (err)
goto free_vmcs;
if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
@@ -9171,7 +7515,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->nested.vpid02 = allocate_vpid();
}
- vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
vmx->nested.current_vmcs12 = NULL;
@@ -9182,19 +7525,17 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
free_vmcs:
free_vpid(vmx->nested.vpid02);
free_loaded_vmcs(vmx->loaded_vmcs);
-free_msrs:
- kfree(vmx->guest_msrs);
free_pml:
vmx_destroy_pml_buffer(vmx);
uninit_vcpu:
kvm_vcpu_uninit(&vmx->vcpu);
free_vcpu:
free_vpid(vmx->vpid);
- kmem_cache_free(kvm_vcpu_cache, vmx);
+ kfree(vmx);
return ERR_PTR(err);
}
-static void __init vmx_check_processor_compat(void *rtn)
+static void vmx_check_processor_compat(void *rtn)
{
struct vmcs_config vmcs_conf;
@@ -9234,22 +7575,14 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
goto exit;
}
- if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+ //if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+ {
ipat = VMX_EPT_IPAT_BIT;
cache = MTRR_TYPE_WRBACK;
goto exit;
}
- if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
- ipat = VMX_EPT_IPAT_BIT;
- if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
- cache = MTRR_TYPE_WRBACK;
- else
- cache = MTRR_TYPE_UNCACHABLE;
- goto exit;
- }
-
- cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+ //cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
exit:
return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
@@ -9264,7 +7597,7 @@ static int vmx_get_lpage_level(void)
return PT_PDPE_LEVEL;
}
-static void vmcs_set_secondary_exec_control(u32 new_ctl)
+static void vmcs_set_secondary_exec_control(struct kvm_vcpu *vcpu, u32 new_ctl)
{
/*
* These bits in the secondary execution controls field
@@ -9277,15 +7610,15 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ u32 cur_ctl = vmcs_read32(vcpu, SECONDARY_VM_EXEC_CONTROL);
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+ vmcs_write32(vcpu, SECONDARY_VM_EXEC_CONTROL,
(new_ctl & ~mask) | (cur_ctl & mask));
}
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry *best;
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx);
@@ -9316,7 +7649,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
}
if (cpu_has_secondary_exec_ctrls())
- vmcs_set_secondary_exec_control(secondary_exec_ctl);
+ vmcs_set_secondary_exec_control(vcpu, secondary_exec_ctl);
if (nested_vmx_allowed(vcpu))
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
@@ -9326,7 +7659,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
}
-static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
+static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry *entry)
{
if (func == 1 && nested)
entry->ecx |= bit(X86_FEATURE_VMX);
@@ -9348,7 +7681,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
/* Callbacks for nested_ept_init_mmu_context: */
-static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
+static size_t nested_ept_get_cr3(struct kvm_vcpu *vcpu)
{
/* return the page table to be shadowed - in our case, EPT12 */
return get_vmcs12(vcpu)->ept_pointer;
@@ -9393,8 +7726,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code))
nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
- vmcs_read32(VM_EXIT_INTR_INFO),
- vmcs_readl(EXIT_QUALIFICATION));
+ vmcs_read32(vcpu, VM_EXIT_INTR_INFO),
+ vmcs_readl(vcpu, EXIT_QUALIFICATION));
else
kvm_inject_page_fault(vcpu, fault);
}
@@ -9416,9 +7749,9 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
* physical address remains valid. We keep a reference
* to it so we can release it later.
*/
- if (vmx->nested.apic_access_page) /* shouldn't happen */
- nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page =
+ if (vmx->nested.apic_access_mdl) /* shouldn't happen */
+ nested_release_page(vmx->nested.apic_access_mdl);
+ vmx->nested.apic_access_mdl =
nested_get_page(vcpu, vmcs12->apic_access_addr);
}
@@ -9427,9 +7760,9 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
vmcs12->virtual_apic_page_addr >> maxphyaddr)
return false;
- if (vmx->nested.virtual_apic_page) /* shouldn't happen */
- nested_release_page(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page =
+ if (vmx->nested.virtual_apic_mdl) /* shouldn't happen */
+ nested_release_page(vmx->nested.virtual_apic_mdl);
+ vmx->nested.virtual_apic_mdl =
nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
/*
@@ -9442,61 +7775,13 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
* the execution control. But such a configuration is useless,
* so let's keep the code simple.
*/
- if (!vmx->nested.virtual_apic_page)
+ if (!vmx->nested.virtual_apic_mdl)
return false;
}
- if (nested_cpu_has_posted_intr(vmcs12)) {
- if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) ||
- vmcs12->posted_intr_desc_addr >> maxphyaddr)
- return false;
-
- if (vmx->nested.pi_desc_page) { /* shouldn't happen */
- kunmap(vmx->nested.pi_desc_page);
- nested_release_page(vmx->nested.pi_desc_page);
- }
- vmx->nested.pi_desc_page =
- nested_get_page(vcpu, vmcs12->posted_intr_desc_addr);
- if (!vmx->nested.pi_desc_page)
- return false;
-
- vmx->nested.pi_desc =
- (struct pi_desc *)kmap(vmx->nested.pi_desc_page);
- if (!vmx->nested.pi_desc) {
- nested_release_page_clean(vmx->nested.pi_desc_page);
- return false;
- }
- vmx->nested.pi_desc =
- (struct pi_desc *)((void *)vmx->nested.pi_desc +
- (unsigned long)(vmcs12->posted_intr_desc_addr &
- (PAGE_SIZE - 1)));
- }
-
return true;
}
-static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
-{
- u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (vcpu->arch.virtual_tsc_khz == 0)
- return;
-
- /* Make sure short timeouts reliably trigger an immediate vmexit.
- * hrtimer_start does not guarantee this. */
- if (preemption_timeout <= 1) {
- vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
- return;
- }
-
- preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
- preemption_timeout *= 1000000;
- do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
- hrtimer_start(&vmx->nested.preemption_timer,
- ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
-}
-
static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -9527,22 +7812,22 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
int msr;
- struct page *page;
- unsigned long *msr_bitmap_l1;
- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+ PMDL kmap_mdl;
+ size_t *msr_bitmap_l1;
+ size_t *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
/* This shortcut is ok because we support only x2APIC MSRs so far. */
if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
return false;
- page = nested_get_page(vcpu, vmcs12->msr_bitmap);
- if (!page) {
+ kmap_mdl = nested_get_page(vcpu, vmcs12->msr_bitmap);
+ if (!kmap_mdl) {
WARN_ON(1);
return false;
}
- msr_bitmap_l1 = (unsigned long *)kmap(page);
+ msr_bitmap_l1 = (size_t *)kmap(kmap_mdl);
if (!msr_bitmap_l1) {
- nested_release_page_clean(page);
+ nested_release_page(kmap_mdl);
WARN_ON(1);
return false;
}
@@ -9572,8 +7857,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
MSR_TYPE_W);
}
}
- kunmap(page);
- nested_release_page_clean(page);
+ kunmap(kmap_mdl);
+ nested_release_page(kmap_mdl);
return true;
}
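nested_vmx_merge_msr_bitmap() above builds L0's MSR bitmap for L2 from L1's bitmap, keeping interception unless both levels allow direct access; per the comment in the hunk, the shortcut only covers x2APIC MSRs so far. A deliberately simplified sketch of the merge rule — this is the general principle, not the function's actual per-MSR logic:

    /* Conceptual merge of two read-intercept bitmaps: L2 traps to L0 if
     * either L0's own policy or L1's bitmap requests interception. */
    static void merge_msr_read_bitmaps(const unsigned long *l0,
                                       const unsigned long *l1,
                                       unsigned long *out, size_t words)
    {
            size_t i;

            for (i = 0; i < words; i++)
                    out[i] = l0[i] | l1[i];   /* intercept if either level wants it */
    }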
@@ -9583,8 +7868,7 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
{
if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
!nested_cpu_has_apic_reg_virt(vmcs12) &&
- !nested_cpu_has_vid(vmcs12) &&
- !nested_cpu_has_posted_intr(vmcs12))
+ !nested_cpu_has_vid(vmcs12))
return 0;
/*
@@ -9603,17 +7887,6 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
!nested_exit_on_intr(vcpu))
return -EINVAL;
- /*
- * bits 15:8 should be zero in posted_intr_nv,
- * the descriptor address has been already checked
- * in nested_get_vmcs12_pages.
- */
- if (nested_cpu_has_posted_intr(vmcs12) &&
- (!nested_cpu_has_vid(vmcs12) ||
- !nested_exit_intr_ack_set(vcpu) ||
- vmcs12->posted_intr_nv & 0xff00))
- return -EINVAL;
-
/* tpr shadow is needed by all apicv features. */
if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
return -EINVAL;
@@ -9622,8 +7895,8 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
}
static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
- unsigned long count_field,
- unsigned long addr_field)
+ size_t count_field,
+ size_t addr_field)
{
int maxphyaddr;
u64 count, addr;
@@ -9792,98 +8065,74 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
- vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
- vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
- vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
- vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
- vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
- vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
- vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
- vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
- vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
- vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
- vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
- vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
- vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
- vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
- vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
- vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
- vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
- vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
- vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
- vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
- vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
- vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
- vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
- vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
- vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
- vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
- vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
- vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
- vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
- vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
- vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
- vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
- vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
- vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
- vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
- vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+ vmcs_write16(vcpu, GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
+ vmcs_write16(vcpu, GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
+ vmcs_write16(vcpu, GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
+ vmcs_write16(vcpu, GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
+ vmcs_write16(vcpu, GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
+ vmcs_write16(vcpu, GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
+ vmcs_write16(vcpu, GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
+ vmcs_write16(vcpu, GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
+ vmcs_write32(vcpu, GUEST_ES_LIMIT, vmcs12->guest_es_limit);
+ vmcs_write32(vcpu, GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
+ vmcs_write32(vcpu, GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
+ vmcs_write32(vcpu, GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
+ vmcs_write32(vcpu, GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
+ vmcs_write32(vcpu, GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
+ vmcs_write32(vcpu, GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
+ vmcs_write32(vcpu, GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
+ vmcs_write32(vcpu, GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
+ vmcs_write32(vcpu, GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
+ vmcs_write32(vcpu, GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
+ vmcs_write32(vcpu, GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+ vmcs_write32(vcpu, GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
+ vmcs_write32(vcpu, GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
+ vmcs_write32(vcpu, GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
+ vmcs_write32(vcpu, GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
+ vmcs_write32(vcpu, GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
+ vmcs_write32(vcpu, GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
+ vmcs_writel(vcpu, GUEST_ES_BASE, vmcs12->guest_es_base);
+ vmcs_writel(vcpu, GUEST_CS_BASE, vmcs12->guest_cs_base);
+ vmcs_writel(vcpu, GUEST_SS_BASE, vmcs12->guest_ss_base);
+ vmcs_writel(vcpu, GUEST_DS_BASE, vmcs12->guest_ds_base);
+ vmcs_writel(vcpu, GUEST_FS_BASE, vmcs12->guest_fs_base);
+ vmcs_writel(vcpu, GUEST_GS_BASE, vmcs12->guest_gs_base);
+ vmcs_writel(vcpu, GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
+ vmcs_writel(vcpu, GUEST_TR_BASE, vmcs12->guest_tr_base);
+ vmcs_writel(vcpu, GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
+ vmcs_writel(vcpu, GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
- vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+ vmcs_write64(vcpu, GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
} else {
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
- vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
+ vmcs_write64(vcpu, GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
}
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ vmcs_write32(vcpu, VM_ENTRY_INTR_INFO_FIELD,
vmcs12->vm_entry_intr_info_field);
- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ vmcs_write32(vcpu, VM_ENTRY_EXCEPTION_ERROR_CODE,
vmcs12->vm_entry_exception_error_code);
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+ vmcs_write32(vcpu, VM_ENTRY_INSTRUCTION_LEN,
vmcs12->vm_entry_instruction_len);
- vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+ vmcs_write32(vcpu, GUEST_INTERRUPTIBILITY_INFO,
vmcs12->guest_interruptibility_info);
- vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
+ vmcs_write32(vcpu, GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
vmx_set_rflags(vcpu, vmcs12->guest_rflags);
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs_writel(vcpu, GUEST_PENDING_DBG_EXCEPTIONS,
vmcs12->guest_pending_dbg_exceptions);
- vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
- vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+ vmcs_writel(vcpu, GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
+ vmcs_writel(vcpu, GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
if (nested_cpu_has_xsaves(vmcs12))
- vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
- vmcs_write64(VMCS_LINK_POINTER, -1ull);
+ vmcs_write64(vcpu, XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
+ vmcs_write64(vcpu, VMCS_LINK_POINTER, -1ull);
exec_control = vmcs12->pin_based_vm_exec_control;
- /* Preemption timer setting is only taken from vmcs01. */
- exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
exec_control |= vmcs_config.pin_based_exec_ctrl;
- if (vmx->hv_deadline_tsc == -1)
- exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-
- /* Posted interrupts setting is only taken from vmcs12. */
- if (nested_cpu_has_posted_intr(vmcs12)) {
- /*
- * Note that we use L0's vector here and in
- * vmx_deliver_nested_posted_interrupt.
- */
- vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
- vmx->nested.pi_pending = false;
- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
- vmcs_write64(POSTED_INTR_DESC_ADDR,
- page_to_phys(vmx->nested.pi_desc_page) +
- (unsigned long)(vmcs12->posted_intr_desc_addr &
- (PAGE_SIZE - 1)));
- } else
- exec_control &= ~PIN_BASED_POSTED_INTR;
-
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
- vmx->nested.preemption_timer_expired = false;
- if (nested_cpu_has_preemption_timer(vmcs12))
- vmx_start_preemption_timer(vcpu);
+ vmcs_write32(vcpu, PIN_BASED_VM_EXEC_CONTROL, exec_control);
/*
* Whether page-faults are trapped is determined by a combination of
@@ -9905,9 +8154,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* To fix this, we will need to emulate the PFEC checking (on the L1
* page tables), using walk_addr(), when injecting PFs to L1.
*/
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
+ vmcs_write32(vcpu, PAGE_FAULT_ERROR_CODE_MASK,
enable_ept ? vmcs12->page_fault_error_code_mask : 0);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
+ vmcs_write32(vcpu, PAGE_FAULT_ERROR_CODE_MATCH,
enable_ept ? vmcs12->page_fault_error_code_match : 0);
if (cpu_has_secondary_exec_ctrls()) {
@@ -9929,12 +8178,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* can never be accessed, this feature won't do
* anything anyway.
*/
- if (!vmx->nested.apic_access_page)
+ if (!vmx->nested.apic_access_mdl)
exec_control &=
~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
else
- vmcs_write64(APIC_ACCESS_ADDR,
- page_to_phys(vmx->nested.apic_access_page));
+ vmcs_write64(vcpu, APIC_ACCESS_ADDR,
+ mdl_to_phys(vmx->nested.apic_access_mdl));
} else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
exec_control |=
@@ -9943,19 +8192,19 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
}
if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
- vmcs_write64(EOI_EXIT_BITMAP0,
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP0,
vmcs12->eoi_exit_bitmap0);
- vmcs_write64(EOI_EXIT_BITMAP1,
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP1,
vmcs12->eoi_exit_bitmap1);
- vmcs_write64(EOI_EXIT_BITMAP2,
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP2,
vmcs12->eoi_exit_bitmap2);
- vmcs_write64(EOI_EXIT_BITMAP3,
+ vmcs_write64(vcpu, EOI_EXIT_BITMAP3,
vmcs12->eoi_exit_bitmap3);
- vmcs_write16(GUEST_INTR_STATUS,
+ vmcs_write16(vcpu, GUEST_INTR_STATUS,
vmcs12->guest_intr_status);
}
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+ vmcs_write32(vcpu, SECONDARY_VM_EXEC_CONTROL, exec_control);
}
@@ -9983,9 +8232,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exec_control |= vmcs12->cpu_based_vm_exec_control;
if (exec_control & CPU_BASED_TPR_SHADOW) {
- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
- page_to_phys(vmx->nested.virtual_apic_page));
- vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+ vmcs_write64(vcpu, VIRTUAL_APIC_PAGE_ADDR,
+ mdl_to_phys(vmx->nested.virtual_apic_mdl));
+ vmcs_write32(vcpu, TPR_THRESHOLD, vmcs12->tpr_threshold);
}
if (cpu_has_vmx_msr_bitmap() &&
@@ -10002,7 +8251,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
exec_control |= CPU_BASED_UNCOND_IO_EXITING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+ vmcs_write32(vcpu, CPU_BASED_VM_EXEC_CONTROL, exec_control);
/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
* bitwise-or of what L1 wants to trap for L2, and what we want to
@@ -10010,13 +8259,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
*/
update_exception_bitmap(vcpu);
vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
- vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
+ vmcs_writel(vcpu, CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
* bits are further modified by vmx_set_efer() below.
*/
- vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+ vmcs_write32(vcpu, VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
@@ -10027,24 +8276,22 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) {
- vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
+ vmcs_write64(vcpu, GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
vcpu->arch.pat = vmcs12->guest_ia32_pat;
} else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
- vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
+ vmcs_write64(vcpu, GUEST_IA32_PAT, vmx->vcpu.arch.pat);
set_cr4_guest_host_mask(vmx);
if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+ vmcs_write64(vcpu, GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
- vmcs_write64(TSC_OFFSET,
+ vmcs_write64(vcpu, TSC_OFFSET,
vcpu->arch.tsc_offset + vmcs12->tsc_offset);
else
- vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
- if (kvm_has_tsc_control)
- decache_tsc_multiplier(vmx);
+ vmcs_write64(vcpu, TSC_OFFSET, vcpu->arch.tsc_offset);
if (enable_vpid) {
/*
@@ -10056,13 +8303,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* even if spawn a lot of nested vCPUs.
*/
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
+ vmcs_write16(vcpu, VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
}
} else {
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+ vmcs_write16(vcpu, VIRTUAL_PROCESSOR_ID, vmx->vpid);
vmx_flush_tlb(vcpu);
}
@@ -10091,10 +8338,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* have more bits than L1 expected.
*/
vmx_set_cr0(vcpu, vmcs12->guest_cr0);
- vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
+ vmcs_writel(vcpu, CR0_READ_SHADOW, nested_read_cr0(vmcs12));
vmx_set_cr4(vcpu, vmcs12->guest_cr4);
- vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
+ vmcs_writel(vcpu, CR4_READ_SHADOW, nested_read_cr4(vmcs12));
/* shadow page tables on either EPT or shadow page tables */
kvm_set_cr3(vcpu, vmcs12->guest_cr3);
@@ -10107,10 +8354,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* L1 may access the L2's PDPTR, so save them to construct vmcs12
*/
if (enable_ept) {
- vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
- vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
- vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
- vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+ vmcs_write64(vcpu, GUEST_PDPTR0, vmcs12->guest_pdptr0);
+ vmcs_write64(vcpu, GUEST_PDPTR1, vmcs12->guest_pdptr1);
+ vmcs_write64(vcpu, GUEST_PDPTR2, vmcs12->guest_pdptr2);
+ vmcs_write64(vcpu, GUEST_PDPTR3, vmcs12->guest_pdptr3);
}
kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
@@ -10273,7 +8520,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
- vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ vmx->nested.vmcs01_debugctl = vmcs_read64(vcpu, GUEST_IA32_DEBUGCTL);
cpu = get_cpu();
vmx->loaded_vmcs = vmcs02;
@@ -10330,23 +8577,23 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* didn't necessarily allow them to be changed in GUEST_CR0 - and rather
* put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
*/
-static inline unsigned long
+static inline size_t
vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
return
- /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
+ /*1*/ (vmcs_readl(vcpu, GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
/*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
- /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
+ /*3*/ (vmcs_readl(vcpu, CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
vcpu->arch.cr0_guest_owned_bits));
}
-static inline unsigned long
+static inline size_t
vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
return
- /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
+ /*1*/ (vmcs_readl(vcpu, GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
/*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
- /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
+ /*3*/ (vmcs_readl(vcpu, CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
vcpu->arch.cr4_guest_owned_bits));
}
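vmcs12_guest_cr0()/cr4() above reassemble the guest-visible control register from three sources: hardware GUEST_CRx supplies the bits L0 owns, vmcs12 supplies the bits L1 traps, and the read shadow supplies the rest. The same three-way select written out as a stand-alone helper, purely for illustration:

    /* Bit-select: hw where L0 owns the bit, l1_val where L1 traps it,
     * and the read shadow everywhere else. Mirrors the expressions above. */
    static inline u64 three_way_select(u64 hw, u64 l1_val, u64 shadow,
                                       u64 l0_owned, u64 l1_mask)
    {
            return (hw & l0_owned) |
                   (l1_val & l1_mask) |
                   (shadow & ~(l0_owned | l1_mask));
    }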
@@ -10396,14 +8643,6 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
- vmx->nested.preemption_timer_expired) {
- if (vmx->nested.nested_run_pending)
- return -EBUSY;
- nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
- return 0;
- }
-
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
if (vmx->nested.nested_run_pending ||
vcpu->arch.interrupt.pending)
@@ -10428,21 +8667,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
- return vmx_complete_nested_posted_interrupt(vcpu);
-}
-
-static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
-{
- ktime_t remaining =
- hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
- u64 value;
-
- if (ktime_to_ns(remaining) <= 0)
- return 0;
-
- value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
- do_div(value, 1000000);
- return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+ return 0;
}
/*
@@ -10458,7 +8683,7 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
*/
static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
u32 exit_reason, u32 exit_intr_info,
- unsigned long exit_qualification)
+ size_t exit_qualification)
{
/* update guest state fields: */
vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -10466,62 +8691,54 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
- vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
-
- vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
- vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
- vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
- vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
- vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
- vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
- vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
- vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
- vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
- vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
- vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
- vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
- vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
- vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
- vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
- vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
- vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
- vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
- vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
- vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
- vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
- vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
- vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
- vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
- vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
- vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
- vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
- vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
- vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
- vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
- vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
- vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
- vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
- vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
- vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
- vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
+ vmcs12->guest_rflags = vmcs_readl(vcpu, GUEST_RFLAGS);
+
+ vmcs12->guest_es_selector = vmcs_read16(vcpu, GUEST_ES_SELECTOR);
+ vmcs12->guest_cs_selector = vmcs_read16(vcpu, GUEST_CS_SELECTOR);
+ vmcs12->guest_ss_selector = vmcs_read16(vcpu, GUEST_SS_SELECTOR);
+ vmcs12->guest_ds_selector = vmcs_read16(vcpu, GUEST_DS_SELECTOR);
+ vmcs12->guest_fs_selector = vmcs_read16(vcpu, GUEST_FS_SELECTOR);
+ vmcs12->guest_gs_selector = vmcs_read16(vcpu, GUEST_GS_SELECTOR);
+ vmcs12->guest_ldtr_selector = vmcs_read16(vcpu, GUEST_LDTR_SELECTOR);
+ vmcs12->guest_tr_selector = vmcs_read16(vcpu, GUEST_TR_SELECTOR);
+ vmcs12->guest_es_limit = vmcs_read32(vcpu, GUEST_ES_LIMIT);
+ vmcs12->guest_cs_limit = vmcs_read32(vcpu, GUEST_CS_LIMIT);
+ vmcs12->guest_ss_limit = vmcs_read32(vcpu, GUEST_SS_LIMIT);
+ vmcs12->guest_ds_limit = vmcs_read32(vcpu, GUEST_DS_LIMIT);
+ vmcs12->guest_fs_limit = vmcs_read32(vcpu, GUEST_FS_LIMIT);
+ vmcs12->guest_gs_limit = vmcs_read32(vcpu, GUEST_GS_LIMIT);
+ vmcs12->guest_ldtr_limit = vmcs_read32(vcpu, GUEST_LDTR_LIMIT);
+ vmcs12->guest_tr_limit = vmcs_read32(vcpu, GUEST_TR_LIMIT);
+ vmcs12->guest_gdtr_limit = vmcs_read32(vcpu, GUEST_GDTR_LIMIT);
+ vmcs12->guest_idtr_limit = vmcs_read32(vcpu, GUEST_IDTR_LIMIT);
+ vmcs12->guest_es_ar_bytes = vmcs_read32(vcpu, GUEST_ES_AR_BYTES);
+ vmcs12->guest_cs_ar_bytes = vmcs_read32(vcpu, GUEST_CS_AR_BYTES);
+ vmcs12->guest_ss_ar_bytes = vmcs_read32(vcpu, GUEST_SS_AR_BYTES);
+ vmcs12->guest_ds_ar_bytes = vmcs_read32(vcpu, GUEST_DS_AR_BYTES);
+ vmcs12->guest_fs_ar_bytes = vmcs_read32(vcpu, GUEST_FS_AR_BYTES);
+ vmcs12->guest_gs_ar_bytes = vmcs_read32(vcpu, GUEST_GS_AR_BYTES);
+ vmcs12->guest_ldtr_ar_bytes = vmcs_read32(vcpu, GUEST_LDTR_AR_BYTES);
+ vmcs12->guest_tr_ar_bytes = vmcs_read32(vcpu, GUEST_TR_AR_BYTES);
+ vmcs12->guest_es_base = vmcs_readl(vcpu, GUEST_ES_BASE);
+ vmcs12->guest_cs_base = vmcs_readl(vcpu, GUEST_CS_BASE);
+ vmcs12->guest_ss_base = vmcs_readl(vcpu, GUEST_SS_BASE);
+ vmcs12->guest_ds_base = vmcs_readl(vcpu, GUEST_DS_BASE);
+ vmcs12->guest_fs_base = vmcs_readl(vcpu, GUEST_FS_BASE);
+ vmcs12->guest_gs_base = vmcs_readl(vcpu, GUEST_GS_BASE);
+ vmcs12->guest_ldtr_base = vmcs_readl(vcpu, GUEST_LDTR_BASE);
+ vmcs12->guest_tr_base = vmcs_readl(vcpu, GUEST_TR_BASE);
+ vmcs12->guest_gdtr_base = vmcs_readl(vcpu, GUEST_GDTR_BASE);
+ vmcs12->guest_idtr_base = vmcs_readl(vcpu, GUEST_IDTR_BASE);
vmcs12->guest_interruptibility_info =
- vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ vmcs_read32(vcpu, GUEST_INTERRUPTIBILITY_INFO);
vmcs12->guest_pending_dbg_exceptions =
- vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
- if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+ vmcs_readl(vcpu, GUEST_PENDING_DBG_EXCEPTIONS);
+ if (vcpu->arch.mp_state == GVM_MP_STATE_HALTED)
vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
else
vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
- if (nested_cpu_has_preemption_timer(vmcs12)) {
- if (vmcs12->vm_exit_controls &
- VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
- vmcs12->vmx_preemption_timer_value =
- vmx_get_preemption_timer_value(vcpu);
- hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
- }
-
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
* own CR3 without exiting. If it has changed it, we must keep it.
@@ -10531,41 +8748,41 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* Additionally, restore L2's PDPTR to vmcs12.
*/
if (enable_ept) {
- vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
- vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
- vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
- vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
- vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+ vmcs12->guest_cr3 = vmcs_readl(vcpu, GUEST_CR3);
+ vmcs12->guest_pdptr0 = vmcs_read64(vcpu, GUEST_PDPTR0);
+ vmcs12->guest_pdptr1 = vmcs_read64(vcpu, GUEST_PDPTR1);
+ vmcs12->guest_pdptr2 = vmcs_read64(vcpu, GUEST_PDPTR2);
+ vmcs12->guest_pdptr3 = vmcs_read64(vcpu, GUEST_PDPTR3);
}
if (nested_cpu_has_ept(vmcs12))
- vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+ vmcs12->guest_linear_address = vmcs_readl(vcpu, GUEST_LINEAR_ADDRESS);
if (nested_cpu_has_vid(vmcs12))
- vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
+ vmcs12->guest_intr_status = vmcs_read16(vcpu, GUEST_INTR_STATUS);
vmcs12->vm_entry_controls =
(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
- kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
- vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ kvm_get_dr(vcpu, 7, (size_t *)&vmcs12->guest_dr7);
+ vmcs12->guest_ia32_debugctl = vmcs_read64(vcpu, GUEST_IA32_DEBUGCTL);
}
/* TODO: These cannot have changed unless we have MSR bitmaps and
* the relevant bit asks not to trap the change */
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
- vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
+ vmcs12->guest_ia32_pat = vmcs_read64(vcpu, GUEST_IA32_PAT);
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
vmcs12->guest_ia32_efer = vcpu->arch.efer;
- vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
- vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
- vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+ vmcs12->guest_sysenter_cs = vmcs_read32(vcpu, GUEST_SYSENTER_CS);
+ vmcs12->guest_sysenter_esp = vmcs_readl(vcpu, GUEST_SYSENTER_ESP);
+ vmcs12->guest_sysenter_eip = vmcs_readl(vcpu, GUEST_SYSENTER_EIP);
if (kvm_mpx_supported())
- vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+ vmcs12->guest_bndcfgs = vmcs_read64(vcpu, GUEST_BNDCFGS);
if (nested_cpu_has_xsaves(vmcs12))
- vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
+ vmcs12->xss_exit_bitmap = vmcs_read64(vcpu, XSS_EXIT_BITMAP);
/* update exit information fields: */
@@ -10577,10 +8794,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
vmcs12->vm_exit_intr_error_code =
- vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ vmcs_read32(vcpu, VM_EXIT_INTR_ERROR_CODE);
vmcs12->idt_vectoring_info_field = 0;
- vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
- vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ vmcs12->vm_exit_instruction_len = vmcs_read32(vcpu, VM_EXIT_INSTRUCTION_LEN);
+ vmcs12->vmx_instruction_info = vmcs_read32(vcpu, VMX_INSTRUCTION_INFO);
if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
/* vm_entry_intr_info_field is cleared on exit. Emulate this
@@ -10641,14 +8858,14 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* but we also need to update cr0_guest_host_mask and exception_bitmap.
*/
update_exception_bitmap(vcpu);
- vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0);
- vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
+ vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
+ vmcs_writel(vcpu, CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
/*
* Note that CR4_GUEST_HOST_MASK is already set in the original vmcs01
- * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask();
+ * (kvm doesn't change it)- no reason to call set_cr4_guest_host_mask();
*/
- vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
+ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(vcpu, CR4_GUEST_HOST_MASK);
kvm_set_cr4(vcpu, vmcs12->host_cr4);
nested_ept_uninit_mmu_context(vcpu);
@@ -10669,22 +8886,22 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
}
- vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
- vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
- vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
- vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
- vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+ vmcs_write32(vcpu, GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
+ vmcs_writel(vcpu, GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
+ vmcs_writel(vcpu, GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
+ vmcs_writel(vcpu, GUEST_IDTR_BASE, vmcs12->host_idtr_base);
+ vmcs_writel(vcpu, GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
- vmcs_write64(GUEST_BNDCFGS, 0);
+ vmcs_write64(vcpu, GUEST_BNDCFGS, 0);
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
- vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
+ vmcs_write64(vcpu, GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
- vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
+ vmcs_write64(vcpu, GUEST_IA32_PERF_GLOBAL_CTRL,
vmcs12->host_ia32_perf_global_ctrl);
/* Set L1 segment info according to Intel SDM
@@ -10734,7 +8951,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
kvm_set_dr(vcpu, 7, 0x400);
- vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+ vmcs_write64(vcpu, GUEST_IA32_DEBUGCTL, 0);
if (cpu_has_vmx_msr_bitmap())
vmx_set_msr_bitmap(vcpu);
@@ -10751,7 +8968,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
*/
static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
u32 exit_intr_info,
- unsigned long exit_qualification)
+ size_t exit_qualification)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -10777,13 +8994,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
}
- trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
- vmcs12->exit_qualification,
- vmcs12->idt_vectoring_info_field,
- vmcs12->vm_exit_intr_info,
- vmcs12->vm_exit_intr_error_code,
- KVM_ISA_VMX);
-
vm_entry_controls_reset_shadow(vmx);
vm_exit_controls_reset_shadow(vmx);
vmx_segment_cache_clear(vmx);
@@ -10795,15 +9005,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);
/* Update any VMCS fields that might have changed while L2 ran */
- vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
- if (vmx->hv_deadline_tsc == -1)
- vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
- else
- vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
- if (kvm_has_tsc_control)
- decache_tsc_multiplier(vmx);
+ vmcs_write64(vcpu, TSC_OFFSET, vcpu->arch.tsc_offset);
if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
@@ -10815,26 +9017,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmx->host_rsp = 0;
/* Unpin physical memory we referred to in vmcs02 */
- if (vmx->nested.apic_access_page) {
- nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page = NULL;
- }
- if (vmx->nested.virtual_apic_page) {
- nested_release_page(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page = NULL;
+ if (vmx->nested.apic_access_mdl) {
+ nested_release_page(vmx->nested.apic_access_mdl);
+ vmx->nested.apic_access_mdl = NULL;
}
- if (vmx->nested.pi_desc_page) {
- kunmap(vmx->nested.pi_desc_page);
- nested_release_page(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc_page = NULL;
- vmx->nested.pi_desc = NULL;
+ if (vmx->nested.virtual_apic_mdl) {
+ nested_release_page(vmx->nested.virtual_apic_mdl);
+ vmx->nested.virtual_apic_mdl = NULL;
}
/*
* We are now running in L2, mmu_notifier will force to reload the
* page's hpa for L2 vmcs. Need to reload it for L1 before entering L1.
*/
- kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+ kvm_make_request(GVM_REQ_APIC_PAGE_RELOAD, vcpu);
/*
* Exiting from L2 to L1, we're now back to L1 which thinks it just
@@ -10843,14 +9039,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
if (unlikely(vmx->fail)) {
vmx->fail = 0;
- nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
+ nested_vmx_failValid(vcpu, vmcs_read32(vcpu, VM_INSTRUCTION_ERROR));
} else
nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
vmx->nested.sync_shadow_vmcs = true;
/* in case we halted in L2 */
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ vcpu->arch.mp_state = GVM_MP_STATE_RUNNABLE;
}
/*
@@ -10872,7 +9068,7 @@ static void vmx_leave_nested(struct kvm_vcpu *vcpu)
*/
static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12,
- u32 reason, unsigned long qualification)
+ u32 reason, size_t qualification)
{
load_vmcs12_host_state(vcpu, vmcs12);
vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
@@ -10889,75 +9085,10 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
return X86EMUL_CONTINUE;
}
-#ifdef CONFIG_X86_64
-/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
-static inline int u64_shl_div_u64(u64 a, unsigned int shift,
- u64 divisor, u64 *result)
-{
- u64 low = a << shift, high = a >> (64 - shift);
-
- /* To avoid the overflow on divq */
- if (high >= divisor)
- return 1;
-
- /* Low hold the result, high hold rem which is discarded */
- asm("divq %2\n\t" : "=a" (low), "=d" (high) :
- "rm" (divisor), "0" (low), "1" (high));
- *result = low;
-
- return 0;
-}
-
-static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 tscl = rdtsc();
- u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
- u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
-
- /* Convert to host delta tsc if tsc scaling is enabled */
- if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
- u64_shl_div_u64(delta_tsc,
- kvm_tsc_scaling_ratio_frac_bits,
- vcpu->arch.tsc_scaling_ratio,
- &delta_tsc))
- return -ERANGE;
-
- /*
- * If the delta tsc can't fit in the 32 bit after the multi shift,
- * we can't use the preemption timer.
- * It's possible that it fits on later vmentries, but checking
- * on every vmentry is costly so we just use an hrtimer.
- */
- if (delta_tsc >> (cpu_preemption_timer_multi + 32))
- return -ERANGE;
-
- vmx->hv_deadline_tsc = tscl + delta_tsc;
- vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
- return 0;
-}
-
-static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx->hv_deadline_tsc = -1;
- vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
-}
-#endif
-
-static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
- if (ple_gap)
- shrink_ple_window(vcpu);
-}
-
static void vmx_slot_enable_log_dirty(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
- kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
}
static void vmx_slot_disable_log_dirty(struct kvm *kvm,
@@ -10968,257 +9099,17 @@ static void vmx_slot_disable_log_dirty(struct kvm *kvm,
static void vmx_flush_log_dirty(struct kvm *kvm)
{
- kvm_flush_pml_buffers(kvm);
+ //kvm_flush_pml_buffers(kvm);
}
static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- gfn_t offset, unsigned long mask)
+ gfn_t offset, size_t mask)
{
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}
-/*
- * This routine does the following things for vCPU which is going
- * to be blocked if VT-d PI is enabled.
- * - Store the vCPU to the wakeup list, so when interrupts happen
- * we can find the right vCPU to wake up.
- * - Change the Posted-interrupt descriptor as below:
- * 'NDST' <-- vcpu->pre_pcpu
- * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
- * - If 'ON' is set during this process, which means at least one
- * interrupt is posted for this vCPU, we cannot block it, in
- * this case, return 1, otherwise, return 0.
- *
- */
-static int pi_pre_block(struct kvm_vcpu *vcpu)
-{
- unsigned long flags;
- unsigned int dest;
- struct pi_desc old, new;
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
- return 0;
-
- vcpu->pre_pcpu = vcpu->cpu;
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_add_tail(&vcpu->blocked_vcpu_list,
- &per_cpu(blocked_vcpu_on_cpu,
- vcpu->pre_pcpu));
- spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
-
- do {
- old.control = new.control = pi_desc->control;
-
- /*
- * We should not block the vCPU if
- * an interrupt is posted for it.
- */
- if (pi_test_on(pi_desc) == 1) {
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
-
- return 1;
- }
-
- WARN((pi_desc->sn == 1),
- "Warning: SN field of posted-interrupts "
- "is set before blocking\n");
-
- /*
- * Since vCPU can be preempted during this process,
- * vcpu->cpu could be different with pre_pcpu, we
- * need to set pre_pcpu as the destination of wakeup
- * notification event, then we can find the right vCPU
- * to wakeup in wakeup handler if interrupts happen
- * when the vCPU is in blocked state.
- */
- dest = cpu_physical_id(vcpu->pre_pcpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* set 'NV' to 'wakeup vector' */
- new.nv = POSTED_INTR_WAKEUP_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
-
- return 0;
-}
-
-static int vmx_pre_block(struct kvm_vcpu *vcpu)
-{
- if (pi_pre_block(vcpu))
- return 1;
-
- if (kvm_lapic_hv_timer_in_use(vcpu))
- kvm_lapic_switch_to_sw_timer(vcpu);
-
- return 0;
-}
-
-static void pi_post_block(struct kvm_vcpu *vcpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
- unsigned long flags;
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
- return;
-
- do {
- old.control = new.control = pi_desc->control;
-
- dest = cpu_physical_id(vcpu->cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* Allow posting non-urgent interrupts */
- new.sn = 0;
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
-
- if(vcpu->pre_pcpu != -1) {
- spin_lock_irqsave(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
- }
-}
-
-static void vmx_post_block(struct kvm_vcpu *vcpu)
-{
- if (kvm_x86_ops->set_hv_timer)
- kvm_lapic_switch_to_hv_timer(vcpu);
-
- pi_post_block(vcpu);
-}
-
-/*
- * vmx_update_pi_irte - set IRTE for Posted-Interrupts
- *
- * @kvm: kvm
- * @host_irq: host irq of the interrupt
- * @guest_irq: gsi of the interrupt
- * @set: set or unset PI
- * returns 0 on success, < 0 on failure
- */
-static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set)
-{
- struct kvm_kernel_irq_routing_entry *e;
- struct kvm_irq_routing_table *irq_rt;
- struct kvm_lapic_irq irq;
- struct kvm_vcpu *vcpu;
- struct vcpu_data vcpu_info;
- int idx, ret = -EINVAL;
-
- if (!kvm_arch_has_assigned_device(kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(kvm->vcpus[0]))
- return 0;
-
- idx = srcu_read_lock(&kvm->irq_srcu);
- irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
- BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
-
- hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
- if (e->type != KVM_IRQ_ROUTING_MSI)
- continue;
- /*
- * VT-d PI cannot support posting multicast/broadcast
- * interrupts to a vCPU, we still use interrupt remapping
- * for these kind of interrupts.
- *
- * For lowest-priority interrupts, we only support
- * those with single CPU as the destination, e.g. user
- * configures the interrupts via /proc/irq or uses
- * irqbalance to make the interrupts single-CPU.
- *
- * We will support full lowest-priority interrupt later.
- */
-
- kvm_set_msi_irq(kvm, e, &irq);
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
- /*
- * Make sure the IRTE is in remapped mode if
- * we don't handle it in posted mode.
- */
- ret = irq_set_vcpu_affinity(host_irq, NULL);
- if (ret < 0) {
- printk(KERN_INFO
- "failed to back to remapped mode, irq: %u\n",
- host_irq);
- goto out;
- }
-
- continue;
- }
-
- vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
- vcpu_info.vector = irq.vector;
-
- trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
- vcpu_info.vector, vcpu_info.pi_desc_addr, set);
-
- if (set)
- ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
- else {
- /* suppress notification event before unposting */
- pi_set_sn(vcpu_to_pi_desc(vcpu));
- ret = irq_set_vcpu_affinity(host_irq, NULL);
- pi_clear_sn(vcpu_to_pi_desc(vcpu));
- }
-
- if (ret < 0) {
- printk(KERN_INFO "%s: failed to update PI IRTE\n",
- __func__);
- goto out;
- }
- }
-
- ret = 0;
-out:
- srcu_read_unlock(&kvm->irq_srcu, idx);
- return ret;
-}
-
-static void vmx_setup_mce(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.mcg_cap & MCG_LMCE_P)
- to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
- FEATURE_CONTROL_LMCE;
- else
- to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
- ~FEATURE_CONTROL_LMCE;
-}
-
-static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
+static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
.hardware_setup = hardware_setup,
@@ -11233,7 +9124,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.vcpu_free = vmx_free_vcpu,
.vcpu_reset = vmx_vcpu_reset,
- .prepare_guest_switch = vmx_save_host_state,
+ .save_host_state = vmx_save_host_state,
+ .load_host_state = vmx_load_host_state,
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
@@ -11264,11 +9156,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
- .get_pkru = vmx_get_pkru,
-
- .fpu_activate = vmx_fpu_activate,
- .fpu_deactivate = vmx_fpu_deactivate,
-
.tlb_flush = vmx_flush_tlb,
.run = vmx_vcpu_run,
@@ -11276,7 +9163,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.skip_emulated_instruction = skip_emulated_instruction,
.set_interrupt_shadow = vmx_set_interrupt_shadow,
.get_interrupt_shadow = vmx_get_interrupt_shadow,
- .patch_hypercall = vmx_patch_hypercall,
.set_irq = vmx_inject_irq,
.set_nmi = vmx_inject_nmi,
.queue_exception = vmx_queue_exception,
@@ -11295,8 +9181,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update,
- .sync_pir_to_irr = vmx_sync_pir_to_irr,
- .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
@@ -11326,52 +9210,22 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.check_nested_events = vmx_check_nested_events,
- .sched_in = vmx_sched_in,
-
.slot_enable_log_dirty = vmx_slot_enable_log_dirty,
.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
.flush_log_dirty = vmx_flush_log_dirty,
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
- .pre_block = vmx_pre_block,
- .post_block = vmx_post_block,
-
- .pmu_ops = &intel_pmu_ops,
- .update_pi_irte = vmx_update_pi_irte,
-
-#ifdef CONFIG_X86_64
- .set_hv_timer = vmx_set_hv_timer,
- .cancel_hv_timer = vmx_cancel_hv_timer,
-#endif
-
- .setup_mce = vmx_setup_mce,
+ //.pmu_ops = &intel_pmu_ops,
};
-static int __init vmx_init(void)
+int vmx_init(void)
{
- int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
- if (r)
- return r;
-
-#ifdef CONFIG_KEXEC_CORE
- rcu_assign_pointer(crash_vmclear_loaded_vmcss,
- crash_vmclear_local_loaded_vmcss);
-#endif
-
- return 0;
+ return kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), 0);
}
-static void __exit vmx_exit(void)
+void vmx_exit(void)
{
-#ifdef CONFIG_KEXEC_CORE
- RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
- synchronize_rcu();
-#endif
-
kvm_exit();
}
-module_init(vmx_init)
-module_exit(vmx_exit)
diff --git a/arch/x86/kvm/vmx_def.h b/arch/x86/kvm/vmx_def.h
new file mode 100755
index 0000000..89ff76a
--- /dev/null
+++ b/arch/x86/kvm/vmx_def.h
@@ -0,0 +1,425 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
+ *
+ * Authors:
+ * Avi Kivity <avi@qumranet.com>
+ * Yaniv Kamay <yaniv@qumranet.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "irq.h"
+#include "mmu.h"
+#include "cpuid.h"
+#include "lapic.h"
+
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <ntkrutils.h>
+#include <__asm.h>
+#include "kvm_cache_regs.h"
+#include "x86.h"
+#include <asm/vmx.h>
+
+#include "pmu.h"
+
+/* MTRR memory types, which are defined in SDM */
+#define MTRR_TYPE_UNCACHABLE 0
+#define MTRR_TYPE_WRCOMB 1
+/*#define MTRR_TYPE_ 2*/
+/*#define MTRR_TYPE_ 3*/
+#define MTRR_TYPE_WRTHROUGH 4
+#define MTRR_TYPE_WRPROT 5
+#define MTRR_TYPE_WRBACK 6
+#define MTRR_NUM_TYPES 7
+
+
+#define GVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
+#define GVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
+#define GVM_VM_CR0_ALWAYS_ON \
+ (GVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define GVM_CR4_GUEST_OWNED_BITS \
+ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
+ | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
+
+#define GVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
+#define GVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
+
+#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
+
+#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+
+#define NR_AUTOLOAD_MSRS 8
+#define VMCS02_POOL_SIZE 1
+
+struct vmcs {
+ u32 revision_id;
+ u32 abort;
+ char data[1016];
+};
+
+/*
+ * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
+ * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
+ * loaded on this CPU (so we can clear them if the CPU goes down).
+ */
+struct loaded_vmcs {
+ struct vmcs *vmcs;
+ struct vmcs *shadow_vmcs;
+ int cpu;
+ int launched;
+};
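As a purely illustrative aside (not part of the patch above), the launched flag in struct loaded_vmcs is the piece of state a VM-entry path would consult to choose between VMLAUNCH and VMRESUME; a minimal sketch, assuming only the struct as defined here:

static inline bool loaded_vmcs_needs_vmlaunch(const struct loaded_vmcs *lv)
{
	/* VMLAUNCH is only legal for a VMCS in the "clear" launch state,
	 * i.e. the first entry after VMCLEAR; once that entry succeeds,
	 * later entries must use VMRESUME instead. */
	return !lv->launched;
}

The helper name is hypothetical; in practice the equivalent test is made inline in the VM-entry path (vmx_vcpu_run).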
+
+/*
+ * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a
+ * single nested guest (L2), hence the name vmcs12. Any VMX implementation has
+ * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is
+ * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
+ * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
+ * More than one of these structures may exist, if L1 runs multiple L2 guests.
+ * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
+ * underlying hardware which will be used to run L2.
+ * This structure is packed to ensure that its layout is identical across
+ * machines (necessary for live migration).
+ * If there are changes in this struct, VMCS12_REVISION must be changed.
+ */
+typedef u64 natural_width;
+struct __packed vmcs12 {
+ /* According to the Intel spec, a VMCS region must start with the
+ * following two fields. Then follow implementation-specific data.
+ */
+ u32 revision_id;
+ u32 abort;
+
+ u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
+ u32 padding[7]; /* room for future expansion */
+
+ u64 io_bitmap_a;
+ u64 io_bitmap_b;
+ u64 msr_bitmap;
+ u64 vm_exit_msr_store_addr;
+ u64 vm_exit_msr_load_addr;
+ u64 vm_entry_msr_load_addr;
+ u64 tsc_offset;
+ u64 virtual_apic_page_addr;
+ u64 apic_access_addr;
+ u64 posted_intr_desc_addr;
+ u64 ept_pointer;
+ u64 eoi_exit_bitmap0;
+ u64 eoi_exit_bitmap1;
+ u64 eoi_exit_bitmap2;
+ u64 eoi_exit_bitmap3;
+ u64 xss_exit_bitmap;
+ u64 guest_physical_address;
+ u64 vmcs_link_pointer;
+ u64 guest_ia32_debugctl;
+ u64 guest_ia32_pat;
+ u64 guest_ia32_efer;
+ u64 guest_ia32_perf_global_ctrl;
+ u64 guest_pdptr0;
+ u64 guest_pdptr1;
+ u64 guest_pdptr2;
+ u64 guest_pdptr3;
+ u64 guest_bndcfgs;
+ u64 host_ia32_pat;
+ u64 host_ia32_efer;
+ u64 host_ia32_perf_global_ctrl;
+ u64 padding64[8]; /* room for future expansion */
+ /*
+ * To allow migration of L1 (complete with its L2 guests) between
+ * machines of different natural widths (32 or 64 bit), we cannot have
+	 * size_t fields with no explicit size. We use u64 (aliased
+ * natural_width) instead. Luckily, x86 is little-endian.
+ */
+ natural_width cr0_guest_host_mask;
+ natural_width cr4_guest_host_mask;
+ natural_width cr0_read_shadow;
+ natural_width cr4_read_shadow;
+ natural_width cr3_target_value0;
+ natural_width cr3_target_value1;
+ natural_width cr3_target_value2;
+ natural_width cr3_target_value3;
+ natural_width exit_qualification;
+ natural_width guest_linear_address;
+ natural_width guest_cr0;
+ natural_width guest_cr3;
+ natural_width guest_cr4;
+ natural_width guest_es_base;
+ natural_width guest_cs_base;
+ natural_width guest_ss_base;
+ natural_width guest_ds_base;
+ natural_width guest_fs_base;
+ natural_width guest_gs_base;
+ natural_width guest_ldtr_base;
+ natural_width guest_tr_base;
+ natural_width guest_gdtr_base;
+ natural_width guest_idtr_base;
+ natural_width guest_dr7;
+ natural_width guest_rsp;
+ natural_width guest_rip;
+ natural_width guest_rflags;
+ natural_width guest_pending_dbg_exceptions;
+ natural_width guest_sysenter_esp;
+ natural_width guest_sysenter_eip;
+ natural_width host_cr0;
+ natural_width host_cr3;
+ natural_width host_cr4;
+ natural_width host_fs_base;
+ natural_width host_gs_base;
+ natural_width host_tr_base;
+ natural_width host_gdtr_base;
+ natural_width host_idtr_base;
+ natural_width host_ia32_sysenter_esp;
+ natural_width host_ia32_sysenter_eip;
+ natural_width host_rsp;
+ natural_width host_rip;
+ natural_width paddingl[8]; /* room for future expansion */
+ u32 pin_based_vm_exec_control;
+ u32 cpu_based_vm_exec_control;
+ u32 exception_bitmap;
+ u32 page_fault_error_code_mask;
+ u32 page_fault_error_code_match;
+ u32 cr3_target_count;
+ u32 vm_exit_controls;
+ u32 vm_exit_msr_store_count;
+ u32 vm_exit_msr_load_count;
+ u32 vm_entry_controls;
+ u32 vm_entry_msr_load_count;
+ u32 vm_entry_intr_info_field;
+ u32 vm_entry_exception_error_code;
+ u32 vm_entry_instruction_len;
+ u32 tpr_threshold;
+ u32 secondary_vm_exec_control;
+ u32 vm_instruction_error;
+ u32 vm_exit_reason;
+ u32 vm_exit_intr_info;
+ u32 vm_exit_intr_error_code;
+ u32 idt_vectoring_info_field;
+ u32 idt_vectoring_error_code;
+ u32 vm_exit_instruction_len;
+ u32 vmx_instruction_info;
+ u32 guest_es_limit;
+ u32 guest_cs_limit;
+ u32 guest_ss_limit;
+ u32 guest_ds_limit;
+ u32 guest_fs_limit;
+ u32 guest_gs_limit;
+ u32 guest_ldtr_limit;
+ u32 guest_tr_limit;
+ u32 guest_gdtr_limit;
+ u32 guest_idtr_limit;
+ u32 guest_es_ar_bytes;
+ u32 guest_cs_ar_bytes;
+ u32 guest_ss_ar_bytes;
+ u32 guest_ds_ar_bytes;
+ u32 guest_fs_ar_bytes;
+ u32 guest_gs_ar_bytes;
+ u32 guest_ldtr_ar_bytes;
+ u32 guest_tr_ar_bytes;
+ u32 guest_interruptibility_info;
+ u32 guest_activity_state;
+ u32 guest_sysenter_cs;
+ u32 host_ia32_sysenter_cs;
+ u32 vmx_preemption_timer_value;
+ u32 padding32[7]; /* room for future expansion */
+ u16 virtual_processor_id;
+ u16 posted_intr_nv;
+ u16 guest_es_selector;
+ u16 guest_cs_selector;
+ u16 guest_ss_selector;
+ u16 guest_ds_selector;
+ u16 guest_fs_selector;
+ u16 guest_gs_selector;
+ u16 guest_ldtr_selector;
+ u16 guest_tr_selector;
+ u16 guest_intr_status;
+ u16 host_es_selector;
+ u16 host_cs_selector;
+ u16 host_ss_selector;
+ u16 host_ds_selector;
+ u16 host_fs_selector;
+ u16 host_gs_selector;
+ u16 host_tr_selector;
+};
+
+/*
+ * VMCS12_REVISION is an arbitrary id that should be changed if the content or
+ * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
+ * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
+ */
+#define VMCS12_REVISION 0x11e57ed0
+
+/*
+ * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
+ * and any VMCS region. Although only sizeof(struct vmcs12) bytes are used by
+ * the current implementation, 4K is reserved to avoid future complications.
+ */
+#define VMCS12_SIZE 0x1000
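A minimal sketch of how the two invariants described above could be checked at build time; this is an editorial illustration, assuming a C11 toolchain with _Static_assert (a kernel build would reach for BUILD_BUG_ON instead), and is not part of the patch:

#include <stddef.h>	/* offsetof */

/* vmcs12 must fit inside the 4K region handed to L1, and the
 * revision_id/abort pair must stay at the very top of that region. */
_Static_assert(sizeof(struct vmcs12) <= VMCS12_SIZE,
	       "struct vmcs12 no longer fits in VMCS12_SIZE");
_Static_assert(offsetof(struct vmcs12, launch_state) == 8,
	       "revision_id and abort must remain the first two fields");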
+
+/* Used to remember the last vmcs02 used for some recently used vmcs12s */
+struct vmcs02_list {
+ struct list_head list;
+ gpa_t vmptr;
+ struct loaded_vmcs vmcs02;
+};
+
+/*
+ * The nested_vmx structure is part of vcpu_vmx, and holds information we need
+ * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
+ */
+struct nested_vmx {
+ /* Has the level1 guest done vmxon? */
+ bool vmxon;
+ gpa_t vmxon_ptr;
+
+ /* The guest-physical address of the current VMCS L1 keeps for L2 */
+ gpa_t current_vmptr;
+ /* The host-usable pointer to the above */
+ PMDL current_vmcs12_mdl;
+ struct vmcs12 *current_vmcs12;
+ /*
+ * Cache of the guest's VMCS, existing outside of guest memory.
+ * Loaded from guest memory during VMPTRLD. Flushed to guest
+ * memory during VMXOFF, VMCLEAR, VMPTRLD.
+ */
+ struct vmcs12 *cached_vmcs12;
+ /*
+ * Indicates if the shadow vmcs must be updated with the
+ * data hold by vmcs12
+	 * data held by vmcs12
+ bool sync_shadow_vmcs;
+
+ /* vmcs02_list cache of VMCSs recently used to run L2 guests */
+ struct list_head vmcs02_pool;
+ int vmcs02_num;
+ bool change_vmcs01_virtual_x2apic_mode;
+ /* L2 must run next, and mustn't decide to exit to L1. */
+ bool nested_run_pending;
+ /*
+ * Guest pages referred to in vmcs02 with host-physical pointers, so
+ * we must keep them pinned while L2 runs.
+ */
+ PMDL apic_access_mdl;
+ PMDL virtual_apic_mdl;
+
+ size_t *msr_bitmap;
+
+ /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
+ u64 vmcs01_debugctl;
+
+ u16 vpid02;
+ u16 last_vpid;
+
+ u32 nested_vmx_procbased_ctls_low;
+ u32 nested_vmx_procbased_ctls_high;
+ u32 nested_vmx_true_procbased_ctls_low;
+ u32 nested_vmx_secondary_ctls_low;
+ u32 nested_vmx_secondary_ctls_high;
+ u32 nested_vmx_pinbased_ctls_low;
+ u32 nested_vmx_pinbased_ctls_high;
+ u32 nested_vmx_exit_ctls_low;
+ u32 nested_vmx_exit_ctls_high;
+ u32 nested_vmx_true_exit_ctls_low;
+ u32 nested_vmx_entry_ctls_low;
+ u32 nested_vmx_entry_ctls_high;
+ u32 nested_vmx_true_entry_ctls_low;
+ u32 nested_vmx_misc_low;
+ u32 nested_vmx_misc_high;
+ u32 nested_vmx_ept_caps;
+ u32 nested_vmx_vpid_caps;
+};
+
+struct vcpu_vmx {
+ struct kvm_vcpu vcpu;
+ size_t host_rsp;
+ u8 fail;
+ bool nmi_known_unmasked;
+ u32 exit_intr_info;
+ u32 idt_vectoring_info;
+ ulong rflags;
+#ifdef CONFIG_X86_64
+ u64 msr_host_kernel_gs_base;
+ u64 msr_guest_kernel_gs_base;
+#endif
+ u32 vm_entry_controls_shadow;
+ u32 vm_exit_controls_shadow;
+ /*
+ * loaded_vmcs points to the VMCS currently used in this vcpu. For a
+ * non-nested (L1) guest, it always points to vmcs01. For a nested
+ * guest (L2), it points to a different VMCS.
+ */
+ struct loaded_vmcs vmcs01;
+ struct loaded_vmcs *loaded_vmcs;
+ bool __launched; /* temporary, used in vmx_vcpu_run */
+ struct msr_autoload {
+ unsigned nr;
+ struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
+ struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
+ } msr_autoload;
+ struct {
+ u16 fs_sel, gs_sel;
+#ifdef CONFIG_X86_64
+ u16 ds_sel, es_sel;
+#endif
+ int gs_reload_needed;
+ int fs_reload_needed;
+ u64 msr_host_bndcfgs;
+ size_t vmcs_host_cr4; /* May not match real cr4 */
+ } host_state;
+ struct {
+ int vm86_active;
+ ulong save_rflags;
+ struct kvm_segment segs[8];
+ } rmode;
+ struct {
+ u32 bitmask; /* 4 bits per segment (1 bit per field) */
+ struct kvm_save_segment {
+ u16 selector;
+ size_t base;
+ u32 limit;
+ u32 ar;
+ } seg[8];
+ } segment_cache;
+ int vpid;
+ bool emulation_required;
+
+ /* Support for vnmi-less CPUs */
+ int soft_vnmi_blocked;
+ ktime_t entry_time;
+ s64 vnmi_blocked_time;
+ u32 exit_reason;
+
+ /* Support for a guest hypervisor (nested VMX) */
+ struct nested_vmx nested;
+
+ /* Support for PML */
+#define PML_ENTITY_NUM 512
+ struct page *pml_pg;
+
+ /*
+ * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
+ * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
+ * in msr_ia32_feature_control_valid_bits.
+ */
+ u64 msr_ia32_feature_control;
+ u64 msr_ia32_feature_control_valid_bits;
+};
+
+enum segment_cache_field {
+ SEG_FIELD_SEL = 0,
+ SEG_FIELD_BASE = 1,
+ SEG_FIELD_LIMIT = 2,
+ SEG_FIELD_AR = 3,
+
+ SEG_FIELD_NR = 4
+};
+
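One more editorial sketch, not taken from the patch: the 4-bits-per-segment bitmask in vcpu_vmx.segment_cache is conventionally indexed as seg * SEG_FIELD_NR + field, one bit per cached field. The helper below (its name is hypothetical) shows the usual test-and-set shape built on top of it:

static inline bool segment_cache_test_set(struct vcpu_vmx *vmx,
					  unsigned int seg, unsigned int field)
{
	u32 mask = 1u << (seg * SEG_FIELD_NR + field);
	bool was_cached = vmx->segment_cache.bitmask & mask;

	vmx->segment_cache.bitmask |= mask;	/* mark (seg, field) as cached */
	return was_cached;
}

Callers would typically refill seg[seg].selector/base/limit/ar from the VMCS only when this returns false, and clear the whole bitmask on every VM exit.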
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 04c5d96..44637f3 100644..100755
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7,6 +7,7 @@
* Copyright (C) 2008 Qumranet, Inc.
* Copyright IBM Corporation, 2008
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -19,67 +20,28 @@
*
*/
+#include <gvm_types.h>
+#include <ntkrutils.h>
+#include <gvm-main.h>
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
-#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
-#include "assigned-dev.h"
#include "pmu.h"
-#include "hyperv.h"
-
-#include <linux/clocksource.h>
-#include <linux/interrupt.h>
-#include <linux/kvm.h>
-#include <linux/fs.h>
-#include <linux/vmalloc.h>
-#include <linux/export.h>
-#include <linux/moduleparam.h>
-#include <linux/mman.h>
-#include <linux/highmem.h>
-#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
-#include <linux/cpufreq.h>
-#include <linux/user-return-notifier.h>
-#include <linux/srcu.h>
-#include <linux/slab.h>
-#include <linux/perf_event.h>
-#include <linux/uaccess.h>
-#include <linux/hash.h>
-#include <linux/pci.h>
-#include <linux/timekeeper_internal.h>
-#include <linux/pvclock_gtod.h>
-#include <linux/kvm_irqfd.h>
-#include <linux/irqbypass.h>
-#include <trace/events/kvm.h>
-
-#include <asm/debugreg.h>
-#include <asm/msr.h>
-#include <asm/desc.h>
-#include <asm/mce.h>
-#include <linux/kernel_stat.h>
-#include <asm/fpu/internal.h> /* Ugh! */
-#include <asm/pvclock.h>
-#include <asm/div64.h>
-#include <asm/irq_remapping.h>
-
-#define CREATE_TRACE_POINTS
-#include "trace.h"
+#include <asm/vmx.h>
+
#define MAX_IO_MSRS 256
-#define KVM_MAX_MCE_BANKS 32
-u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
-EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
#define emul_to_vcpu(ctxt) \
container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
/* EFER defaults:
- * - enable syscall per default because its emulated by KVM
- * - enable LME and LMA per default on 64 bit KVM
+ * - enable syscall per default because its emulated by kvm
+ * - enable LME and LMA per default on 64 bit kvm
*/
#ifdef CONFIG_X86_64
static
@@ -88,219 +50,39 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VM_STAT(x) offsetof(struct kvm, stat.x), GVM_STAT_VM
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), GVM_STAT_VCPU
-#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
- KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+#define GVM_X2APIC_API_VALID_FLAGS (GVM_X2APIC_API_USE_32BIT_IDS | \
+ GVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
-static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+static void __kvm_set_rflags(struct kvm_vcpu *vcpu, size_t rflags);
struct kvm_x86_ops *kvm_x86_ops __read_mostly;
-EXPORT_SYMBOL_GPL(kvm_x86_ops);
static bool __read_mostly ignore_msrs = 0;
-module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
unsigned int min_timer_period_us = 500;
-module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
-
-static bool __read_mostly kvmclock_periodic_sync = true;
-module_param(kvmclock_periodic_sync, bool, S_IRUGO);
-
-bool __read_mostly kvm_has_tsc_control;
-EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
-u32 __read_mostly kvm_max_guest_tsc_khz;
-EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
-u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
-EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
-u64 __read_mostly kvm_max_tsc_scaling_ratio;
-EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
-u64 __read_mostly kvm_default_tsc_scaling_ratio;
-EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 __read_mostly tsc_tolerance_ppm = 250;
-module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
/* lapic timer advance (tscdeadline mode only) in nanoseconds */
unsigned int __read_mostly lapic_timer_advance_ns = 0;
-module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
static bool __read_mostly vector_hashing = true;
-module_param(vector_hashing, bool, S_IRUGO);
static bool __read_mostly backwards_tsc_observed = false;
-#define KVM_NR_SHARED_MSRS 16
-
-struct kvm_shared_msrs_global {
- int nr;
- u32 msrs[KVM_NR_SHARED_MSRS];
-};
-
-struct kvm_shared_msrs {
- struct user_return_notifier urn;
- bool registered;
- struct kvm_shared_msr_values {
- u64 host;
- u64 curr;
- } values[KVM_NR_SHARED_MSRS];
-};
-
-static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
-static struct kvm_shared_msrs __percpu *shared_msrs;
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "pf_fixed", VCPU_STAT(pf_fixed) },
- { "pf_guest", VCPU_STAT(pf_guest) },
- { "tlb_flush", VCPU_STAT(tlb_flush) },
- { "invlpg", VCPU_STAT(invlpg) },
- { "exits", VCPU_STAT(exits) },
- { "io_exits", VCPU_STAT(io_exits) },
- { "mmio_exits", VCPU_STAT(mmio_exits) },
- { "signal_exits", VCPU_STAT(signal_exits) },
- { "irq_window", VCPU_STAT(irq_window_exits) },
- { "nmi_window", VCPU_STAT(nmi_window_exits) },
- { "halt_exits", VCPU_STAT(halt_exits) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "hypercalls", VCPU_STAT(hypercalls) },
- { "request_irq", VCPU_STAT(request_irq_exits) },
- { "irq_exits", VCPU_STAT(irq_exits) },
- { "host_state_reload", VCPU_STAT(host_state_reload) },
- { "efer_reload", VCPU_STAT(efer_reload) },
- { "fpu_reload", VCPU_STAT(fpu_reload) },
- { "insn_emulation", VCPU_STAT(insn_emulation) },
- { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
- { "irq_injections", VCPU_STAT(irq_injections) },
- { "nmi_injections", VCPU_STAT(nmi_injections) },
- { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
- { "mmu_pte_write", VM_STAT(mmu_pte_write) },
- { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
- { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
- { "mmu_flooded", VM_STAT(mmu_flooded) },
- { "mmu_recycled", VM_STAT(mmu_recycled) },
- { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
- { "mmu_unsync", VM_STAT(mmu_unsync) },
- { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
- { "largepages", VM_STAT(lpages) },
- { NULL }
-};
-
u64 __read_mostly host_xcr0;
-static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
-
-static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
-{
- int i;
- for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
- vcpu->arch.apf.gfns[i] = ~0;
-}
-
-static void kvm_on_user_return(struct user_return_notifier *urn)
-{
- unsigned slot;
- struct kvm_shared_msrs *locals
- = container_of(urn, struct kvm_shared_msrs, urn);
- struct kvm_shared_msr_values *values;
- unsigned long flags;
-
- /*
- * Disabling irqs at this point since the following code could be
- * interrupted and executed through kvm_arch_hardware_disable()
- */
- local_irq_save(flags);
- if (locals->registered) {
- locals->registered = false;
- user_return_notifier_unregister(urn);
- }
- local_irq_restore(flags);
- for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
- values = &locals->values[slot];
- if (values->host != values->curr) {
- wrmsrl(shared_msrs_global.msrs[slot], values->host);
- values->curr = values->host;
- }
- }
-}
-
-static void shared_msr_update(unsigned slot, u32 msr)
-{
- u64 value;
- unsigned int cpu = smp_processor_id();
- struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
-
- /* only read, and nobody should modify it at this time,
- * so don't need lock */
- if (slot >= shared_msrs_global.nr) {
- printk(KERN_ERR "kvm: invalid MSR slot!");
- return;
- }
- rdmsrl_safe(msr, &value);
- smsr->values[slot].host = value;
- smsr->values[slot].curr = value;
-}
-
-void kvm_define_shared_msr(unsigned slot, u32 msr)
-{
- BUG_ON(slot >= KVM_NR_SHARED_MSRS);
- shared_msrs_global.msrs[slot] = msr;
- if (slot >= shared_msrs_global.nr)
- shared_msrs_global.nr = slot + 1;
-}
-EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
-
-static void kvm_shared_msr_cpu_online(void)
-{
- unsigned i;
-
- for (i = 0; i < shared_msrs_global.nr; ++i)
- shared_msr_update(i, shared_msrs_global.msrs[i]);
-}
-
-int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
-{
- unsigned int cpu = smp_processor_id();
- struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
- int err;
-
- if (((value ^ smsr->values[slot].curr) & mask) == 0)
- return 0;
- smsr->values[slot].curr = value;
- err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
- if (err)
- return 1;
-
- if (!smsr->registered) {
- smsr->urn.on_user_return = kvm_on_user_return;
- user_return_notifier_register(&smsr->urn);
- smsr->registered = true;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
-
-static void drop_user_return_notifiers(void)
-{
- unsigned int cpu = smp_processor_id();
- struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
-
- if (smsr->registered)
- kvm_on_user_return(&smsr->urn);
-}
-
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
return vcpu->arch.apic_base;
}
-EXPORT_SYMBOL_GPL(kvm_get_apic_base);
int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
@@ -323,14 +105,6 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_lapic_set_base(vcpu, msr_info->data);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_apic_base);
-
-asmlinkage __visible void kvm_spurious_fault(void)
-{
- /* Fault while not rebooting. We want the trace. */
- BUG();
-}
-EXPORT_SYMBOL_GPL(kvm_spurious_fault);
#define EXCPT_BENIGN 0
#define EXCPT_CONTRIBUTORY 1
@@ -385,7 +159,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
u32 prev_nr;
int class1, class2;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
if (!vcpu->arch.exception.pending) {
queue:
@@ -403,7 +177,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
prev_nr = vcpu->arch.exception.nr;
if (prev_nr == DF_VECTOR) {
/* triple fault -> shutdown */
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_make_request(GVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
class1 = exception_class(prev_nr);
@@ -426,13 +200,11 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
kvm_multiple_exception(vcpu, nr, false, 0, false);
}
-EXPORT_SYMBOL_GPL(kvm_queue_exception);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
kvm_multiple_exception(vcpu, nr, false, 0, true);
}
-EXPORT_SYMBOL_GPL(kvm_requeue_exception);
void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
@@ -441,7 +213,6 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
else
kvm_x86_ops->skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
@@ -449,7 +220,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
vcpu->arch.cr2 = fault->address;
kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
}
-EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
@@ -464,21 +234,18 @@ static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fau
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
atomic_inc(&vcpu->arch.nmi_queued);
- kvm_make_request(KVM_REQ_NMI, vcpu);
+ kvm_make_request(GVM_REQ_NMI, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_inject_nmi);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
kvm_multiple_exception(vcpu, nr, true, error_code, false);
}
-EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
kvm_multiple_exception(vcpu, nr, true, error_code, true);
}
-EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
/*
* Checks if cpl <= required_cpl; if true, return true. Otherwise queue
@@ -491,7 +258,6 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
return false;
}
-EXPORT_SYMBOL_GPL(kvm_require_cpl);
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
{
@@ -501,7 +267,6 @@ bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
kvm_queue_exception(vcpu, UD_VECTOR);
return false;
}
-EXPORT_SYMBOL_GPL(kvm_require_dr);
/*
* This function will be used to read from the physical memory of the currently
@@ -525,7 +290,6 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
}
-EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
void *data, int offset, int len, u32 access)
@@ -537,7 +301,7 @@ static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
/*
* Load the pae pdptrs. Return true is they are all valid.
*/
-int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
+int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, size_t cr3)
{
gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
@@ -564,14 +328,13 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
__set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail);
+ (size_t *)&vcpu->arch.regs_avail);
__set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_dirty);
+ (size_t *)&vcpu->arch.regs_dirty);
out:
return ret;
}
-EXPORT_SYMBOL_GPL(load_pdptrs);
static bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
@@ -585,7 +348,7 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
return false;
if (!test_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail))
+ (size_t *)&vcpu->arch.regs_avail))
return true;
gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
@@ -600,10 +363,10 @@ out:
return changed;
}
-int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+int kvm_set_cr0(struct kvm_vcpu *vcpu, size_t cr0)
{
- unsigned long old_cr0 = kvm_read_cr0(vcpu);
- unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
+ size_t old_cr0 = kvm_read_cr0(vcpu);
+ size_t update_bits = X86_CR0_PG | X86_CR0_WP;
cr0 |= X86_CR0_ET;
@@ -642,28 +405,21 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
kvm_x86_ops->set_cr0(vcpu, cr0);
- if ((cr0 ^ old_cr0) & X86_CR0_PG) {
- kvm_clear_async_pf_completion_queue(vcpu);
- kvm_async_pf_hash_reset(vcpu);
- }
-
if ((cr0 ^ old_cr0) & update_bits)
kvm_mmu_reset_context(vcpu);
if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
- kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
- !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+ //kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+ !kvm_check_has_quirk(vcpu->kvm, GVM_X86_QUIRK_CD_NW_CLEARED))
kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr0);
-void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
+void kvm_lmsw(struct kvm_vcpu *vcpu, size_t msw)
{
(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
-EXPORT_SYMBOL_GPL(kvm_lmsw);
static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
@@ -733,12 +489,11 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_xcr);
-int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+int kvm_set_cr4(struct kvm_vcpu *vcpu, size_t cr4)
{
- unsigned long old_cr4 = kvm_read_cr4(vcpu);
- unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+ size_t old_cr4 = kvm_read_cr4(vcpu);
+ size_t pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
if (cr4 & CR4_RESERVED_BITS)
@@ -789,9 +544,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr4);
-int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+int kvm_set_cr3(struct kvm_vcpu *vcpu, size_t cr3)
{
#ifdef CONFIG_X86_64
cr3 &= ~CR3_PCID_INVD;
@@ -799,7 +553,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(GVM_REQ_TLB_FLUSH, vcpu);
return 0;
}
@@ -815,9 +569,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
kvm_mmu_new_cr3(vcpu);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr3);
-int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+int kvm_set_cr8(struct kvm_vcpu *vcpu, size_t cr8)
{
if (cr8 & CR8_RESERVED_BITS)
return 1;
@@ -827,46 +580,44 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
vcpu->arch.cr8 = cr8;
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr8);
-unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
+size_t kvm_get_cr8(struct kvm_vcpu *vcpu)
{
if (lapic_in_kernel(vcpu))
return kvm_lapic_get_cr8(vcpu);
else
return vcpu->arch.cr8;
}
-EXPORT_SYMBOL_GPL(kvm_get_cr8);
static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
{
int i;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
- for (i = 0; i < KVM_NR_DB_REGS; i++)
+ if (!(vcpu->guest_debug & GVM_GUESTDBG_USE_HW_BP)) {
+ for (i = 0; i < GVM_NR_DB_REGS; i++)
vcpu->arch.eff_db[i] = vcpu->arch.db[i];
- vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
+ vcpu->arch.switch_db_regs |= GVM_DEBUGREG_RELOAD;
}
}
static void kvm_update_dr6(struct kvm_vcpu *vcpu)
{
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ if (!(vcpu->guest_debug & GVM_GUESTDBG_USE_HW_BP))
kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
}
static void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
- unsigned long dr7;
+ size_t dr7;
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ if (vcpu->guest_debug & GVM_GUESTDBG_USE_HW_BP)
dr7 = vcpu->arch.guest_debug_dr7;
else
dr7 = vcpu->arch.dr7;
kvm_x86_ops->set_dr7(vcpu, dr7);
- vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
+ vcpu->arch.switch_db_regs &= ~GVM_DEBUGREG_BP_ENABLED;
if (dr7 & DR7_BP_EN_MASK)
- vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
+ vcpu->arch.switch_db_regs |= GVM_DEBUGREG_BP_ENABLED;
}
static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
@@ -878,12 +629,15 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
return fixed;
}
-static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
+static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, size_t val)
{
switch (dr) {
- case 0 ... 3:
+ case 0:
+ case 1:
+ case 2:
+ case 3:
vcpu->arch.db[dr] = val;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ if (!(vcpu->guest_debug & GVM_GUESTDBG_USE_HW_BP))
vcpu->arch.eff_db[dr] = val;
break;
case 4:
@@ -907,7 +661,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
return 0;
}
-int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
+int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, size_t val)
{
if (__kvm_set_dr(vcpu, dr, val)) {
kvm_inject_gp(vcpu, 0);
@@ -915,18 +669,20 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_dr);
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, size_t *val)
{
switch (dr) {
- case 0 ... 3:
+ case 0:
+ case 1:
+ case 2:
+ case 3:
*val = vcpu->arch.db[dr];
break;
case 4:
/* fall through */
case 6:
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ if (vcpu->guest_debug & GVM_GUESTDBG_USE_HW_BP)
*val = vcpu->arch.dr6;
else
*val = kvm_x86_ops->get_dr6(vcpu);
@@ -939,8 +695,8 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_get_dr);
+#if 0
bool kvm_rdpmc(struct kvm_vcpu *vcpu)
{
u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -954,11 +710,11 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu)
kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
return err;
}
-EXPORT_SYMBOL_GPL(kvm_rdpmc);
+#endif
/*
- * List of msr numbers which we expose to userspace through KVM_GET_MSRS
- * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
+ * List of msr numbers which we expose to userspace through GVM_GET_MSRS
+ * and GVM_SET_MSRS, and GVM_GET_MSR_INDEX_LIST.
*
* This list is modified at module load time to reflect the
* capabilities of the host cpu. This capabilities test skips MSRs that are
@@ -972,45 +728,19 @@ static u32 msrs_to_save[] = {
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
- MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
- MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
+ MSR_IA32_TSC, MSR_IA32_CR_PAT, //MSR_VM_HSAVE_PA,
+ MSR_IA32_FEATURE_CONTROL, //MSR_IA32_BNDCFGS, MSR_TSC_AUX,
};
static unsigned num_msrs_to_save;
-static u32 emulated_msrs[] = {
- MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
- HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
- HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
- HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
- HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
- HV_X64_MSR_RESET,
- HV_X64_MSR_VP_INDEX,
- HV_X64_MSR_VP_RUNTIME,
- HV_X64_MSR_SCONTROL,
- HV_X64_MSR_STIMER0_CONFIG,
- HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
- MSR_KVM_PV_EOI_EN,
-
- MSR_IA32_TSC_ADJUST,
- MSR_IA32_TSCDEADLINE,
- MSR_IA32_MISC_ENABLE,
- MSR_IA32_MCG_STATUS,
- MSR_IA32_MCG_CTL,
- MSR_IA32_MCG_EXT_CTL,
- MSR_IA32_SMBASE,
-};
-
-static unsigned num_emulated_msrs;
-
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & efer_reserved_bits)
return false;
if (efer & EFER_FFXSR) {
- struct kvm_cpuid_entry2 *feat;
+ struct kvm_cpuid_entry *feat;
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
@@ -1018,7 +748,7 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
}
if (efer & EFER_SVME) {
- struct kvm_cpuid_entry2 *feat;
+ struct kvm_cpuid_entry *feat;
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
@@ -1027,7 +757,6 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
return true;
}
-EXPORT_SYMBOL_GPL(kvm_valid_efer);
static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
@@ -1056,7 +785,6 @@ void kvm_enable_efer_bits(u64 mask)
{
efer_reserved_bits &= ~mask;
}
-EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
/*
* Writes msr value into into the appropriate "register".
@@ -1092,7 +820,6 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
}
return kvm_x86_ops->set_msr(vcpu, msr);
}
-EXPORT_SYMBOL_GPL(kvm_set_msr);
/*
* Adapt set_msr() to msr_io()'s calling convention
@@ -1122,257 +849,22 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
return kvm_set_msr(vcpu, &msr);
}
-#ifdef CONFIG_X86_64
-struct pvclock_gtod_data {
- seqcount_t seq;
-
- struct { /* extract of a clocksource struct */
- int vclock_mode;
- cycle_t cycle_last;
- cycle_t mask;
- u32 mult;
- u32 shift;
- } clock;
-
- u64 boot_ns;
- u64 nsec_base;
-};
-
-static struct pvclock_gtod_data pvclock_gtod_data;
-
-static void update_pvclock_gtod(struct timekeeper *tk)
-{
- struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
- u64 boot_ns;
-
- boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
-
- write_seqcount_begin(&vdata->seq);
-
- /* copy pvclock gtod data */
- vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
- vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
- vdata->clock.mask = tk->tkr_mono.mask;
- vdata->clock.mult = tk->tkr_mono.mult;
- vdata->clock.shift = tk->tkr_mono.shift;
-
- vdata->boot_ns = boot_ns;
- vdata->nsec_base = tk->tkr_mono.xtime_nsec;
-
- write_seqcount_end(&vdata->seq);
-}
-#endif
-
void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
{
/*
- * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
+ * Note: GVM_REQ_PENDING_TIMER is implicitly checked in
* vcpu_enter_guest. This function is only called from
* the physical CPU that is running vcpu.
*/
- kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-}
-
-static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
-{
- int version;
- int r;
- struct pvclock_wall_clock wc;
- struct timespec64 boot;
-
- if (!wall_clock)
- return;
-
- r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
- if (r)
- return;
-
- if (version & 1)
- ++version; /* first time write, random junk */
-
- ++version;
-
- if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
- return;
-
- /*
- * The guest calculates current wall clock time by adding
- * system time (updated by kvm_guest_time_update below) to the
- * wall clock specified here. guest system time equals host
- * system time for us, thus we must fill in host boot time here.
- */
- getboottime64(&boot);
-
- if (kvm->arch.kvmclock_offset) {
- struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
- boot = timespec64_sub(boot, ts);
- }
- wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
- wc.nsec = boot.tv_nsec;
- wc.version = version;
-
- kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
-
- version++;
- kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
-}
-
-static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
-{
- do_shl32_div32(dividend, divisor);
- return dividend;
-}
-
-static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
- s8 *pshift, u32 *pmultiplier)
-{
- uint64_t scaled64;
- int32_t shift = 0;
- uint64_t tps64;
- uint32_t tps32;
-
- tps64 = base_hz;
- scaled64 = scaled_hz;
- while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
- tps64 >>= 1;
- shift--;
- }
-
- tps32 = (uint32_t)tps64;
- while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
- if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
- scaled64 >>= 1;
- else
- tps32 <<= 1;
- shift++;
- }
-
- *pshift = shift;
- *pmultiplier = div_frac(scaled64, tps32);
-
- pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
- __func__, base_hz, scaled_hz, shift, *pmultiplier);
+ kvm_make_request(GVM_REQ_PENDING_TIMER, vcpu);
}
#ifdef CONFIG_X86_64
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
#endif
-static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
-static unsigned long max_tsc_khz;
-
-static u32 adjust_tsc_khz(u32 khz, s32 ppm)
-{
- u64 v = (u64)khz * (1000000 + ppm);
- do_div(v, 1000000);
- return v;
-}
-
-static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
-{
- u64 ratio;
-
- /* Guest TSC same frequency as host TSC? */
- if (!scale) {
- vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
- return 0;
- }
-
- /* TSC scaling supported? */
- if (!kvm_has_tsc_control) {
- if (user_tsc_khz > tsc_khz) {
- vcpu->arch.tsc_catchup = 1;
- vcpu->arch.tsc_always_catchup = 1;
- return 0;
- } else {
- WARN(1, "user requested TSC rate below hardware speed\n");
- return -1;
- }
- }
-
- /* TSC scaling required - calculate ratio */
- ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
- user_tsc_khz, tsc_khz);
-
- if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
- WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
- user_tsc_khz);
- return -1;
- }
-
- vcpu->arch.tsc_scaling_ratio = ratio;
- return 0;
-}
-
-static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
-{
- u32 thresh_lo, thresh_hi;
- int use_scaling = 0;
-
- /* tsc_khz can be zero if TSC calibration fails */
- if (user_tsc_khz == 0) {
- /* set tsc_scaling_ratio to a safe value */
- vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
- return -1;
- }
-
- /* Compute a scale to convert nanoseconds in TSC cycles */
- kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
- &vcpu->arch.virtual_tsc_shift,
- &vcpu->arch.virtual_tsc_mult);
- vcpu->arch.virtual_tsc_khz = user_tsc_khz;
-
- /*
- * Compute the variation in TSC rate which is acceptable
- * within the range of tolerance and decide if the
- * rate being applied is within that bounds of the hardware
- * rate. If so, no scaling or compensation need be done.
- */
- thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
- thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
- if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
- pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
- use_scaling = 1;
- }
- return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
-}
-
-static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
-{
- u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
- vcpu->arch.virtual_tsc_mult,
- vcpu->arch.virtual_tsc_shift);
- tsc += vcpu->arch.this_tsc_write;
- return tsc;
-}
-
-static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_X86_64
- bool vcpus_matched;
- struct kvm_arch *ka = &vcpu->kvm->arch;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
-
- vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
- atomic_read(&vcpu->kvm->online_vcpus));
-
- /*
- * Once the masterclock is enabled, always perform request in
- * order to update it.
- *
- * In order to enable masterclock, the host clocksource must be TSC
- * and the vcpus need to have matched TSCs. When that happens,
- * perform request to enable masterclock.
- */
- if (ka->use_master_clock ||
- (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
- kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
-
- trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
- atomic_read(&vcpu->kvm->online_vcpus),
- ka->use_master_clock, gtod->clock.vclock_mode);
-#endif
-}
+static DEFINE_PER_CPU(size_t, cpu_tsc_khz);
+static size_t max_tsc_khz;
static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
@@ -1380,47 +872,19 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}
-/*
- * Multiply tsc by a fixed point number represented by ratio.
- *
- * The most significant 64-N bits (mult) of ratio represent the
- * integral part of the fixed point number; the remaining N bits
- * (frac) represent the fractional part, i.e. ratio represents a fixed
- * point number (mult + frac * 2^(-N)).
- *
- * N equals kvm_tsc_scaling_ratio_frac_bits.
- */
-static inline u64 __scale_tsc(u64 ratio, u64 tsc)
-{
- return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
-}
-
-u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
-{
- u64 _tsc = tsc;
- u64 ratio = vcpu->arch.tsc_scaling_ratio;
-
- if (ratio != kvm_default_tsc_scaling_ratio)
- _tsc = __scale_tsc(ratio, tsc);
-
- return _tsc;
-}
-EXPORT_SYMBOL_GPL(kvm_scale_tsc);
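
Worked example of the fixed-point ratio described in the comment above, with N = kvm_tsc_scaling_ratio_frac_bits (48 on VMX, 32 on SVM in upstream KVM):

\[
\text{ratio} = \left\lfloor \frac{\text{user\_tsc\_khz}\cdot 2^{N}}{\text{tsc\_khz}} \right\rfloor,\qquad
\text{guest\_tsc} = \left\lfloor \frac{\text{host\_tsc}\cdot \text{ratio}}{2^{N}} \right\rfloor
\]

so a 1.5 GHz guest on a 3.0 GHz host gets ratio = 2^(N-1), and each host cycle advances the guest TSC by exactly half a cycle.
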
-
static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
u64 tsc;
- tsc = kvm_scale_tsc(vcpu, rdtsc());
+ tsc = __rdtsc();
return target_tsc - tsc;
}
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
- return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
+ return vcpu->arch.tsc_offset + host_tsc;
}
-EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
@@ -1430,128 +894,19 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
- struct kvm *kvm = vcpu->kvm;
- u64 offset, ns, elapsed;
- unsigned long flags;
- s64 usdiff;
- bool matched;
- bool already_matched;
+ u64 offset;
+ //size_t flags;
u64 data = msr->data;
- raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
+ //spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
- ns = ktime_get_boot_ns();
- elapsed = ns - kvm->arch.last_tsc_nsec;
-
- if (vcpu->arch.virtual_tsc_khz) {
- int faulted = 0;
-
- /* n.b - signed multiplication and division required */
- usdiff = data - kvm->arch.last_tsc_write;
-#ifdef CONFIG_X86_64
- usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
-#else
- /* do_div() only does unsigned */
- asm("1: idivl %[divisor]\n"
- "2: xor %%edx, %%edx\n"
- " movl $0, %[faulted]\n"
- "3:\n"
- ".section .fixup,\"ax\"\n"
- "4: movl $1, %[faulted]\n"
- " jmp 3b\n"
- ".previous\n"
-
- _ASM_EXTABLE(1b, 4b)
-
- : "=A"(usdiff), [faulted] "=r" (faulted)
- : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
-
-#endif
- do_div(elapsed, 1000);
- usdiff -= elapsed;
- if (usdiff < 0)
- usdiff = -usdiff;
-
- /* idivl overflow => difference is larger than USEC_PER_SEC */
- if (faulted)
- usdiff = USEC_PER_SEC;
- } else
- usdiff = USEC_PER_SEC; /* disable TSC match window below */
-
- /*
- * Special case: TSC write with a small delta (1 second) of virtual
- * cycle time against real time is interpreted as an attempt to
- * synchronize the CPU.
- *
- * For a reliable TSC, we can match TSC offsets, and for an unstable
- * TSC, we add elapsed time in this computation. We could let the
- * compensation code attempt to catch up if we fall behind, but
- * it's better to try to match offsets from the beginning.
- */
- if (usdiff < USEC_PER_SEC &&
- vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
- if (!check_tsc_unstable()) {
- offset = kvm->arch.cur_tsc_offset;
- pr_debug("kvm: matched tsc offset for %llu\n", data);
- } else {
- u64 delta = nsec_to_cycles(vcpu, elapsed);
- data += delta;
- offset = kvm_compute_tsc_offset(vcpu, data);
- pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
- }
- matched = true;
- already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
- } else {
- /*
- * We split periods of matched TSC writes into generations.
- * For each generation, we track the original measured
- * nanosecond time, offset, and write, so if TSCs are in
- * sync, we can match exact offset, and if not, we can match
- * exact software computation in compute_guest_tsc()
- *
- * These values are tracked in kvm->arch.cur_xxx variables.
- */
- kvm->arch.cur_tsc_generation++;
- kvm->arch.cur_tsc_nsec = ns;
- kvm->arch.cur_tsc_write = data;
- kvm->arch.cur_tsc_offset = offset;
- matched = false;
- pr_debug("kvm: new tsc generation %llu, clock %llu\n",
- kvm->arch.cur_tsc_generation, data);
- }
-
- /*
- * We also track the most recent recorded KHZ, write, and time to
- * allow the matching interval to be extended at each write.
- */
- kvm->arch.last_tsc_nsec = ns;
- kvm->arch.last_tsc_write = data;
- kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
-
- vcpu->arch.last_guest_tsc = data;
-
- /* Keep track of which generation this VCPU has synchronized to */
- vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
- vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
- vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
-
if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
update_ia32_tsc_adjust_msr(vcpu, offset);
kvm_vcpu_write_tsc_offset(vcpu, offset);
- raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
-
- spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
- if (!matched) {
- kvm->arch.nr_vcpus_matched_tsc = 0;
- } else if (!already_matched) {
- kvm->arch.nr_vcpus_matched_tsc++;
- }
+ //spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
- kvm_track_tsc_matching(vcpu);
- spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
}
-EXPORT_SYMBOL_GPL(kvm_write_tsc);
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
@@ -1559,549 +914,16 @@ static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
}
-static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
-{
- if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
- WARN_ON(adjustment < 0);
- adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
- adjust_tsc_offset_guest(vcpu, adjustment);
-}
-
-#ifdef CONFIG_X86_64
-
-static cycle_t read_tsc(void)
-{
- cycle_t ret = (cycle_t)rdtsc_ordered();
- u64 last = pvclock_gtod_data.clock.cycle_last;
-
- if (likely(ret >= last))
- return ret;
-
- /*
- * GCC likes to generate cmov here, but this branch is extremely
- * predictable (it's just a function of time and the likely is
- * very likely) and there's a data dependence, so force GCC
- * to generate a branch instead. I don't barrier() because
- * we don't actually need a barrier, and if this function
- * ever gets inlined it will generate worse code.
- */
- asm volatile ("");
- return last;
-}
-
-static inline u64 vgettsc(cycle_t *cycle_now)
-{
- long v;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
-
- *cycle_now = read_tsc();
-
- v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
- return v * gtod->clock.mult;
-}
-
-static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- unsigned long seq;
- int mode;
- u64 ns;
-
- do {
- seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
- ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
- ns >>= gtod->clock.shift;
- ns += gtod->boot_ns;
- } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
- *t = ns;
-
- return mode;
-}
-
-/* returns true if host is using tsc clocksource */
-static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
-{
- /* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
- return false;
-
- return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
-}
-#endif
-
-/*
- *
- * Assuming a stable TSC across physical CPUs, and a stable TSC
- * across virtual CPUs, the following condition is possible.
- * Each numbered line represents an event visible to both
- * CPUs at the next numbered event.
- *
- * "timespecX" represents host monotonic time. "tscX" represents
- * RDTSC value.
- *
- * VCPU0 on CPU0 | VCPU1 on CPU1
- *
- * 1. read timespec0,tsc0
- * 2. | timespec1 = timespec0 + N
- * | tsc1 = tsc0 + M
- * 3. transition to guest | transition to guest
- * 4. ret0 = timespec0 + (rdtsc - tsc0) |
- * 5. | ret1 = timespec1 + (rdtsc - tsc1)
- * | ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
- *
- * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
- *
- * - ret0 < ret1
- * - timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
- * ...
- * - 0 < N - M => M < N
- *
- * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
- * always the case (the difference between two distinct xtime instances
- * might be smaller than the difference between corresponding TSC reads,
- * when updating guest vcpus' pvclock areas).
- *
- * To avoid that problem, do not allow visibility of distinct
- * system_timestamp/tsc_timestamp values simultaneously: use a master
- * copy of host monotonic time values. Update that master copy
- * in lockstep.
- *
- * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
- *
- */
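
Restating the derivation in the comment above in compact form (same algebra, nothing new):

\[
\mathrm{ret}_0 = t_0 + (\mathrm{rdtsc} - \mathrm{tsc}_0),\qquad
\mathrm{ret}_1 = t_0 + N + \bigl(\mathrm{rdtsc} - (\mathrm{tsc}_0 + M)\bigr)
\]
\[
\mathrm{ret}_0 < \mathrm{ret}_1 \;\Longleftrightarrow\; 0 < N - M \;\Longleftrightarrow\; M < N
\]
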
-
-static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
-{
-#ifdef CONFIG_X86_64
- struct kvm_arch *ka = &kvm->arch;
- int vclock_mode;
- bool host_tsc_clocksource, vcpus_matched;
-
- vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
- atomic_read(&kvm->online_vcpus));
-
- /*
- * If the host uses TSC clock, then passthrough TSC as stable
- * to the guest.
- */
- host_tsc_clocksource = kvm_get_time_and_clockread(
- &ka->master_kernel_ns,
- &ka->master_cycle_now);
-
- ka->use_master_clock = host_tsc_clocksource && vcpus_matched
- && !backwards_tsc_observed
- && !ka->boot_vcpu_runs_old_kvmclock;
-
- if (ka->use_master_clock)
- atomic_set(&kvm_guest_has_master_clock, 1);
-
- vclock_mode = pvclock_gtod_data.clock.vclock_mode;
- trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
- vcpus_matched);
-#endif
-}
-
-void kvm_make_mclock_inprogress_request(struct kvm *kvm)
-{
- kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
-}
-
-static void kvm_gen_update_masterclock(struct kvm *kvm)
-{
-#ifdef CONFIG_X86_64
- int i;
- struct kvm_vcpu *vcpu;
- struct kvm_arch *ka = &kvm->arch;
-
- spin_lock(&ka->pvclock_gtod_sync_lock);
- kvm_make_mclock_inprogress_request(kvm);
- /* no guest entries from this point */
- pvclock_update_vm_gtod_copy(kvm);
-
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
-
- /* guest entries allowed */
- kvm_for_each_vcpu(i, vcpu, kvm)
- clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
-
- spin_unlock(&ka->pvclock_gtod_sync_lock);
-#endif
-}
-
-static u64 __get_kvmclock_ns(struct kvm *kvm)
-{
- struct kvm_arch *ka = &kvm->arch;
- struct pvclock_vcpu_time_info hv_clock;
-
- spin_lock(&ka->pvclock_gtod_sync_lock);
- if (!ka->use_master_clock) {
- spin_unlock(&ka->pvclock_gtod_sync_lock);
- return ktime_get_boot_ns() + ka->kvmclock_offset;
- }
-
- hv_clock.tsc_timestamp = ka->master_cycle_now;
- hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
- spin_unlock(&ka->pvclock_gtod_sync_lock);
-
- kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
- &hv_clock.tsc_shift,
- &hv_clock.tsc_to_system_mul);
- return __pvclock_read_cycles(&hv_clock, rdtsc());
-}
-
-u64 get_kvmclock_ns(struct kvm *kvm)
-{
- unsigned long flags;
- s64 ns;
-
- local_irq_save(flags);
- ns = __get_kvmclock_ns(kvm);
- local_irq_restore(flags);
-
- return ns;
-}
-
-static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
-{
- struct kvm_vcpu_arch *vcpu = &v->arch;
- struct pvclock_vcpu_time_info guest_hv_clock;
-
- if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
- &guest_hv_clock, sizeof(guest_hv_clock))))
- return;
-
- /* This VCPU is paused, but it's legal for a guest to read another
- * VCPU's kvmclock, so we really have to follow the specification where
- * it says that version is odd if data is being modified, and even after
- * it is consistent.
- *
- * Version field updates must be kept separate. This is because
- * kvm_write_guest_cached might use a "rep movs" instruction, and
- * writes within a string instruction are weakly ordered. So there
- * are three writes overall.
- *
- * As a small optimization, only write the version field in the first
- * and third write. The vcpu->pv_time cache is still valid, because the
- * version field is the first in the struct.
- */
- BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
-
- vcpu->hv_clock.version = guest_hv_clock.version + 1;
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock.version));
-
- smp_wmb();
-
- /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
- vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
-
- if (vcpu->pvclock_set_guest_stopped_request) {
- vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
- vcpu->pvclock_set_guest_stopped_request = false;
- }
-
- trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
-
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
-
- smp_wmb();
-
- vcpu->hv_clock.version++;
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock.version));
-}
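
The three-write/version-bump protocol implemented above is what a pvclock guest pairs with on its side. A guest-side reader, sketched for illustration only (it assumes the kernel's rdtsc(), smp_rmb() and pvclock_scale_delta() helpers and is not code from this tree):

static u64 read_pvclock_ns(volatile struct pvclock_vcpu_time_info *pv)
{
	u32 version;
	u64 delta, ns;

	do {
		version = pv->version;          /* odd => update in progress */
		smp_rmb();                      /* pairs with the host's smp_wmb() */
		delta = rdtsc() - pv->tsc_timestamp;
		ns = pv->system_time +
		     pvclock_scale_delta(delta, pv->tsc_to_system_mul,
					 pv->tsc_shift);
		smp_rmb();
	} while ((version & 1) || version != pv->version);

	return ns;
}
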
-
-static int kvm_guest_time_update(struct kvm_vcpu *v)
-{
- unsigned long flags, tgt_tsc_khz;
- struct kvm_vcpu_arch *vcpu = &v->arch;
- struct kvm_arch *ka = &v->kvm->arch;
- s64 kernel_ns;
- u64 tsc_timestamp, host_tsc;
- u8 pvclock_flags;
- bool use_master_clock;
-
- kernel_ns = 0;
- host_tsc = 0;
-
- /*
- * If the host uses TSC clock, then passthrough TSC as stable
- * to the guest.
- */
- spin_lock(&ka->pvclock_gtod_sync_lock);
- use_master_clock = ka->use_master_clock;
- if (use_master_clock) {
- host_tsc = ka->master_cycle_now;
- kernel_ns = ka->master_kernel_ns;
- }
- spin_unlock(&ka->pvclock_gtod_sync_lock);
-
- /* Keep irq disabled to prevent changes to the clock */
- local_irq_save(flags);
- tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
- if (unlikely(tgt_tsc_khz == 0)) {
- local_irq_restore(flags);
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
- return 1;
- }
- if (!use_master_clock) {
- host_tsc = rdtsc();
- kernel_ns = ktime_get_boot_ns();
- }
-
- tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
-
- /*
- * We may have to catch up the TSC to match elapsed wall clock
- * time for two reasons, even if kvmclock is used.
- * 1) CPU could have been running below the maximum TSC rate
- * 2) Broken TSC compensation resets the base at each VCPU
- * entry to avoid unknown leaps of TSC even when running
- * again on the same CPU. This may cause apparent elapsed
- * time to disappear, and the guest to stand still or run
- * very slowly.
- */
- if (vcpu->tsc_catchup) {
- u64 tsc = compute_guest_tsc(v, kernel_ns);
- if (tsc > tsc_timestamp) {
- adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
- tsc_timestamp = tsc;
- }
- }
-
- local_irq_restore(flags);
-
- /* With all the info we got, fill in the values */
-
- if (kvm_has_tsc_control)
- tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
-
- if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
- kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
- &vcpu->hv_clock.tsc_shift,
- &vcpu->hv_clock.tsc_to_system_mul);
- vcpu->hw_tsc_khz = tgt_tsc_khz;
- }
-
- vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
- vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
- vcpu->last_guest_tsc = tsc_timestamp;
-
- /* If the host uses TSC clocksource, then it is stable */
- pvclock_flags = 0;
- if (use_master_clock)
- pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
-
- vcpu->hv_clock.flags = pvclock_flags;
-
- if (vcpu->pv_time_enabled)
- kvm_setup_pvclock_page(v);
- if (v == kvm_get_vcpu(v->kvm, 0))
- kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
- return 0;
-}
-
-/*
- * kvmclock updates which are isolated to a given vcpu, such as
- * vcpu->cpu migration, should not allow system_timestamp from
- * the rest of the vcpus to remain static. Otherwise ntp frequency
- * correction applies to one vcpu's system_timestamp but not
- * the others.
- *
- * So in those cases, request a kvmclock update for all vcpus.
- * We need to rate-limit these requests though, as they can
- * considerably slow guests that have a large number of vcpus.
- * The time for a remote vcpu to update its kvmclock is bound
- * by the delay we use to rate-limit the updates.
- */
-
-#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
-
-static void kvmclock_update_fn(struct work_struct *work)
-{
- int i;
- struct delayed_work *dwork = to_delayed_work(work);
- struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
- kvmclock_update_work);
- struct kvm *kvm = container_of(ka, struct kvm, arch);
- struct kvm_vcpu *vcpu;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- kvm_vcpu_kick(vcpu);
- }
-}
-
-static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
-{
- struct kvm *kvm = v->kvm;
-
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
- schedule_delayed_work(&kvm->arch.kvmclock_update_work,
- KVMCLOCK_UPDATE_DELAY);
-}
-
-#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
-
-static void kvmclock_sync_fn(struct work_struct *work)
-{
- struct delayed_work *dwork = to_delayed_work(work);
- struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
- kvmclock_sync_work);
- struct kvm *kvm = container_of(ka, struct kvm, arch);
-
- if (!kvmclock_periodic_sync)
- return;
-
- schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
- schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
- KVMCLOCK_SYNC_PERIOD);
-}
-
-static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
- u64 mcg_cap = vcpu->arch.mcg_cap;
- unsigned bank_num = mcg_cap & 0xff;
-
- switch (msr) {
- case MSR_IA32_MCG_STATUS:
- vcpu->arch.mcg_status = data;
- break;
- case MSR_IA32_MCG_CTL:
- if (!(mcg_cap & MCG_CTL_P))
- return 1;
- if (data != 0 && data != ~(u64)0)
- return -1;
- vcpu->arch.mcg_ctl = data;
- break;
- default:
- if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MCx_CTL(bank_num)) {
- u32 offset = msr - MSR_IA32_MC0_CTL;
- /* only 0 or all 1s can be written to IA32_MCi_CTL
- * some Linux kernels though clear bit 10 in bank 4 to
- * workaround a BIOS/GART TBL issue on AMD K8s, ignore
- * this to avoid an uncatched #GP in the guest
- */
- if ((offset & 0x3) == 0 &&
- data != 0 && (data | (1 << 10)) != ~(u64)0)
- return -1;
- vcpu->arch.mce_banks[offset] = data;
- break;
- }
- return 1;
- }
- return 0;
-}
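
For the IA32_MCi_CTL path in the removed set_msr_mce() above: when the offset selects the per-bank CTL register (offset & 0x3 == 0), the only values accepted are 0, all ones, or all ones with bit 10 clear, the last because some Linux guests clear bit 10 of bank 4 to work around the K8 GART erratum. Equivalently, as a sketch:

/* Values the check above lets through for IA32_MCi_CTL */
static bool mci_ctl_write_ok(u64 data)
{
	return data == 0 ||
	       data == ~(u64)0 ||
	       data == (~(u64)0 & ~(1ULL << 10)); /* bit 10 cleared: K8 GART quirk */
}
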
-
-static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
-{
- struct kvm *kvm = vcpu->kvm;
- int lm = is_long_mode(vcpu);
- u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
- : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
- u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
- : kvm->arch.xen_hvm_config.blob_size_32;
- u32 page_num = data & ~PAGE_MASK;
- u64 page_addr = data & PAGE_MASK;
- u8 *page;
- int r;
-
- r = -E2BIG;
- if (page_num >= blob_size)
- goto out;
- r = -ENOMEM;
- page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
- if (IS_ERR(page)) {
- r = PTR_ERR(page);
- goto out;
- }
- if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
- goto out_free;
- r = 0;
-out_free:
- kfree(page);
-out:
- return r;
-}
-
-static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
-{
- gpa_t gpa = data & ~0x3f;
-
- /* Bits 2:5 are reserved, Should be zero */
- if (data & 0x3c)
- return 1;
-
- vcpu->arch.apf.msr_val = data;
-
- if (!(data & KVM_ASYNC_PF_ENABLED)) {
- kvm_clear_async_pf_completion_queue(vcpu);
- kvm_async_pf_hash_reset(vcpu);
- return 0;
- }
-
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
- sizeof(u32)))
- return 1;
-
- vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
- kvm_async_pf_wakeup_all(vcpu);
- return 0;
-}
-
-static void kvmclock_reset(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pv_time_enabled = false;
-}
-
-static void record_steal_time(struct kvm_vcpu *vcpu)
-{
- if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
- return;
-
- if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
- return;
-
- if (vcpu->arch.st.steal.version & 1)
- vcpu->arch.st.steal.version += 1; /* first time write, random junk */
-
- vcpu->arch.st.steal.version += 1;
-
- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
-
- smp_wmb();
-
- vcpu->arch.st.steal.steal += current->sched_info.run_delay -
- vcpu->arch.st.last_steal;
- vcpu->arch.st.last_steal = current->sched_info.run_delay;
-
- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
-
- smp_wmb();
-
- vcpu->arch.st.steal.version += 1;
-
- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
-}
-
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
- bool pr = false;
+ //bool pr = false;
u32 msr = msr_info->index;
u64 data = msr_info->data;
+ if (msr >= 0x200 && msr <= 0x2ff)
+ return kvm_mtrr_set_msr(vcpu, msr, data);
+ if (msr >= APIC_BASE_MSR && msr <= (APIC_BASE_MSR + 0x3ff))
+ return kvm_x2apic_msr_write(vcpu, msr, data);
switch (msr) {
case MSR_AMD64_NB_CFG:
case MSR_IA32_UCODE_REV:
@@ -2143,15 +965,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
__func__, data);
break;
- case 0x200 ... 0x2ff:
- return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE:
return kvm_set_apic_base(vcpu, msr_info);
- case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
- return kvm_x2apic_msr_write(vcpu, msr, data);
- case MSR_IA32_TSCDEADLINE:
- kvm_set_lapic_tscdeadline_msr(vcpu, data);
- break;
case MSR_IA32_TSC_ADJUST:
if (guest_cpuid_has_tsc_adjust(vcpu)) {
if (!msr_info->host_initiated) {
@@ -2169,81 +984,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.smbase = data;
break;
- case MSR_KVM_WALL_CLOCK_NEW:
- case MSR_KVM_WALL_CLOCK:
- vcpu->kvm->arch.wall_clock = data;
- kvm_write_wall_clock(vcpu->kvm, data);
- break;
- case MSR_KVM_SYSTEM_TIME_NEW:
- case MSR_KVM_SYSTEM_TIME: {
- u64 gpa_offset;
- struct kvm_arch *ka = &vcpu->kvm->arch;
-
- kvmclock_reset(vcpu);
-
- if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
- bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
-
- if (ka->boot_vcpu_runs_old_kvmclock != tmp)
- set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
- &vcpu->requests);
-
- ka->boot_vcpu_runs_old_kvmclock = tmp;
- }
-
- vcpu->arch.time = data;
- kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
-
- /* we verify if the enable bit is set... */
- if (!(data & 1))
- break;
-
- gpa_offset = data & ~(PAGE_MASK | 1);
-
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
- &vcpu->arch.pv_time, data & ~1ULL,
- sizeof(struct pvclock_vcpu_time_info)))
- vcpu->arch.pv_time_enabled = false;
- else
- vcpu->arch.pv_time_enabled = true;
-
- break;
- }
- case MSR_KVM_ASYNC_PF_EN:
- if (kvm_pv_enable_async_pf(vcpu, data))
- return 1;
- break;
- case MSR_KVM_STEAL_TIME:
-
- if (unlikely(!sched_info_on()))
- return 1;
-
- if (data & KVM_STEAL_RESERVED_MASK)
- return 1;
-
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
- data & KVM_STEAL_VALID_BITS,
- sizeof(struct kvm_steal_time)))
- return 1;
-
- vcpu->arch.st.msr_val = data;
-
- if (!(data & KVM_MSR_ENABLED))
- break;
-
- kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
-
- break;
- case MSR_KVM_PV_EOI_EN:
- if (kvm_lapic_enable_pv_eoi(vcpu, data))
- return 1;
- break;
-
- case MSR_IA32_MCG_CTL:
- case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
- return set_msr_mce(vcpu, msr, data);
-
+#if 0
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
pr = true; /* fall through */
@@ -2256,6 +997,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
"0x%x data 0x%llx\n", msr, data);
break;
+#endif
case MSR_K7_CLK_CTL:
/*
* Ignore all writes to this no longer documented MSR.
@@ -2266,18 +1008,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* the need to ignore the workaround.
*/
break;
- case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
- case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
- case HV_X64_MSR_CRASH_CTL:
- case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
- return kvm_hv_set_msr_common(vcpu, msr, data,
- msr_info->host_initiated);
+#if 0
case MSR_IA32_BBL_CR_CTL3:
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
*/
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
break;
+#endif
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has_osvw(vcpu))
return 1;
@@ -2289,8 +1027,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.osvw.status = data;
break;
default:
- if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
- return xen_hvm_config(vcpu, data);
+#if 0
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
@@ -2302,10 +1039,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr, data);
break;
}
+#endif
+ break;
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_msr_common);
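
One structural change worth noting in kvm_set_msr_common() above (and in kvm_get_msr_common() below): the GCC-only case-range labels such as case 0x200 ... 0x2ff: are dropped and the same ranges are tested with explicit comparisons hoisted ahead of the switch, presumably so the file builds with compilers that lack that extension. The two forms are equivalent:

/* GCC/Clang case-range extension (removed form): */
switch (msr) {
case 0x200 ... 0x2ff:
	return kvm_mtrr_set_msr(vcpu, msr, data);
}

/* Portable form (added ahead of the switch): */
if (msr >= 0x200 && msr <= 0x2ff)
	return kvm_mtrr_set_msr(vcpu, msr, data);
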
/*
@@ -2317,45 +1055,13 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
return kvm_x86_ops->get_msr(vcpu, msr);
}
-EXPORT_SYMBOL_GPL(kvm_get_msr);
-
-static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
- u64 data;
- u64 mcg_cap = vcpu->arch.mcg_cap;
- unsigned bank_num = mcg_cap & 0xff;
-
- switch (msr) {
- case MSR_IA32_P5_MC_ADDR:
- case MSR_IA32_P5_MC_TYPE:
- data = 0;
- break;
- case MSR_IA32_MCG_CAP:
- data = vcpu->arch.mcg_cap;
- break;
- case MSR_IA32_MCG_CTL:
- if (!(mcg_cap & MCG_CTL_P))
- return 1;
- data = vcpu->arch.mcg_ctl;
- break;
- case MSR_IA32_MCG_STATUS:
- data = vcpu->arch.mcg_status;
- break;
- default:
- if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MCx_CTL(bank_num)) {
- u32 offset = msr - MSR_IA32_MC0_CTL;
- data = vcpu->arch.mce_banks[offset];
- break;
- }
- return 1;
- }
- *pdata = data;
- return 0;
-}
int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ if (msr_info->index >= 0x200 && msr_info->index <= 0x2ff)
+ return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
+ if (msr_info->index >= APIC_BASE_MSR && msr_info->index <= (APIC_BASE_MSR + 0x3ff))
+ return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
switch (msr_info->index) {
case MSR_IA32_PLATFORM_ID:
case MSR_IA32_EBL_CR_POWERON:
@@ -2376,6 +1082,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_PERF_CTL:
msr_info->data = 0;
break;
+#if 0
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@ -2384,11 +1091,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
msr_info->data = 0;
break;
+#endif
case MSR_IA32_UCODE_REV:
msr_info->data = 0x100000000ULL;
break;
case MSR_MTRRcap:
- case 0x200 ... 0x2ff:
return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
case 0xcd: /* fsb frequency */
msr_info->data = 3;
@@ -2410,12 +1117,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_APICBASE:
msr_info->data = kvm_get_apic_base(vcpu);
break;
- case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
- return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
- break;
- case MSR_IA32_TSCDEADLINE:
- msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
- break;
case MSR_IA32_TSC_ADJUST:
msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
break;
@@ -2436,30 +1137,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_EFER:
msr_info->data = vcpu->arch.efer;
break;
- case MSR_KVM_WALL_CLOCK:
- case MSR_KVM_WALL_CLOCK_NEW:
- msr_info->data = vcpu->kvm->arch.wall_clock;
- break;
- case MSR_KVM_SYSTEM_TIME:
- case MSR_KVM_SYSTEM_TIME_NEW:
- msr_info->data = vcpu->arch.time;
- break;
- case MSR_KVM_ASYNC_PF_EN:
- msr_info->data = vcpu->arch.apf.msr_val;
- break;
- case MSR_KVM_STEAL_TIME:
- msr_info->data = vcpu->arch.st.msr_val;
- break;
- case MSR_KVM_PV_EOI_EN:
- msr_info->data = vcpu->arch.pv_eoi.msr_val;
- break;
- case MSR_IA32_P5_MC_ADDR:
- case MSR_IA32_P5_MC_TYPE:
- case MSR_IA32_MCG_CAP:
- case MSR_IA32_MCG_CTL:
- case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
- return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
+#if 0
case MSR_K7_CLK_CTL:
/*
* Provide expected ramp-up count for K7. All other
@@ -2472,13 +1150,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
msr_info->data = 0x20000000;
break;
- case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
- case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
- case HV_X64_MSR_CRASH_CTL:
- case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
- return kvm_hv_get_msr_common(vcpu,
- msr_info->index, &msr_info->data);
- break;
+#endif
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
* silicon. It is however accessed by winxp in very narrow
@@ -2503,6 +1175,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.osvw.status;
break;
default:
+#if 0
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
if (!ignore_msrs) {
@@ -2513,10 +1186,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0;
}
break;
+#endif
+ break;
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_get_msr_common);
/*
* Read or write a bunch of msrs. All parameters are kernel addresses.
@@ -2544,7 +1218,8 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
*
* @return number of msrs set successfully.
*/
-static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
+static int msr_io(PIRP pIrp, struct kvm_vcpu *vcpu,
+ struct kvm_msrs __user *user_msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data),
int writeback)
@@ -2573,10 +1248,17 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
if (r < 0)
goto out_free;
- r = -EFAULT;
- if (writeback && copy_to_user(user_msrs->entries, entries, size))
+	/* write back the number of MSRs handled here */
+ r = gvmUpdateReturnBuffer(pIrp, 0, &n, sizeof(n));
+ if (r)
goto out_free;
+ if (writeback) {
+ r = gvmUpdateReturnBuffer(pIrp, sizeof(msrs), entries, size);
+ if (r)
+ goto out_free;
+ }
+
r = n;
out_free:
@@ -2590,56 +1272,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int r;
switch (ext) {
- case KVM_CAP_IRQCHIP:
- case KVM_CAP_HLT:
- case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
- case KVM_CAP_SET_TSS_ADDR:
- case KVM_CAP_EXT_CPUID:
- case KVM_CAP_EXT_EMUL_CPUID:
- case KVM_CAP_CLOCKSOURCE:
- case KVM_CAP_PIT:
- case KVM_CAP_NOP_IO_DELAY:
- case KVM_CAP_MP_STATE:
- case KVM_CAP_SYNC_MMU:
- case KVM_CAP_USER_NMI:
- case KVM_CAP_REINJECT_CONTROL:
- case KVM_CAP_IRQ_INJECT_STATUS:
- case KVM_CAP_IOEVENTFD:
- case KVM_CAP_IOEVENTFD_NO_LENGTH:
- case KVM_CAP_PIT2:
- case KVM_CAP_PIT_STATE2:
- case KVM_CAP_SET_IDENTITY_MAP_ADDR:
- case KVM_CAP_XEN_HVM:
- case KVM_CAP_VCPU_EVENTS:
- case KVM_CAP_HYPERV:
- case KVM_CAP_HYPERV_VAPIC:
- case KVM_CAP_HYPERV_SPIN:
- case KVM_CAP_HYPERV_SYNIC:
- case KVM_CAP_PCI_SEGMENT:
- case KVM_CAP_DEBUGREGS:
- case KVM_CAP_X86_ROBUST_SINGLESTEP:
- case KVM_CAP_XSAVE:
- case KVM_CAP_ASYNC_PF:
- case KVM_CAP_GET_TSC_KHZ:
- case KVM_CAP_KVMCLOCK_CTRL:
- case KVM_CAP_READONLY_MEM:
- case KVM_CAP_HYPERV_TIME:
- case KVM_CAP_IOAPIC_POLARITY_IGNORED:
- case KVM_CAP_TSC_DEADLINE_TIMER:
- case KVM_CAP_ENABLE_CAP_VM:
- case KVM_CAP_DISABLE_QUIRKS:
- case KVM_CAP_SET_BOOT_CPU_ID:
- case KVM_CAP_SPLIT_IRQCHIP:
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
- case KVM_CAP_ASSIGN_DEV_IRQ:
- case KVM_CAP_PCI_2_3:
-#endif
+ case GVM_CAP_IRQCHIP:
+ case GVM_CAP_HLT:
+ case GVM_CAP_MMU_SHADOW_CACHE_CONTROL:
+ case GVM_CAP_EXT_EMUL_CPUID:
+ case GVM_CAP_NOP_IO_DELAY:
+ case GVM_CAP_SYNC_MMU:
+ case GVM_CAP_USER_NMI:
+ case GVM_CAP_REINJECT_CONTROL:
+ case GVM_CAP_SET_IDENTITY_MAP_ADDR:
+ case GVM_CAP_VCPU_EVENTS:
r = 1;
break;
- case KVM_CAP_ADJUST_CLOCK:
- r = KVM_CLOCK_TSC_STABLE;
+ case GVM_CAP_PCI_SEGMENT:
+ case GVM_CAP_DEBUGREGS:
+ case GVM_CAP_X86_ROBUST_SINGLESTEP:
+ case GVM_CAP_XSAVE:
+ case GVM_CAP_READONLY_MEM:
+ case GVM_CAP_IOAPIC_POLARITY_IGNORED:
+ case GVM_CAP_ENABLE_CAP_VM:
+ case GVM_CAP_DISABLE_QUIRKS:
+ case GVM_CAP_SET_BOOT_CPU_ID:
+ r = 0;
break;
- case KVM_CAP_X86_SMM:
+ case GVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
* and SMM handlers might indeed rely on 4G segment limits,
* so do not report SMM to be available if real mode is
@@ -2650,41 +1306,21 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
break;
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
- case KVM_CAP_VAPIC:
+ case GVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
break;
- case KVM_CAP_NR_VCPUS:
- r = KVM_SOFT_MAX_VCPUS;
- break;
- case KVM_CAP_MAX_VCPUS:
- r = KVM_MAX_VCPUS;
- break;
- case KVM_CAP_NR_MEMSLOTS:
- r = KVM_USER_MEM_SLOTS;
- break;
- case KVM_CAP_PV_MMU: /* obsolete */
- r = 0;
+ case GVM_CAP_NR_VCPUS:
+ r = GVM_SOFT_MAX_VCPUS;
break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
- case KVM_CAP_IOMMU:
- r = iommu_present(&pci_bus_type);
+ case GVM_CAP_MAX_VCPUS:
+ r = GVM_MAX_VCPUS;
break;
-#endif
- case KVM_CAP_MCE:
- r = KVM_MAX_MCE_BANKS;
+ case GVM_CAP_NR_MEMSLOTS:
+ r = GVM_USER_MEM_SLOTS;
break;
- case KVM_CAP_XCRS:
+ case GVM_CAP_XCRS:
r = boot_cpu_has(X86_FEATURE_XSAVE);
break;
- case KVM_CAP_TSC_CONTROL:
- r = kvm_has_tsc_control;
- break;
- case KVM_CAP_X2APIC_API:
- r = KVM_X2APIC_API_VALID_FLAGS;
- break;
default:
r = 0;
break;
@@ -2693,64 +1329,53 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
}
-long kvm_arch_dev_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_dev_ioctl(struct gvm_device_extension *devext,
+ PIRP pIrp, unsigned int ioctl)
{
- void __user *argp = (void __user *)arg;
+ void __user *argp = (void __user *)pIrp->AssociatedIrp.SystemBuffer;
+ size_t args = IoGetCurrentIrpStackLocation(pIrp)->Parameters.DeviceIoControl.InputBufferLength;
long r;
switch (ioctl) {
- case KVM_GET_MSR_INDEX_LIST: {
- struct kvm_msr_list __user *user_msr_list = argp;
- struct kvm_msr_list msr_list;
+ case GVM_GET_MSR_INDEX_LIST: {
+ struct kvm_msr_list *msr_list = argp;
unsigned n;
- r = -EFAULT;
- if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
- goto out;
- n = msr_list.nmsrs;
- msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
- if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
- goto out;
- r = -E2BIG;
- if (n < msr_list.nmsrs)
+ if (args < sizeof(struct kvm_msr_list)) {
+ r = -EINVAL;
goto out;
- r = -EFAULT;
- if (copy_to_user(user_msr_list->indices, &msrs_to_save,
- num_msrs_to_save * sizeof(u32)))
+ }
+
+ r = STATUS_SUCCESS;
+ n = msr_list->nmsrs;
+ __u32 nmsrs = num_msrs_to_save;
+ r = gvmUpdateReturnBuffer(pIrp, 0, &nmsrs, sizeof(nmsrs));
+ if (r)
goto out;
- if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
- &emulated_msrs,
- num_emulated_msrs * sizeof(u32)))
+
+ if (n < nmsrs) {
+ r = -E2BIG;
goto out;
- r = 0;
+ }
+
+ r = gvmUpdateReturnBuffer(pIrp, sizeof(nmsrs), &msrs_to_save,
+ num_msrs_to_save * sizeof(u32));
break;
}
- case KVM_GET_SUPPORTED_CPUID:
- case KVM_GET_EMULATED_CPUID: {
- struct kvm_cpuid2 __user *cpuid_arg = argp;
- struct kvm_cpuid2 cpuid;
+ case GVM_GET_SUPPORTED_CPUID:
+ case GVM_GET_EMULATED_CPUID: {
+ struct kvm_cpuid __user *cpuid_arg = argp;
+ struct kvm_cpuid cpuid;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
- r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
+ r = kvm_dev_ioctl_get_cpuid(pIrp, &cpuid, cpuid_arg->entries,
ioctl);
if (r)
goto out;
- r = -EFAULT;
- if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
- goto out;
- r = 0;
- break;
- }
- case KVM_X86_GET_MCE_CAP_SUPPORTED: {
- r = -EFAULT;
- if (copy_to_user(argp, &kvm_mce_cap_supported,
- sizeof(kvm_mce_cap_supported)))
- goto out;
r = 0;
break;
}
@@ -2761,84 +1386,20 @@ out:
return r;
}
-static void wbinvd_ipi(void *garbage)
-{
- wbinvd();
-}
-
-static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
-{
- return kvm_arch_has_noncoherent_dma(vcpu->kvm);
-}
-
-static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
- set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- /* Address WBINVD may be executed by guest */
- if (need_emulate_wbinvd(vcpu)) {
- if (kvm_x86_ops->has_wbinvd_exit())
- cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
- else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
- smp_call_function_single(vcpu->cpu,
- wbinvd_ipi, NULL, 1);
- }
-
kvm_x86_ops->vcpu_load(vcpu, cpu);
-
- /* Apply any externally detected TSC adjustments (due to suspend) */
- if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
- adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
- vcpu->arch.tsc_offset_adjustment = 0;
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- }
-
- if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
- s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
- rdtsc() - vcpu->arch.last_host_tsc;
- if (tsc_delta < 0)
- mark_tsc_unstable("KVM discovered backwards TSC");
-
- if (check_tsc_unstable()) {
- u64 offset = kvm_compute_tsc_offset(vcpu,
- vcpu->arch.last_guest_tsc);
- kvm_vcpu_write_tsc_offset(vcpu, offset);
- vcpu->arch.tsc_catchup = 1;
- }
- if (kvm_lapic_hv_timer_in_use(vcpu) &&
- kvm_x86_ops->set_hv_timer(vcpu,
- kvm_get_lapic_tscdeadline_msr(vcpu)))
- kvm_lapic_switch_to_sw_timer(vcpu);
- /*
- * On a host with synchronized TSC, there is no need to update
- * kvmclock on vcpu->cpu migration
- */
- if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
- kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
- if (vcpu->cpu != cpu)
- kvm_migrate_timers(vcpu);
- vcpu->cpu = cpu;
- }
-
- kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+ vcpu->cpu = cpu;
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->vcpu_put(vcpu);
- kvm_put_guest_fpu(vcpu);
- vcpu->arch.last_host_tsc = rdtsc();
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
- if (vcpu->arch.apicv_active)
- kvm_x86_ops->sync_pir_to_irr(vcpu);
-
return kvm_apic_get_state(vcpu, s);
}
@@ -2878,12 +1439,12 @@ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
- if (irq->irq >= KVM_NR_INTERRUPTS)
+ if (irq->irq >= GVM_NR_INTERRUPTS)
return -EINVAL;
if (!irqchip_in_kernel(vcpu->kvm)) {
kvm_queue_interrupt(vcpu, irq->irq, false);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
return 0;
}
@@ -2898,7 +1459,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
return -EEXIST;
vcpu->arch.pending_external_vector = irq->irq;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
return 0;
}
@@ -2911,7 +1472,7 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
{
- kvm_make_request(KVM_REQ_SMI, vcpu);
+ kvm_make_request(GVM_REQ_SMI, vcpu);
return 0;
}
@@ -2925,80 +1486,6 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
return 0;
}
-static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
- u64 mcg_cap)
-{
- int r;
- unsigned bank_num = mcg_cap & 0xff, bank;
-
- r = -EINVAL;
- if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
- goto out;
- if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
- goto out;
- r = 0;
- vcpu->arch.mcg_cap = mcg_cap;
- /* Init IA32_MCG_CTL to all 1s */
- if (mcg_cap & MCG_CTL_P)
- vcpu->arch.mcg_ctl = ~(u64)0;
- /* Init IA32_MCi_CTL to all 1s */
- for (bank = 0; bank < bank_num; bank++)
- vcpu->arch.mce_banks[bank*4] = ~(u64)0;
-
- if (kvm_x86_ops->setup_mce)
- kvm_x86_ops->setup_mce(vcpu);
-out:
- return r;
-}
-
-static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
- struct kvm_x86_mce *mce)
-{
- u64 mcg_cap = vcpu->arch.mcg_cap;
- unsigned bank_num = mcg_cap & 0xff;
- u64 *banks = vcpu->arch.mce_banks;
-
- if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
- return -EINVAL;
- /*
- * if IA32_MCG_CTL is not all 1s, the uncorrected error
- * reporting is disabled
- */
- if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
- vcpu->arch.mcg_ctl != ~(u64)0)
- return 0;
- banks += 4 * mce->bank;
- /*
- * if IA32_MCi_CTL is not all 1s, the uncorrected error
- * reporting is disabled for the bank
- */
- if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
- return 0;
- if (mce->status & MCI_STATUS_UC) {
- if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
- !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
- return 0;
- }
- if (banks[1] & MCI_STATUS_VAL)
- mce->status |= MCI_STATUS_OVER;
- banks[2] = mce->addr;
- banks[3] = mce->misc;
- vcpu->arch.mcg_status = mce->mcg_status;
- banks[1] = mce->status;
- kvm_queue_exception(vcpu, MC_VECTOR);
- } else if (!(banks[1] & MCI_STATUS_VAL)
- || !(banks[1] & MCI_STATUS_UC)) {
- if (banks[1] & MCI_STATUS_VAL)
- mce->status |= MCI_STATUS_OVER;
- banks[2] = mce->addr;
- banks[3] = mce->misc;
- banks[1] = mce->status;
- } else
- banks[1] |= MCI_STATUS_OVER;
- return 0;
-}
-
static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
@@ -3030,19 +1517,19 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
!!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
events->smi.latched_init = kvm_lapic_latched_init(vcpu);
- events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
- | KVM_VCPUEVENT_VALID_SHADOW
- | KVM_VCPUEVENT_VALID_SMM);
+ events->flags = (GVM_VCPUEVENT_VALID_NMI_PENDING
+ | GVM_VCPUEVENT_VALID_SHADOW
+ | GVM_VCPUEVENT_VALID_SMM);
memset(&events->reserved, 0, sizeof(events->reserved));
}
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
- if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
- | KVM_VCPUEVENT_VALID_SIPI_VECTOR
- | KVM_VCPUEVENT_VALID_SHADOW
- | KVM_VCPUEVENT_VALID_SMM))
+ if (events->flags & ~(GVM_VCPUEVENT_VALID_NMI_PENDING
+ | GVM_VCPUEVENT_VALID_SIPI_VECTOR
+ | GVM_VCPUEVENT_VALID_SHADOW
+ | GVM_VCPUEVENT_VALID_SMM))
return -EINVAL;
if (events->exception.injected &&
@@ -3058,20 +1545,20 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.interrupt.pending = events->interrupt.injected;
vcpu->arch.interrupt.nr = events->interrupt.nr;
vcpu->arch.interrupt.soft = events->interrupt.soft;
- if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
+ if (events->flags & GVM_VCPUEVENT_VALID_SHADOW)
kvm_x86_ops->set_interrupt_shadow(vcpu,
events->interrupt.shadow);
vcpu->arch.nmi_injected = events->nmi.injected;
- if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
+ if (events->flags & GVM_VCPUEVENT_VALID_NMI_PENDING)
vcpu->arch.nmi_pending = events->nmi.pending;
kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
- if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
+ if (events->flags & GVM_VCPUEVENT_VALID_SIPI_VECTOR &&
lapic_in_kernel(vcpu))
vcpu->arch.apic->sipi_vector = events->sipi_vector;
- if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+ if (events->flags & GVM_VCPUEVENT_VALID_SMM) {
if (events->smi.smm)
vcpu->arch.hflags |= HF_SMM_MASK;
else
@@ -3083,13 +1570,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
if (lapic_in_kernel(vcpu)) {
if (events->smi.latched_init)
- set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+ set_bit(GVM_APIC_INIT, &vcpu->arch.apic->pending_events);
else
- clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+ clear_bit(GVM_APIC_INIT, &vcpu->arch.apic->pending_events);
}
}
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
return 0;
}
@@ -3097,7 +1584,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
struct kvm_debugregs *dbgregs)
{
- unsigned long val;
+ size_t val;
memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
kvm_get_dr(vcpu, 6, &val);
@@ -3128,11 +1615,87 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return 0;
}
+u64 xfeatures_mask;
+static unsigned int xstate_offsets[XFEATURE_MAX] = { 0 };
+static unsigned int xstate_sizes[XFEATURE_MAX] = { 0 };
+static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
+
+/*
+ * Note that in the future we will likely need a pair of
+ * functions here: one for user xstates and the other for
+ * system xstates. For now, they are the same.
+ */
+static int xfeature_enabled(enum xfeature xfeature)
+{
+ return !!(xfeatures_mask & ((u64)1 << xfeature));
+}
+
+/*
+ * Given an xstate feature mask, calculate where in the xsave
+ * buffer the state is. Callers should ensure that the buffer
+ * is valid.
+ *
+ * Note: does not work for compacted buffers.
+ */
+static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
+{
+ int feature_nr = fls64(xstate_feature_mask) - 1;
+
+ if (!xfeature_enabled(feature_nr)) {
+ return NULL;
+ }
+
+ return (u8 *)xsave + xstate_comp_offsets[feature_nr];
+}
+
+/*
+ * Given the xsave area and a state inside, this function returns the
+ * address of the state.
+ *
+ * This is the API that is called to get xstate address in either
+ * standard format or compacted format of xsave area.
+ *
+ * Note that if there is no data for the field in the xsave buffer
+ * this will return NULL.
+ *
+ * Inputs:
+ * xstate: the thread's storage area for all FPU data
+ * xstate_feature: state which is defined in xsave.h (e.g.
+ * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
+ * Output:
+ * address of the state in the xsave area, or NULL if the
+ * field is not present in the xsave buffer.
+ */
+void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
+{
+ /*
+ * Do we even *have* xsave state?
+ */
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
+ return NULL;
+
+ /*
+ * This assumes the last 'xsave*' instruction to
+ * have requested that 'xstate_feature' be saved.
+ * If it did not, we might be seeing an old value
+ * of the field in the buffer.
+ *
+ * This can happen because the last 'xsave' did not
+ * request that this feature be saved (unlikely)
+ * or because the "init optimization" caused it
+ * to not be saved.
+ */
+ if (!(xsave->header.xfeatures & xstate_feature))
+ return NULL;
+
+ return __raw_xsave_addr(xsave, xstate_feature);
+}
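
The fill_xsave()/load_xsave() hunks below keep the same feature-walk idiom; only the negation gains an s64 cast, presumably to silence compiler warnings about negating an unsigned value (e.g. MSVC C4146). The walk itself, as a sketch (with valid = 0b10110 the loop visits features 1, 2 and 4):

static void walk_xfeatures(u64 valid)
{
	while (valid) {
		u64 feature = valid & -(s64)valid;  /* isolate lowest set bit */
		int index = fls64(feature) - 1;     /* its xfeature number */
		/* ...copy xstate component 'index' between formats... */
		valid -= feature;
	}
}
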
+
#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
{
- struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
+ struct xregs_state *xsave = &vcpu->arch.guest_fpu.xsave;
u64 xstate_bv = xsave->header.xfeatures;
u64 valid;
@@ -3151,7 +1714,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
- u64 feature = valid & -valid;
+ u64 feature = valid & -(s64)valid;
int index = fls64(feature) - 1;
void *src = get_xsave_addr(xsave, feature);
@@ -3168,7 +1731,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
{
- struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
+ struct xregs_state *xsave = &vcpu->arch.guest_fpu.xsave;
u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
u64 valid;
@@ -3189,7 +1752,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
- u64 feature = valid & -valid;
+ u64 feature = valid & -(s64)valid;
int index = fls64(feature) - 1;
void *dest = get_xsave_addr(xsave, feature);
@@ -3212,7 +1775,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
fill_xsave((u8 *) guest_xsave->region, vcpu);
} else {
memcpy(guest_xsave->region,
- &vcpu->arch.guest_fpu.state.fxsave,
+ &vcpu->arch.guest_fpu.fxsave,
sizeof(struct fxregs_state));
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
XFEATURE_MASK_FPSSE;
@@ -3237,7 +1800,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
} else {
if (xstate_bv & ~XFEATURE_MASK_FPSSE)
return -EINVAL;
- memcpy(&vcpu->arch.guest_fpu.state.fxsave,
+ memcpy(&vcpu->arch.guest_fpu.fxsave,
guest_xsave->region, sizeof(struct fxregs_state));
}
return 0;
@@ -3265,7 +1828,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -EINVAL;
- if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
+ if (guest_xcrs->nr_xcrs > GVM_MAX_XCRS || guest_xcrs->flags)
return -EINVAL;
for (i = 0; i < guest_xcrs->nr_xcrs; i++)
@@ -3280,40 +1843,11 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
return r;
}
-/*
- * kvm_set_guest_paused() indicates to the guest kernel that it has been
- * stopped by the hypervisor. This function will be called from the host only.
- * EINVAL is returned when the host attempts to set the flag for a guest that
- * does not support pv clocks.
- */
-static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
-{
- if (!vcpu->arch.pv_time_enabled)
- return -EINVAL;
- vcpu->arch.pvclock_set_guest_stopped_request = true;
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- return 0;
-}
-
-static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
- struct kvm_enable_cap *cap)
+long kvm_arch_vcpu_ioctl(struct gvm_device_extension *devext,
+ PIRP pIrp, unsigned int ioctl)
{
- if (cap->flags)
- return -EINVAL;
-
- switch (cap->cap) {
- case KVM_CAP_HYPERV_SYNIC:
- return kvm_hv_activate_synic(vcpu);
- default:
- return -EINVAL;
- }
-}
-
-long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
-{
- struct kvm_vcpu *vcpu = filp->private_data;
- void __user *argp = (void __user *)arg;
+ struct kvm_vcpu *vcpu = devext->PrivData;
+ void __user *argp = (void __user *)pIrp->AssociatedIrp.SystemBuffer;
int r;
union {
struct kvm_lapic_state *lapic;
@@ -3324,7 +1858,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
u.buffer = NULL;
switch (ioctl) {
- case KVM_GET_LAPIC: {
+ case GVM_GET_LAPIC: {
r = -EINVAL;
if (!lapic_in_kernel(vcpu))
goto out;
@@ -3336,13 +1870,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
if (r)
goto out;
- r = -EFAULT;
- if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
- goto out;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, u.lapic,
+ sizeof(struct kvm_lapic_state));
break;
}
- case KVM_SET_LAPIC: {
+ case GVM_SET_LAPIC: {
r = -EINVAL;
if (!lapic_in_kernel(vcpu))
goto out;
@@ -3353,7 +1885,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
break;
}
- case KVM_INTERRUPT: {
+ case GVM_INTERRUPT: {
struct kvm_interrupt irq;
r = -EFAULT;
@@ -3362,59 +1894,50 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
break;
}
- case KVM_NMI: {
+ case GVM_NMI: {
r = kvm_vcpu_ioctl_nmi(vcpu);
break;
}
- case KVM_SMI: {
+ case GVM_SMI: {
r = kvm_vcpu_ioctl_smi(vcpu);
break;
}
- case KVM_SET_CPUID: {
+ case GVM_SET_CPUID: {
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
- r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
- break;
- }
- case KVM_SET_CPUID2: {
- struct kvm_cpuid2 __user *cpuid_arg = argp;
- struct kvm_cpuid2 cpuid;
-
- r = -EFAULT;
- if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
- goto out;
- r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
+ r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid,
cpuid_arg->entries);
break;
}
- case KVM_GET_CPUID2: {
- struct kvm_cpuid2 __user *cpuid_arg = argp;
- struct kvm_cpuid2 cpuid;
+ case GVM_GET_CPUID: {
+ struct kvm_cpuid __user *cpuid_arg = argp;
+ struct kvm_cpuid cpuid;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
- r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
+ r = kvm_vcpu_ioctl_get_cpuid(vcpu, &cpuid,
cpuid_arg->entries);
if (r)
goto out;
- r = -EFAULT;
- if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+ r = gvmUpdateReturnBuffer(pIrp, 0, &cpuid, sizeof(cpuid));
+ if (r)
goto out;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, sizeof(cpuid), &vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry));
break;
}
- case KVM_GET_MSRS:
- r = msr_io(vcpu, argp, do_get_msr, 1);
+ case GVM_GET_MSRS:
+ r = msr_io(pIrp, vcpu, argp, do_get_msr, 1);
break;
- case KVM_SET_MSRS:
- r = msr_io(vcpu, argp, do_set_msr, 0);
+ case GVM_SET_MSRS:
+ r = msr_io(pIrp, vcpu, argp, do_set_msr, 0);
break;
- case KVM_TPR_ACCESS_REPORTING: {
+ case GVM_TPR_ACCESS_REPORTING: {
struct kvm_tpr_access_ctl tac;
r = -EFAULT;
@@ -3423,13 +1946,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
if (r)
goto out;
- r = -EFAULT;
- if (copy_to_user(argp, &tac, sizeof tac))
- goto out;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, &tac, sizeof(tac));
break;
};
- case KVM_SET_VAPIC_ADDR: {
+ case GVM_SET_VAPIC_ADDR: {
struct kvm_vapic_addr va;
int idx;
@@ -3444,36 +1964,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
- case KVM_X86_SETUP_MCE: {
- u64 mcg_cap;
-
- r = -EFAULT;
- if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
- goto out;
- r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
- break;
- }
- case KVM_X86_SET_MCE: {
- struct kvm_x86_mce mce;
-
- r = -EFAULT;
- if (copy_from_user(&mce, argp, sizeof mce))
- goto out;
- r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
- break;
- }
- case KVM_GET_VCPU_EVENTS: {
+ case GVM_GET_VCPU_EVENTS: {
struct kvm_vcpu_events events;
kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
- r = -EFAULT;
- if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
- break;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, &events,
+ sizeof(struct kvm_vcpu_events));
break;
}
- case KVM_SET_VCPU_EVENTS: {
+ case GVM_SET_VCPU_EVENTS: {
struct kvm_vcpu_events events;
r = -EFAULT;
@@ -3483,19 +1983,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
break;
}
- case KVM_GET_DEBUGREGS: {
+ case GVM_GET_DEBUGREGS: {
struct kvm_debugregs dbgregs;
kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
- r = -EFAULT;
- if (copy_to_user(argp, &dbgregs,
- sizeof(struct kvm_debugregs)))
- break;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, &dbgregs,
+ sizeof(struct kvm_debugregs));
break;
}
- case KVM_SET_DEBUGREGS: {
+ case GVM_SET_DEBUGREGS: {
struct kvm_debugregs dbgregs;
r = -EFAULT;
@@ -3506,7 +2003,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
break;
}
- case KVM_GET_XSAVE: {
+ case GVM_GET_XSAVE: {
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
r = -ENOMEM;
if (!u.xsave)
@@ -3514,13 +2011,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
- r = -EFAULT;
- if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
- break;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, u.xsave,
+ sizeof(struct kvm_xsave));
break;
}
- case KVM_SET_XSAVE: {
+ case GVM_SET_XSAVE: {
u.xsave = memdup_user(argp, sizeof(*u.xsave));
if (IS_ERR(u.xsave))
return PTR_ERR(u.xsave);
@@ -3528,7 +2023,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
break;
}
- case KVM_GET_XCRS: {
+ case GVM_GET_XCRS: {
u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
r = -ENOMEM;
if (!u.xcrs)
@@ -3536,14 +2031,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
- r = -EFAULT;
- if (copy_to_user(argp, u.xcrs,
- sizeof(struct kvm_xcrs)))
- break;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, u.xcrs,
+ sizeof(struct kvm_xcrs));
break;
}
- case KVM_SET_XCRS: {
+ case GVM_SET_XCRS: {
u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
if (IS_ERR(u.xcrs))
return PTR_ERR(u.xcrs);
@@ -3551,40 +2043,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
break;
}
- case KVM_SET_TSC_KHZ: {
- u32 user_tsc_khz;
-
- r = -EINVAL;
- user_tsc_khz = (u32)arg;
-
- if (user_tsc_khz >= kvm_max_guest_tsc_khz)
- goto out;
-
- if (user_tsc_khz == 0)
- user_tsc_khz = tsc_khz;
-
- if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
- r = 0;
-
- goto out;
- }
- case KVM_GET_TSC_KHZ: {
- r = vcpu->arch.virtual_tsc_khz;
- goto out;
- }
- case KVM_KVMCLOCK_CTRL: {
- r = kvm_set_guest_paused(vcpu);
- goto out;
- }
- case KVM_ENABLE_CAP: {
- struct kvm_enable_cap cap;
-
- r = -EFAULT;
- if (copy_from_user(&cap, argp, sizeof(cap)))
- goto out;
- r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
- break;
- }
default:
r = -EINVAL;
}
@@ -3593,12 +2051,7 @@ out:
return r;
}
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
-{
- return VM_FAULT_SIGBUS;
-}
-
-static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
+static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, size_t addr)
{
int ret;
@@ -3618,7 +2071,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
u32 kvm_nr_mmu_pages)
{
- if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
+ if (kvm_nr_mmu_pages < GVM_MIN_ALLOC_MMU_PAGES)
return -EINVAL;
mutex_lock(&kvm->slots_lock);
@@ -3641,17 +2094,17 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
r = 0;
switch (chip->chip_id) {
- case KVM_IRQCHIP_PIC_MASTER:
+ case GVM_IRQCHIP_PIC_MASTER:
memcpy(&chip->chip.pic,
&pic_irqchip(kvm)->pics[0],
sizeof(struct kvm_pic_state));
break;
- case KVM_IRQCHIP_PIC_SLAVE:
+ case GVM_IRQCHIP_PIC_SLAVE:
memcpy(&chip->chip.pic,
&pic_irqchip(kvm)->pics[1],
sizeof(struct kvm_pic_state));
break;
- case KVM_IRQCHIP_IOAPIC:
+ case GVM_IRQCHIP_IOAPIC:
r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
break;
default:
@@ -3667,21 +2120,21 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
r = 0;
switch (chip->chip_id) {
- case KVM_IRQCHIP_PIC_MASTER:
+ case GVM_IRQCHIP_PIC_MASTER:
spin_lock(&pic_irqchip(kvm)->lock);
memcpy(&pic_irqchip(kvm)->pics[0],
&chip->chip.pic,
sizeof(struct kvm_pic_state));
spin_unlock(&pic_irqchip(kvm)->lock);
break;
- case KVM_IRQCHIP_PIC_SLAVE:
+ case GVM_IRQCHIP_PIC_SLAVE:
spin_lock(&pic_irqchip(kvm)->lock);
memcpy(&pic_irqchip(kvm)->pics[1],
&chip->chip.pic,
sizeof(struct kvm_pic_state));
spin_unlock(&pic_irqchip(kvm)->lock);
break;
- case KVM_IRQCHIP_IOAPIC:
+ case GVM_IRQCHIP_IOAPIC:
r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
break;
default:
@@ -3692,83 +2145,6 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
return r;
}
-static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
-{
- struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
-
- BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
-
- mutex_lock(&kps->lock);
- memcpy(ps, &kps->channels, sizeof(*ps));
- mutex_unlock(&kps->lock);
- return 0;
-}
-
-static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
-{
- int i;
- struct kvm_pit *pit = kvm->arch.vpit;
-
- mutex_lock(&pit->pit_state.lock);
- memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
- for (i = 0; i < 3; i++)
- kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
- mutex_unlock(&pit->pit_state.lock);
- return 0;
-}
-
-static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
-{
- mutex_lock(&kvm->arch.vpit->pit_state.lock);
- memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
- sizeof(ps->channels));
- ps->flags = kvm->arch.vpit->pit_state.flags;
- mutex_unlock(&kvm->arch.vpit->pit_state.lock);
- memset(&ps->reserved, 0, sizeof(ps->reserved));
- return 0;
-}
-
-static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
-{
- int start = 0;
- int i;
- u32 prev_legacy, cur_legacy;
- struct kvm_pit *pit = kvm->arch.vpit;
-
- mutex_lock(&pit->pit_state.lock);
- prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
- cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
- if (!prev_legacy && cur_legacy)
- start = 1;
- memcpy(&pit->pit_state.channels, &ps->channels,
- sizeof(pit->pit_state.channels));
- pit->pit_state.flags = ps->flags;
- for (i = 0; i < 3; i++)
- kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
- start && i == 0);
- mutex_unlock(&pit->pit_state.lock);
- return 0;
-}
-
-static int kvm_vm_ioctl_reinject(struct kvm *kvm,
- struct kvm_reinject_control *control)
-{
- struct kvm_pit *pit = kvm->arch.vpit;
-
- if (!pit)
- return -ENXIO;
-
- /* pit->pit_state.lock was overloaded to prevent userspace from getting
- * an inconsistent state after running multiple KVM_REINJECT_CONTROL
- * ioctls in parallel. Use a separate lock if that ioctl isn't rare.
- */
- mutex_lock(&pit->pit_state.lock);
- kvm_pit_set_reinject(pit, control->pit_reinject);
- mutex_unlock(&pit->pit_state.lock);
-
- return 0;
-}
-
/**
* kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
* @kvm: kvm instance
@@ -3779,7 +2155,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
*
* We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
* always flush the TLB (step 4) even if previous step failed and the dirty
- * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
+ * bitmap may be corrupt. Regardless of previous outcome the kvm logging API
* does not preclude user space subsequent dirty log read. Flushing TLB ensures
* writes will be marked dirty for next log read.
*
@@ -3791,7 +2167,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
bool is_dirty = false;
- int r;
+ int r = 0;
mutex_lock(&kvm->slots_lock);
@@ -3807,7 +2183,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
* All the TLBs can be flushed out of mmu lock, see the comments in
* kvm_mmu_slot_remove_write_access().
*/
- lockdep_assert_held(&kvm->slots_lock);
if (is_dirty)
kvm_flush_remote_tlbs(kvm);
@@ -3821,7 +2196,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
if (!irqchip_in_kernel(kvm))
return -ENXIO;
- irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event->status = kvm_set_irq(kvm, GVM_USERSPACE_IRQ_SOURCE_ID,
irq_event->irq, irq_event->level,
line_status);
return 0;
@@ -3836,44 +2211,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return -EINVAL;
switch (cap->cap) {
- case KVM_CAP_DISABLE_QUIRKS:
+ case GVM_CAP_DISABLE_QUIRKS:
kvm->arch.disabled_quirks = cap->args[0];
r = 0;
break;
- case KVM_CAP_SPLIT_IRQCHIP: {
- mutex_lock(&kvm->lock);
- r = -EINVAL;
- if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
- goto split_irqchip_unlock;
- r = -EEXIST;
- if (irqchip_in_kernel(kvm))
- goto split_irqchip_unlock;
- if (kvm->created_vcpus)
- goto split_irqchip_unlock;
- r = kvm_setup_empty_irq_routing(kvm);
- if (r)
- goto split_irqchip_unlock;
- /* Pairs with irqchip_in_kernel. */
- smp_wmb();
- kvm->arch.irqchip_split = true;
- kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
- r = 0;
-split_irqchip_unlock:
- mutex_unlock(&kvm->lock);
- break;
- }
- case KVM_CAP_X2APIC_API:
- r = -EINVAL;
- if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
- break;
-
- if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
- kvm->arch.x2apic_format = true;
- if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
- kvm->arch.x2apic_broadcast_quirk_disabled = true;
-
- r = 0;
- break;
default:
r = -EINVAL;
break;
@@ -3881,43 +2222,39 @@ split_irqchip_unlock:
return r;
}
-long kvm_arch_vm_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_vm_ioctl(struct gvm_device_extension *devext,
+ PIRP pIrp, unsigned int ioctl)
{
- struct kvm *kvm = filp->private_data;
- void __user *argp = (void __user *)arg;
+ struct kvm *kvm = devext->PrivData;
+ void __user *argp = (void __user *)pIrp->AssociatedIrp.SystemBuffer;
int r = -ENOTTY;
- /*
- * This union makes it completely explicit to gcc-3.x
- * that these two variables' stack usage should be
- * combined, not added together.
- */
- union {
- struct kvm_pit_state ps;
- struct kvm_pit_state2 ps2;
- struct kvm_pit_config pit_config;
- } u;
switch (ioctl) {
- case KVM_SET_TSS_ADDR:
- r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
+ case GVM_SET_TSS_ADDR:
+ r = -EFAULT;
+ if (IoGetCurrentIrpStackLocation(pIrp)->Parameters.DeviceIoControl.InputBufferLength
+ < sizeof(size_t))
+ goto out;
+ r = kvm_vm_ioctl_set_tss_addr(kvm, *(size_t *)argp);
break;
- case KVM_SET_IDENTITY_MAP_ADDR: {
+ case GVM_SET_IDENTITY_MAP_ADDR: {
u64 ident_addr;
r = -EFAULT;
- if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
+ if (IoGetCurrentIrpStackLocation(pIrp)->Parameters.DeviceIoControl.InputBufferLength
+ < sizeof(ident_addr))
goto out;
+ ident_addr = *(u64 *)argp;
r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
break;
}
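
The buffered-I/O pattern above — check Parameters.DeviceIoControl.InputBufferLength against the expected argument size, then read the value directly out of the system buffer — recurs throughout this function for the ioctls that carry input. A minimal sketch of the same check as a WDM helper; the helper name is hypothetical and the patch open-codes the check at each site:

/* Hypothetical helper: fetch a fixed-size input argument from a
 * METHOD_BUFFERED request, failing with -EFAULT when the caller's
 * input buffer is too small. */
static int gvm_copy_in(PIRP pIrp, void *dst, size_t len)
{
    PIO_STACK_LOCATION s = IoGetCurrentIrpStackLocation(pIrp);

    if (s->Parameters.DeviceIoControl.InputBufferLength < len)
        return -EFAULT;
    RtlCopyMemory(dst, pIrp->AssociatedIrp.SystemBuffer, len);
    return 0;
}
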
- case KVM_SET_NR_MMU_PAGES:
- r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
+ case GVM_SET_NR_MMU_PAGES:
+ r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, *(unsigned int*)argp);
break;
- case KVM_GET_NR_MMU_PAGES:
+ case GVM_GET_NR_MMU_PAGES:
r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
break;
- case KVM_CREATE_IRQCHIP: {
+ case GVM_CREATE_IRQCHIP: {
struct kvm_pic *vpic;
mutex_lock(&kvm->lock);
@@ -3956,27 +2293,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
mutex_unlock(&kvm->lock);
break;
}
- case KVM_CREATE_PIT:
- u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
- goto create_pit;
- case KVM_CREATE_PIT2:
- r = -EFAULT;
- if (copy_from_user(&u.pit_config, argp,
- sizeof(struct kvm_pit_config)))
- goto out;
- create_pit:
- mutex_lock(&kvm->lock);
- r = -EEXIST;
- if (kvm->arch.vpit)
- goto create_pit_unlock;
- r = -ENOMEM;
- kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
- if (kvm->arch.vpit)
- r = 0;
- create_pit_unlock:
- mutex_unlock(&kvm->lock);
- break;
- case KVM_GET_IRQCHIP: {
+ case GVM_GET_IRQCHIP: {
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
struct kvm_irqchip *chip;
@@ -3987,20 +2304,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
r = -ENXIO;
- if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
+ if (!irqchip_in_kernel(kvm))
goto get_irqchip_out;
r = kvm_vm_ioctl_get_irqchip(kvm, chip);
if (r)
goto get_irqchip_out;
- r = -EFAULT;
- if (copy_to_user(argp, chip, sizeof *chip))
- goto get_irqchip_out;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, chip, sizeof(*chip));
get_irqchip_out:
kfree(chip);
break;
}
- case KVM_SET_IRQCHIP: {
+ case GVM_SET_IRQCHIP: {
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
struct kvm_irqchip *chip;
@@ -4011,7 +2325,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
r = -ENXIO;
- if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
+ if (!irqchip_in_kernel(kvm))
goto set_irqchip_out;
r = kvm_vm_ioctl_set_irqchip(kvm, chip);
if (r)
@@ -4021,121 +2335,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
kfree(chip);
break;
}
- case KVM_GET_PIT: {
- r = -EFAULT;
- if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
- goto out;
- r = -ENXIO;
- if (!kvm->arch.vpit)
- goto out;
- r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
- if (r)
- goto out;
- r = -EFAULT;
- if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
- goto out;
- r = 0;
- break;
- }
- case KVM_SET_PIT: {
- r = -EFAULT;
- if (copy_from_user(&u.ps, argp, sizeof u.ps))
- goto out;
- r = -ENXIO;
- if (!kvm->arch.vpit)
- goto out;
- r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
- break;
- }
- case KVM_GET_PIT2: {
- r = -ENXIO;
- if (!kvm->arch.vpit)
- goto out;
- r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
- if (r)
- goto out;
- r = -EFAULT;
- if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
- goto out;
- r = 0;
- break;
- }
- case KVM_SET_PIT2: {
- r = -EFAULT;
- if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
- goto out;
- r = -ENXIO;
- if (!kvm->arch.vpit)
- goto out;
- r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
- break;
- }
- case KVM_REINJECT_CONTROL: {
- struct kvm_reinject_control control;
- r = -EFAULT;
- if (copy_from_user(&control, argp, sizeof(control)))
- goto out;
- r = kvm_vm_ioctl_reinject(kvm, &control);
- break;
- }
- case KVM_SET_BOOT_CPU_ID:
+ case GVM_SET_BOOT_CPU_ID:
r = 0;
mutex_lock(&kvm->lock);
if (kvm->created_vcpus)
r = -EBUSY;
else
- kvm->arch.bsp_vcpu_id = arg;
+ kvm->arch.bsp_vcpu_id = *(u32 *)argp;
mutex_unlock(&kvm->lock);
break;
- case KVM_XEN_HVM_CONFIG: {
- r = -EFAULT;
- if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
- sizeof(struct kvm_xen_hvm_config)))
- goto out;
- r = -EINVAL;
- if (kvm->arch.xen_hvm_config.flags)
- goto out;
- r = 0;
- break;
- }
- case KVM_SET_CLOCK: {
- struct kvm_clock_data user_ns;
- u64 now_ns;
-
- r = -EFAULT;
- if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
- goto out;
-
- r = -EINVAL;
- if (user_ns.flags)
- goto out;
-
- r = 0;
- local_irq_disable();
- now_ns = __get_kvmclock_ns(kvm);
- kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
- local_irq_enable();
- kvm_gen_update_masterclock(kvm);
- break;
- }
- case KVM_GET_CLOCK: {
- struct kvm_clock_data user_ns;
- u64 now_ns;
-
- local_irq_disable();
- now_ns = __get_kvmclock_ns(kvm);
- user_ns.clock = now_ns;
- user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
- local_irq_enable();
- memset(&user_ns.pad, 0, sizeof(user_ns.pad));
-
- r = -EFAULT;
- if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
- goto out;
- r = 0;
- break;
- }
- case KVM_ENABLE_CAP: {
+ case GVM_ENABLE_CAP: {
struct kvm_enable_cap cap;
r = -EFAULT;
@@ -4145,7 +2354,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
default:
- r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
+ break;
}
out:
return r;
@@ -4183,6 +2392,7 @@ static void kvm_init_msr_list(void)
}
num_msrs_to_save = j;
+#if 0
for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
switch (emulated_msrs[i]) {
case MSR_IA32_SMBASE:
@@ -4198,6 +2408,7 @@ static void kvm_init_msr_list(void)
j++;
}
num_emulated_msrs = j;
+#endif
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -4205,17 +2416,20 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
{
int handled = 0;
int n;
+ const char *__v = v;
do {
n = min(len, 8);
if (!(lapic_in_kernel(vcpu) &&
!kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
- && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
+ && kvm_io_bus_write(vcpu, GVM_MMIO_BUS, addr, n, v))
break;
handled += n;
addr += n;
len -= n;
- v += n;
+ __v = (char *)v;
+ __v += n;
+ v = (void *)__v;
} while (len);
return handled;
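
The __v/char-pointer dance above (and in vcpu_mmio_read() below) exists because arithmetic on a void pointer is a GNU C extension; MSVC rejects "v += n" when v is void *, so the pointer is cast to char * before being advanced. A minimal standalone form of the same idiom; the helper name is hypothetical and is not part of this patch:

/* Hypothetical helper: advance a void pointer by n bytes without relying
 * on GNU void-pointer arithmetic. */
static inline void *ptr_advance(void *p, size_t n)
{
    return (char *)p + n;
}

With such a helper, the loop bodies would reduce to v = ptr_advance(v, n);.
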
@@ -4225,19 +2439,21 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
{
int handled = 0;
int n;
+ char *__v;
do {
n = min(len, 8);
if (!(lapic_in_kernel(vcpu) &&
!kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
addr, n, v))
- && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
+ && kvm_io_bus_read(vcpu, GVM_MMIO_BUS, addr, n, v))
break;
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
handled += n;
addr += n;
len -= n;
- v += n;
+ __v = (char *)v;
+ __v += n;
+ v = (void *)__v;
} while (len);
return handled;
@@ -4304,6 +2520,7 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception)
{
void *data = val;
+ char *__data;
int r = X86EMUL_CONTINUE;
while (bytes) {
@@ -4323,7 +2540,9 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
}
bytes -= toread;
- data += toread;
+ __data = (char *)data;
+ __data += toread;
+ data = (void *)__data;
addr += toread;
}
out:
@@ -4367,7 +2586,6 @@ int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
exception);
}
-EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
@@ -4378,7 +2596,7 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
}
static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
- unsigned long addr, void *val, unsigned int bytes)
+ size_t addr, void *val, unsigned int bytes)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
@@ -4393,6 +2611,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
void *data = val;
+ char *__data;
int r = X86EMUL_CONTINUE;
while (bytes) {
@@ -4412,15 +2631,16 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
}
bytes -= towrite;
- data += towrite;
+ __data = (char *)data;
+ __data += towrite;
+ data = (void *)__data;
addr += towrite;
}
out:
return r;
}
-EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
-static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
+static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, size_t gva,
gpa_t *gpa, struct x86_exception *exception,
bool write)
{
@@ -4437,7 +2657,6 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
vcpu->arch.access, 0, access)) {
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
(gva & (PAGE_SIZE - 1));
- trace_vcpu_match_mmio(gva, *gpa, write, false);
return 1;
}
@@ -4451,7 +2670,6 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
return 1;
if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
- trace_vcpu_match_mmio(gva, *gpa, write, true);
return 1;
}
@@ -4485,8 +2703,6 @@ struct read_write_emulator_ops {
static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, *(u64 *)val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -4508,14 +2724,12 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
return X86EMUL_IO_NEEDED;
}
@@ -4542,7 +2756,7 @@ static const struct read_write_emulator_ops write_emultor = {
.write = true,
};
-static int emulator_read_write_onepage(unsigned long addr, void *val,
+static int emulator_read_write_onepage(size_t addr, void *val,
unsigned int bytes,
struct x86_exception *exception,
struct kvm_vcpu *vcpu,
@@ -4552,6 +2766,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
int handled, ret;
bool write = ops->write;
struct kvm_mmio_fragment *frag;
+ char *__val;
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
@@ -4575,9 +2790,11 @@ mmio:
gpa += handled;
bytes -= handled;
- val += handled;
+ __val = val;
+ __val += handled;
+ val = __val;
- WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
+ WARN_ON(vcpu->mmio_nr_fragments >= GVM_MAX_MMIO_FRAGMENTS);
frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
frag->gpa = gpa;
frag->data = val;
@@ -4586,7 +2803,7 @@ mmio:
}
static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
- unsigned long addr,
+ size_t addr,
void *val, unsigned int bytes,
struct x86_exception *exception,
const struct read_write_emulator_ops *ops)
@@ -4594,6 +2811,7 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
int rc;
+ char *__val;
if (ops->read_write_prepare &&
ops->read_write_prepare(vcpu, val, bytes))
@@ -4605,7 +2823,7 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
int now;
- now = -addr & ~PAGE_MASK;
+ now = -(ssize_t)addr & ~PAGE_MASK;
rc = emulator_read_write_onepage(addr, val, now, exception,
vcpu, ops);
@@ -4614,7 +2832,9 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
addr += now;
if (ctxt->mode != X86EMUL_MODE_PROT64)
addr = (u32)addr;
- val += now;
+ __val = val;
+ __val += now;
+ val = __val;
bytes -= now;
}
@@ -4633,14 +2853,14 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
- vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->exit_reason = GVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = gpa;
return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
}
static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
- unsigned long addr,
+ size_t addr,
void *val,
unsigned int bytes,
struct x86_exception *exception)
@@ -4650,7 +2870,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
}
static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
- unsigned long addr,
+ size_t addr,
const void *val,
unsigned int bytes,
struct x86_exception *exception)
@@ -4670,7 +2890,7 @@ static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
#endif
static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
- unsigned long addr,
+ size_t addr,
const void *old,
const void *new,
unsigned int bytes,
@@ -4678,9 +2898,10 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
- struct page *page;
char *kaddr;
bool exchanged;
+ size_t hva;
+ PMDL kmap_mdl;
/* guests cmpxchg8b have to be emulated atomically */
if (bytes > 8 || (bytes & (bytes - 1)))
@@ -4695,11 +2916,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
goto emul_write;
- page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
- if (is_error_page(page))
+ hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
+ if (kvm_is_error_hva(hva))
goto emul_write;
- kaddr = kmap_atomic(page);
+ if (get_user_pages_fast(hva, 1, 1, &kmap_mdl) != 1)
+ goto emul_write;
+
+ kaddr = kmap_atomic(kmap_mdl);
+ if (!kaddr)
+ goto emul_write;
kaddr += offset_in_page(gpa);
switch (bytes) {
case 1:
@@ -4717,8 +2943,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
default:
BUG();
}
- kunmap_atomic(kaddr);
- kvm_release_page_dirty(page);
+ kunmap_atomic(kmap_mdl);
if (!exchanged)
return X86EMUL_CMPXCHG_FAILED;
@@ -4740,10 +2965,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
int r;
if (vcpu->arch.pio.in)
- r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
+ r = kvm_io_bus_read(vcpu, GVM_PIO_BUS, vcpu->arch.pio.port,
vcpu->arch.pio.size, pd);
else
- r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
+ r = kvm_io_bus_write(vcpu, GVM_PIO_BUS,
vcpu->arch.pio.port, vcpu->arch.pio.size,
pd);
return r;
@@ -4763,10 +2988,10 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
return 1;
}
- vcpu->run->exit_reason = KVM_EXIT_IO;
- vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
+ vcpu->run->exit_reason = GVM_EXIT_IO;
+ vcpu->run->io.direction = in ? GVM_EXIT_IO_IN : GVM_EXIT_IO_OUT;
vcpu->run->io.size = size;
- vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
+ vcpu->run->io.data_offset = GVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = count;
vcpu->run->io.port = port;
@@ -4787,7 +3012,6 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
if (ret) {
data_avail:
memcpy(val, vcpu->arch.pio_data, size * count);
- trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
vcpu->arch.pio.count = 0;
return 1;
}
@@ -4802,11 +3026,10 @@ static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
memcpy(vcpu->arch.pio_data, val, size * count);
- trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
}
-static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
+static size_t get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
return kvm_x86_ops->get_segment_base(vcpu, seg);
}
@@ -4818,19 +3041,6 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
{
- if (!need_emulate_wbinvd(vcpu))
- return X86EMUL_CONTINUE;
-
- if (kvm_x86_ops->has_wbinvd_exit()) {
- int cpu = get_cpu();
-
- cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
- smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
- wbinvd_ipi, NULL, 1);
- put_cpu();
- cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
- } else
- wbinvd();
return X86EMUL_CONTINUE;
}
@@ -4839,7 +3049,6 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
kvm_x86_ops->skip_emulated_instruction(vcpu);
return kvm_emulate_wbinvd_noskip(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
@@ -4849,16 +3058,17 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
}
static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
- unsigned long *dest)
+ size_t *dest)
{
- return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+ //return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+ return 0;
}
static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
- unsigned long value)
+ size_t value)
{
-
- return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
+ return 0;
+ //return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
}
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -4866,10 +3076,10 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val)
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}
-static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
+static size_t emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- unsigned long value;
+ size_t value;
switch (cr) {
case 0:
@@ -4888,7 +3098,7 @@ static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
value = kvm_get_cr8(vcpu);
break;
default:
- kvm_err("%s: unexpected cr %u\n", __func__, cr);
+ //kvm_err("%s: unexpected cr %u\n", __func__, cr);
return 0;
}
@@ -4917,7 +3127,7 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
res = kvm_set_cr8(vcpu, val);
break;
default:
- kvm_err("%s: unexpected cr %u\n", __func__, cr);
+ //kvm_err("%s: unexpected cr %u\n", __func__, cr);
res = -1;
}
@@ -4949,7 +3159,7 @@ static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
}
-static unsigned long emulator_get_cached_segment_base(
+static size_t emulator_get_cached_segment_base(
struct x86_emulate_ctxt *ctxt, int seg)
{
return get_segment_base(emul_to_vcpu(ctxt), seg);
@@ -4972,7 +3182,7 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
if (var.g)
var.limit >>= 12;
set_desc_limit(desc, var.limit);
- set_desc_base(desc, (unsigned long)var.base);
+ set_desc_base(desc, (size_t)var.base);
#ifdef CONFIG_X86_64
if (base3)
*base3 = var.base >> 32;
@@ -5063,13 +3273,15 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc)
{
- return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
+ //return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
+ return 0;
}
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc, u64 *pdata)
{
- return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
+ //return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
+ return 0;
}
static void emulator_halt(struct x86_emulate_ctxt *ctxt)
@@ -5085,11 +3297,12 @@ static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
* CR0.TS may reference the host fpu state, not the guest fpu state,
* so it may be clear at this point.
*/
- clts();
+ __clts();
}
static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
{
+ kvm_save_guest_fpu(emul_to_vcpu(ctxt));
preempt_enable();
}
@@ -5154,7 +3367,6 @@ static const struct x86_emulate_ops emulate_ops = {
.read_pmc = emulator_read_pmc,
.halt = emulator_halt,
.wbinvd = emulator_wbinvd,
- .fix_hypercall = emulator_fix_hypercall,
.get_fpu = emulator_get_fpu,
.put_fpu = emulator_put_fpu,
.intercept = emulator_intercept,
@@ -5177,7 +3389,7 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
if (unlikely(int_shadow || mask)) {
kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
if (!mask)
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
}
}
@@ -5209,9 +3421,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
(cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
cs_db ? X86EMUL_MODE_PROT32 :
X86EMUL_MODE_PROT16;
- BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
- BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
- BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
ctxt->emul_flags = vcpu->arch.hflags;
init_decode_cache(ctxt);
@@ -5244,17 +3453,15 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
return EMULATE_DONE;
}
-EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
static int handle_emulation_failure(struct kvm_vcpu *vcpu)
{
int r = EMULATE_DONE;
++vcpu->stat.insn_emulation_fail;
- trace_kvm_emulate_insn_failed(vcpu);
if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->exit_reason = GVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = GVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
r = EMULATE_FAIL;
}
@@ -5303,8 +3510,6 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
if (is_error_noslot_pfn(pfn))
return false;
- kvm_release_pfn_clean(pfn);
-
/* The instructions are well-emulated on direct mmu. */
if (vcpu->arch.mmu.direct_map) {
unsigned int indirect_shadow_pages;
@@ -5335,10 +3540,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
}
static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
- unsigned long cr2, int emulation_type)
+ size_t cr2, int emulation_type)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
+ size_t last_retry_eip, last_retry_addr, gpa = cr2;
last_retry_eip = vcpu->arch.last_retry_eip;
last_retry_addr = vcpu->arch.last_retry_addr;
@@ -5384,11 +3589,8 @@ static int complete_emulated_pio(struct kvm_vcpu *vcpu);
static void kvm_smm_changed(struct kvm_vcpu *vcpu)
{
if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
- /* This is a good place to trace that we are exiting SMM. */
- trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
-
/* Process a latched INIT or SMI, if any. */
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
}
kvm_mmu_reset_context(vcpu);
@@ -5404,8 +3606,8 @@ static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
kvm_smm_changed(vcpu);
}
-static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
- unsigned long *db)
+static int kvm_vcpu_check_hw_bp(size_t addr, u32 type, u32 dr7,
+ size_t *db)
{
u32 dr6 = 0;
int i;
@@ -5419,7 +3621,7 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6;
}
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, size_t rflags, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
@@ -5432,12 +3634,12 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
* that sets the TF flag".
*/
if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ if (vcpu->guest_debug & GVM_GUESTDBG_SINGLESTEP) {
kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
DR6_RTM;
kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->exit_reason = GVM_EXIT_DEBUG;
*r = EMULATE_USER_EXIT;
} else {
vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
@@ -5455,10 +3657,10 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
{
- if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
+ if (unlikely(vcpu->guest_debug & GVM_GUESTDBG_USE_HW_BP) &&
(vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
struct kvm_run *kvm_run = vcpu->run;
- unsigned long eip = kvm_get_linear_rip(vcpu);
+ size_t eip = kvm_get_linear_rip(vcpu);
u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
vcpu->arch.guest_debug_dr7,
vcpu->arch.eff_db);
@@ -5467,7 +3669,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
kvm_run->debug.arch.pc = eip;
kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->exit_reason = GVM_EXIT_DEBUG;
*r = EMULATE_USER_EXIT;
return true;
}
@@ -5475,7 +3677,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
!(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
- unsigned long eip = kvm_get_linear_rip(vcpu);
+ size_t eip = kvm_get_linear_rip(vcpu);
u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
vcpu->arch.dr7,
vcpu->arch.db);
@@ -5493,7 +3695,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
}
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
- unsigned long cr2,
+ size_t cr2,
int emulation_type,
void *insn,
int insn_len)
@@ -5531,7 +3733,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
r = x86_decode_insn(ctxt, insn, insn_len);
- trace_kvm_emulate_insn_start(vcpu);
++vcpu->stat.insn_emulation;
if (r != EMULATION_OK) {
if (emulation_type & EMULTYPE_TRAP_UD)
@@ -5600,7 +3801,7 @@ restart:
r = EMULATE_DONE;
if (writeback) {
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+ size_t rflags = kvm_x86_ops->get_rflags(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
if (vcpu->arch.hflags != ctxt->emul_flags)
@@ -5613,214 +3814,40 @@ restart:
__kvm_set_rflags(vcpu, ctxt->eflags);
/*
- * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
+ * For STI, interrupts are shadowed; so GVM_REQ_EVENT will
* do nothing, and it will be requested again as soon as
* the shadow expires. But we still need to check here,
* because POPF has no interrupt shadow.
*/
if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
} else
vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
return r;
}
-EXPORT_SYMBOL_GPL(x86_emulate_instruction);
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
{
- unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ size_t val = kvm_register_read(vcpu, VCPU_REGS_RAX);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
size, port, &val, 1);
/* do not return to emulator after return from userspace */
vcpu->arch.pio.count = 0;
return ret;
}
-EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
-
-static int kvmclock_cpu_down_prep(unsigned int cpu)
-{
- __this_cpu_write(cpu_tsc_khz, 0);
- return 0;
-}
-
-static void tsc_khz_changed(void *data)
-{
- struct cpufreq_freqs *freq = data;
- unsigned long khz = 0;
-
- if (data)
- khz = freq->new;
- else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
- khz = cpufreq_quick_get(raw_smp_processor_id());
- if (!khz)
- khz = tsc_khz;
- __this_cpu_write(cpu_tsc_khz, khz);
-}
-
-static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
- struct kvm *kvm;
- struct kvm_vcpu *vcpu;
- int i, send_ipi = 0;
-
- /*
- * We allow guests to temporarily run on slowing clocks,
- * provided we notify them after, or to run on accelerating
- * clocks, provided we notify them before. Thus time never
- * goes backwards.
- *
- * However, we have a problem. We can't atomically update
- * the frequency of a given CPU from this function; it is
- * merely a notifier, which can be called from any CPU.
- * Changing the TSC frequency at arbitrary points in time
- * requires a recomputation of local variables related to
- * the TSC for each VCPU. We must flag these local variables
- * to be updated and be sure the update takes place with the
- * new frequency before any guests proceed.
- *
- * Unfortunately, the combination of hotplug CPU and frequency
- * change creates an intractable locking scenario; the order
- * of when these callouts happen is undefined with respect to
- * CPU hotplug, and they can race with each other. As such,
- * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is
- * undefined; you can actually have a CPU frequency change take
- * place in between the computation of X and the setting of the
- * variable. To protect against this problem, all updates of
- * the per_cpu tsc_khz variable are done in an interrupt
- * protected IPI, and all callers wishing to update the value
- * must wait for a synchronous IPI to complete (which is trivial
- * if the caller is on the CPU already). This establishes the
- * necessary total order on variable updates.
- *
- * Note that because a guest time update may take place
- * anytime after the setting of the VCPU's request bit, the
- * correct TSC value must be set before the request. However,
- * to ensure the update actually makes it to any guest which
- * starts running in hardware virtualization between the set
- * and the acquisition of the spinlock, we must also ping the
- * CPU after setting the request bit.
- *
- */
-
- if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
- return 0;
- if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
- return 0;
-
- smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
-
- spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list) {
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (vcpu->cpu != freq->cpu)
- continue;
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- if (vcpu->cpu != smp_processor_id())
- send_ipi = 1;
- }
- }
- spin_unlock(&kvm_lock);
-
- if (freq->old < freq->new && send_ipi) {
- /*
- * We upscale the frequency. Must make the guest
- * doesn't see old kvmclock values while running with
- * the new frequency, otherwise we risk the guest sees
- * time go backwards.
- *
- * In case we update the frequency for another cpu
- * (which might be in guest context) send an interrupt
- * to kick the cpu out of guest context. Next time
- * guest context is entered kvmclock will be updated,
- * so the guest will not see stale values.
- */
- smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
- }
- return 0;
-}
-
-static struct notifier_block kvmclock_cpufreq_notifier_block = {
- .notifier_call = kvmclock_cpufreq_notifier
-};
-
-static int kvmclock_cpu_online(unsigned int cpu)
-{
- tsc_khz_changed(NULL);
- return 0;
-}
-
-static void kvm_timer_init(void)
-{
- max_tsc_khz = tsc_khz;
-
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-#ifdef CONFIG_CPU_FREQ
- struct cpufreq_policy policy;
- int cpu;
-
- memset(&policy, 0, sizeof(policy));
- cpu = get_cpu();
- cpufreq_get_policy(&policy, cpu);
- if (policy.cpuinfo.max_freq)
- max_tsc_khz = policy.cpuinfo.max_freq;
- put_cpu();
-#endif
- cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- }
- pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
-
- cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
- kvmclock_cpu_online, kvmclock_cpu_down_prep);
-}
static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
-int kvm_is_in_guest(void)
-{
- return __this_cpu_read(current_vcpu) != NULL;
-}
-
-static int kvm_is_user_mode(void)
-{
- int user_mode = 3;
-
- if (__this_cpu_read(current_vcpu))
- user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
-
- return user_mode != 0;
-}
-
-static unsigned long kvm_get_guest_ip(void)
-{
- unsigned long ip = 0;
-
- if (__this_cpu_read(current_vcpu))
- ip = kvm_rip_read(__this_cpu_read(current_vcpu));
-
- return ip;
-}
-
-static struct perf_guest_info_callbacks kvm_guest_cbs = {
- .is_in_guest = kvm_is_in_guest,
- .is_user_mode = kvm_is_user_mode,
- .get_guest_ip = kvm_get_guest_ip,
-};
-
void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
{
__this_cpu_write(current_vcpu, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
{
__this_cpu_write(current_vcpu, NULL);
}
-EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
static void kvm_set_mmio_spte_mask(void)
{
@@ -5852,53 +3879,9 @@ static void kvm_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask(mask);
}
-#ifdef CONFIG_X86_64
-static void pvclock_gtod_update_fn(struct work_struct *work)
-{
- struct kvm *kvm;
-
- struct kvm_vcpu *vcpu;
- int i;
-
- spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list)
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
- atomic_set(&kvm_guest_has_master_clock, 0);
- spin_unlock(&kvm_lock);
-}
-
-static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
-
-/*
- * Notification about pvclock gtod data update.
- */
-static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
- void *priv)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- struct timekeeper *tk = priv;
-
- update_pvclock_gtod(tk);
-
- /* disable master clock if host does not trust, or does not
- * use, TSC clocksource
- */
- if (gtod->clock.vclock_mode != VCLOCK_TSC &&
- atomic_read(&kvm_guest_has_master_clock) != 0)
- queue_work(system_long_wq, &pvclock_gtod_work);
-
- return 0;
-}
-
-static struct notifier_block pvclock_gtod_notifier = {
- .notifier_call = pvclock_gtod_notify,
-};
-#endif
-
int kvm_arch_init(void *opaque)
{
- int r;
+ int r = -EFAULT, i;
struct kvm_x86_ops *ops = opaque;
if (kvm_x86_ops) {
@@ -5918,17 +3901,6 @@ int kvm_arch_init(void *opaque)
goto out;
}
- r = -ENOMEM;
- shared_msrs = alloc_percpu(struct kvm_shared_msrs);
- if (!shared_msrs) {
- printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
- goto out;
- }
-
- r = kvm_mmu_module_init();
- if (r)
- goto out_free_percpu;
-
kvm_set_mmio_spte_mask();
kvm_x86_ops = ops;
@@ -5936,79 +3908,46 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0,
PT_PRESENT_MASK);
- kvm_timer_init();
-
- perf_register_guest_info_callbacks(&kvm_guest_cbs);
if (boot_cpu_has(X86_FEATURE_XSAVE))
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ /* We have to move the array initialization here because gcc's
+ * array-initialization extension is not supported here.
+ */
+ for (i = 0; i < XFEATURE_MAX; i++)
+ xstate_offsets[i] = xstate_sizes[i] = -1;
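
The loop above fills xstate_offsets[] and xstate_sizes[] at run time. Assuming the comment refers to gcc's range-designator initializers (the patch does not say which extension it means), the non-portable compile-time form being avoided would look like the illustration below; this is not code from the patch, and the element type is illustrative:

/* Illustration only (assumed GNU extension): a range designator fills
 * every element with -1 at compile time; MSVC does not accept this. */
static int xstate_offsets[XFEATURE_MAX] = { [0 ... XFEATURE_MAX - 1] = -1 };
static int xstate_sizes[XFEATURE_MAX]   = { [0 ... XFEATURE_MAX - 1] = -1 };
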
kvm_lapic_init();
-#ifdef CONFIG_X86_64
- pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
-#endif
return 0;
-out_free_percpu:
- free_percpu(shared_msrs);
out:
return r;
}
void kvm_arch_exit(void)
{
- perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
-
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
- cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
-#ifdef CONFIG_X86_64
- pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
-#endif
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
- free_percpu(shared_msrs);
}
int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
if (lapic_in_kernel(vcpu)) {
- vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+ vcpu->arch.mp_state = GVM_MP_STATE_HALTED;
return 1;
} else {
- vcpu->run->exit_reason = KVM_EXIT_HLT;
+ vcpu->run->exit_reason = GVM_EXIT_HLT;
return 0;
}
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->skip_emulated_instruction(vcpu);
return kvm_vcpu_halt(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_halt);
-
-/*
- * kvm_pv_kick_cpu_op: Kick a vcpu.
- *
- * @apicid - apicid of vcpu to be kicked.
- */
-static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
-{
- struct kvm_lapic_irq lapic_irq;
-
- lapic_irq.shorthand = 0;
- lapic_irq.dest_mode = 0;
- lapic_irq.dest_id = apicid;
- lapic_irq.msi_redir_hint = false;
-
- lapic_irq.delivery_mode = APIC_DM_REMRD;
- kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
-}
void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
{
@@ -6016,70 +3955,6 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}
-int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
-{
- unsigned long nr, a0, a1, a2, a3, ret;
- int op_64_bit, r = 1;
-
- kvm_x86_ops->skip_emulated_instruction(vcpu);
-
- if (kvm_hv_hypercall_enabled(vcpu->kvm))
- return kvm_hv_hypercall(vcpu);
-
- nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
- a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
- a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
- a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
- a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
-
- trace_kvm_hypercall(nr, a0, a1, a2, a3);
-
- op_64_bit = is_64_bit_mode(vcpu);
- if (!op_64_bit) {
- nr &= 0xFFFFFFFF;
- a0 &= 0xFFFFFFFF;
- a1 &= 0xFFFFFFFF;
- a2 &= 0xFFFFFFFF;
- a3 &= 0xFFFFFFFF;
- }
-
- if (kvm_x86_ops->get_cpl(vcpu) != 0) {
- ret = -KVM_EPERM;
- goto out;
- }
-
- switch (nr) {
- case KVM_HC_VAPIC_POLL_IRQ:
- ret = 0;
- break;
- case KVM_HC_KICK_CPU:
- kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
- ret = 0;
- break;
- default:
- ret = -KVM_ENOSYS;
- break;
- }
-out:
- if (!op_64_bit)
- ret = (u32)ret;
- kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
- ++vcpu->stat.hypercalls;
- return r;
-}
-EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
-
-static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- char instruction[3];
- unsigned long rip = kvm_rip_read(vcpu);
-
- kvm_x86_ops->patch_hypercall(vcpu, instruction);
-
- return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
-}
-
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
{
return vcpu->run->request_interrupt_window &&
@@ -6091,7 +3966,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
- kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
+ kvm_run->flags = is_smm(vcpu) ? GVM_RUN_X86_SMM : 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
kvm_run->ready_for_interrupt_injection =
@@ -6131,10 +4006,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
/* try to reinject previous events if any */
if (vcpu->arch.exception.pending) {
- trace_kvm_inj_exception(vcpu->arch.exception.nr,
- vcpu->arch.exception.has_error_code,
- vcpu->arch.exception.error_code);
-
if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
X86_EFLAGS_RF);
@@ -6182,7 +4053,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
* calling check_nested_events again here to avoid a race condition.
* See https://lkml.org/lkml/2014/7/2/60 for discussion about this
* proposal and current concerns. Perhaps we should be setting
- * KVM_REQ_EVENT only on certain events and not unconditionally?
+ * GVM_REQ_EVENT only on certain events and not unconditionally?
*/
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
@@ -6213,7 +4084,7 @@ static void process_nmi(struct kvm_vcpu *vcpu)
vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
}
#define put_smstate(type, buf, offset, val) \
@@ -6273,7 +4144,7 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
{
struct desc_ptr dt;
struct kvm_segment seg;
- unsigned long val;
+ size_t val;
int i;
put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
@@ -6324,7 +4195,7 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
#ifdef CONFIG_X86_64
struct desc_ptr dt;
struct kvm_segment seg;
- unsigned long val;
+ size_t val;
int i;
for (i = 0; i < 16; i++)
@@ -6383,7 +4254,6 @@ static void enter_smm(struct kvm_vcpu *vcpu)
char buf[512];
u32 cr0;
- trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
vcpu->arch.hflags |= HF_SMM_MASK;
memset(buf, 0, 512);
if (guest_cpuid_has_longmode(vcpu))
@@ -6448,12 +4318,12 @@ static void enter_smm(struct kvm_vcpu *vcpu)
static void process_smi(struct kvm_vcpu *vcpu)
{
vcpu->arch.smi_pending = true;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
}
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
- kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+ kvm_make_all_cpus_request(kvm, GVM_REQ_SCAN_IOAPIC);
}
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
@@ -6465,15 +4335,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
- if (irqchip_split(vcpu->kvm))
- kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
- else {
- if (vcpu->arch.apicv_active)
- kvm_x86_ops->sync_pir_to_irr(vcpu);
- kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
- }
- bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
- vcpu_to_synic(vcpu)->vec_bitmap, 256);
+ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+ bitmap_copy((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, 256);
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
@@ -6485,7 +4348,7 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
- struct page *page = NULL;
+ pfn_t pfn = 0;
if (!lapic_in_kernel(vcpu))
return;
@@ -6493,29 +4356,128 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
if (!kvm_x86_ops->set_apic_access_page_addr)
return;
- page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
- if (is_error_page(page))
+ pfn = gfn_to_pfn(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (is_error_noslot_pfn(pfn))
return;
- kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
+ kvm_x86_ops->set_apic_access_page_addr(vcpu, pfn << PAGE_SHIFT);
/*
* Do not pin apic access page in memory, the MMU notifier
* will call us again if it is migrated or swapped out.
*/
- put_page(page);
+ //put_page(page);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
- unsigned long address)
+ size_t address)
{
/*
* The physical address of apic access page is stored in the VMCS.
* Update it when it becomes invalid.
*/
if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
- kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+ kvm_make_all_cpus_request(kvm, GVM_REQ_APIC_PAGE_RELOAD);
+}
+
+//#define HOST_STAT_DEBUG
+/*
+ * A useful tool to check whether host state remains the same across
+ * host->guest->host switches. In theory, host state should be saved/restored
+ * only when it is subject to change. However, without souce code and
+ * document, you never know. When something goes terribly wrong, this tool
+ * can help check whether it is caused by incomplete host stat restore.
+ */
+#ifdef HOST_STAT_DEBUG
+#include <intrin.h>
+struct host_stat {
+ struct desc_ptr gdt;
+ struct desc_ptr idt;
+ u16 cs_sel;
+ u16 ss_sel;
+ u16 ds_sel;
+ u16 es_sel;
+ u16 fs_sel;
+ u16 gs_sel;
+ u16 ldt_sel;
+ u16 tr_sel;
+ struct desc_struct cs;
+ struct desc_struct ss;
+ struct desc_struct ds;
+ struct desc_struct es;
+ struct desc_struct fs;
+ struct desc_struct gs;
+ struct desc_struct ldt;
+ struct desc_struct tr;
+ u64 fs_base;
+ u64 gs_base;
+ u64 kernel_gs_base;
+ u64 cr0;
+ u64 cr2;
+ u64 cr3;
+ u64 cr4;
+ u64 cr8;
+ u64 efer;
+ u64 star;
+ u64 lstar;
+ u64 cstar;
+ u64 sf_mask;
+ u64 sysenter_cs;
+ u64 sysenter_eip;
+ u64 sysenter_esp;
+};
+
+static void save_host_stat_full(struct host_stat *hs)
+{
+ struct desc_struct *gdt;
+
+ _sgdt(&hs->gdt);
+ __sidt(&hs->idt);
+
+ savesegment(cs, hs->cs_sel);
+ savesegment(ss, hs->ss_sel);
+ savesegment(ds, hs->ds_sel);
+ savesegment(es, hs->es_sel);
+ savesegment(fs, hs->fs_sel);
+ savesegment(gs, hs->gs_sel);
+ hs->ldt_sel = gvm_read_ldt();
+ hs->tr_sel = gvm_read_tr();
+
+ gdt = (struct desc_struct *)hs->gdt.address;
+ hs->cs = gdt[hs->cs_sel >> 3];
+ hs->ss = gdt[hs->ss_sel >> 3];
+ hs->ds = gdt[hs->ds_sel >> 3];
+ hs->es = gdt[hs->es_sel >> 3];
+ hs->fs = gdt[hs->fs_sel >> 3];
+ hs->gs = gdt[hs->gs_sel >> 3];
+ hs->ldt = gdt[hs->ldt_sel >> 3];
+ hs->tr = gdt[hs->tr_sel >> 3];
+
+ hs->fs_base = __readmsr(MSR_FS_BASE);
+ hs->gs_base = __readmsr(MSR_GS_BASE);
+ hs->kernel_gs_base = __readmsr(MSR_KERNEL_GS_BASE);
+
+ hs->cr0 = __readcr0();
+ hs->cr2 = __readcr2();
+ hs->cr3 = __readcr3();
+ hs->cr4 = __readcr4();
+ hs->cr8 = __readcr8();
+
+ hs->efer = __readmsr(MSR_EFER);
+ hs->star = __readmsr(MSR_STAR);
+ hs->lstar = __readmsr(MSR_LSTAR);
+ hs->cstar = __readmsr(MSR_CSTAR);
+ hs->sf_mask = __readmsr(MSR_SYSCALL_MASK);
+
+ hs->sysenter_cs = __readmsr(MSR_IA32_SYSENTER_CS);
+ hs->sysenter_eip = __readmsr(MSR_IA32_SYSENTER_EIP);
+ hs->sysenter_esp = __readmsr(MSR_IA32_SYSENTER_ESP);
+}
+
+static int check_host_stat(struct host_stat *a, struct host_stat *b)
+{
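+ /* No comparison is done yet; a field-by-field check of the two
+  * snapshots would go here (see the sketch after this block). */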
+ return 0;
+}
+#endif
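
check_host_stat() above currently returns 0 unconditionally. A minimal sketch of what a field-by-field comparison could look like, assuming the struct host_stat layout defined in this patch; the HS_DIFF macro, the function name, and the return codes are hypothetical:

/* Hypothetical comparison for check_host_stat(): 0 when the two host
 * state snapshots match, a non-zero group code otherwise. */
#define HS_DIFF(field) \
    (memcmp(&a->field, &b->field, sizeof(a->field)) != 0)

static int check_host_stat_sketch(struct host_stat *a, struct host_stat *b)
{
    if (HS_DIFF(gdt) || HS_DIFF(idt))
        return 1;   /* descriptor table registers */
    if (HS_DIFF(tr_sel) || HS_DIFF(ldt_sel) || HS_DIFF(fs_base) ||
        HS_DIFF(gs_base) || HS_DIFF(kernel_gs_base))
        return 2;   /* task and segment state */
    if (HS_DIFF(cr0) || HS_DIFF(cr3) || HS_DIFF(cr4) || HS_DIFF(cr8))
        return 3;   /* control registers */
    if (HS_DIFF(efer) || HS_DIFF(star) || HS_DIFF(lstar) ||
        HS_DIFF(cstar) || HS_DIFF(sf_mask))
        return 4;   /* syscall MSRs */
    return 0;
}

In vcpu_enter_guest() below, the two kzalloc'd snapshots ('enter' and 'exit') would bracket kvm_x86_ops->run(): fill one with save_host_stat_full() before entering the guest, the other after returning, then compare.
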
/*
* Returns 1 to let vcpu_run() continue the guest execution loop without
@@ -6530,100 +4492,46 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_cpu_accept_dm_intr(vcpu);
bool req_immediate_exit = false;
+#ifdef HOST_STAT_DEBUG
+ struct host_stat *enter = kzalloc(sizeof(struct host_stat), GFP_KERNEL);
+ struct host_stat *exit = kzalloc(sizeof(struct host_stat), GFP_KERNEL);
+#endif
if (vcpu->requests) {
- if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
+ if (kvm_check_request(GVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu);
- if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
- __kvm_migrate_timers(vcpu);
- if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
- kvm_gen_update_masterclock(vcpu->kvm);
- if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
- kvm_gen_kvmclock_update(vcpu);
- if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
- r = kvm_guest_time_update(vcpu);
- if (unlikely(r))
- goto out;
- }
- if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
+ if (kvm_check_request(GVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
- if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ if (kvm_check_request(GVM_REQ_TLB_FLUSH, vcpu))
kvm_vcpu_flush_tlb(vcpu);
- if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
- vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
+ if (kvm_check_request(GVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
+ vcpu->run->exit_reason = GVM_EXIT_TPR_ACCESS;
r = 0;
goto out;
}
- if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
- vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+ if (kvm_check_request(GVM_REQ_TRIPLE_FAULT, vcpu)) {
+ vcpu->run->exit_reason = GVM_EXIT_SHUTDOWN;
r = 0;
goto out;
}
- if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
- vcpu->fpu_active = 0;
- kvm_x86_ops->fpu_deactivate(vcpu);
- }
- if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
- /* Page is swapped out. Do synthetic halt */
- vcpu->arch.apf.halted = true;
- r = 1;
- goto out;
- }
- if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
- record_steal_time(vcpu);
- if (kvm_check_request(KVM_REQ_SMI, vcpu))
+ if (kvm_check_request(GVM_REQ_SMI, vcpu))
process_smi(vcpu);
- if (kvm_check_request(KVM_REQ_NMI, vcpu))
+ if (kvm_check_request(GVM_REQ_NMI, vcpu))
process_nmi(vcpu);
- if (kvm_check_request(KVM_REQ_PMU, vcpu))
+#if 0
+ if (kvm_check_request(GVM_REQ_PMU, vcpu))
kvm_pmu_handle_event(vcpu);
- if (kvm_check_request(KVM_REQ_PMI, vcpu))
+ if (kvm_check_request(GVM_REQ_PMI, vcpu))
kvm_pmu_deliver_pmi(vcpu);
- if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
- BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
- if (test_bit(vcpu->arch.pending_ioapic_eoi,
- vcpu->arch.ioapic_handled_vectors)) {
- vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
- vcpu->run->eoi.vector =
- vcpu->arch.pending_ioapic_eoi;
- r = 0;
- goto out;
- }
- }
- if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
+#endif
+ if (kvm_check_request(GVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
- if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
+ if (kvm_check_request(GVM_REQ_APIC_PAGE_RELOAD, vcpu))
kvm_vcpu_reload_apic_access_page(vcpu);
- if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
- vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
- vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
- r = 0;
- goto out;
- }
- if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
- vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
- vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
- r = 0;
- goto out;
- }
- if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
- vcpu->run->exit_reason = KVM_EXIT_HYPERV;
- vcpu->run->hyperv = vcpu->arch.hyperv.exit;
- r = 0;
- goto out;
- }
-
- /*
- * KVM_REQ_HV_STIMER has to be processed after
- * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
- * depend on the guest clock being up-to-date
- */
- if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
- kvm_hv_process_stimers(vcpu);
}
/*
- * KVM_REQ_EVENT is not set when posted interrupts are set by
+ * GVM_REQ_EVENT is not set when posted interrupts are set by
* VT-d hardware, so we have to update RVI unconditionally.
*/
if (kvm_lapic_enabled(vcpu)) {
@@ -6636,9 +4544,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_lapic_find_highest_irr(vcpu));
}
- if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+ if (kvm_check_request(GVM_REQ_EVENT, vcpu) || req_int_win) {
kvm_apic_accept_events(vcpu);
- if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+ if (vcpu->arch.mp_state == GVM_MP_STATE_INIT_RECEIVED) {
r = 1;
goto out;
}
@@ -6674,14 +4582,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
- preempt_disable();
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- kvm_x86_ops->prepare_guest_switch(vcpu);
- if (vcpu->fpu_active)
- kvm_load_guest_fpu(vcpu);
+ local_irq_disable();
+#ifdef HOST_STAT_DEBUG
+ save_host_stat_full(enter);
+#endif
+ kvm_x86_ops->save_host_state(vcpu);
vcpu->mode = IN_GUEST_MODE;
-
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ vcpu->cpu = smp_processor_id();
/*
* We should set ->mode before check ->requests,
@@ -6690,16 +4599,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* to the page tables done while the VCPU is running.
* Please see the comment in kvm_flush_remote_tlbs.
*/
- smp_mb__after_srcu_read_unlock();
+ smp_mb();
- local_irq_disable();
-
- if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
- || need_resched() || signal_pending(current)) {
+ if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests) {
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
+ kvm_x86_ops->load_host_state(vcpu);
local_irq_enable();
- preempt_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = 1;
goto cancel_injection;
@@ -6708,14 +4614,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_load_guest_xcr0(vcpu);
if (req_immediate_exit) {
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
smp_send_reschedule(vcpu->cpu);
}
- trace_kvm_entry(vcpu->vcpu_id);
- wait_lapic_expire(vcpu);
- guest_enter_irqoff();
-
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
@@ -6723,26 +4625,29 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(vcpu->arch.eff_db[2], 2);
set_debugreg(vcpu->arch.eff_db[3], 3);
set_debugreg(vcpu->arch.dr6, 6);
- vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
+ vcpu->arch.switch_db_regs &= ~GVM_DEBUGREG_RELOAD;
}
+ kvm_load_guest_fpu(vcpu);
+
kvm_x86_ops->run(vcpu);
/*
* Do this here before restoring debug registers on the host. And
* since we do this before handling the vmexit, a DR access vmexit
* can (a) read the correct value of the debug registers, (b) set
- * KVM_DEBUGREG_WONT_EXIT again.
+ * GVM_DEBUGREG_WONT_EXIT again.
*/
- if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
- WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
+ if (unlikely(vcpu->arch.switch_db_regs & GVM_DEBUGREG_WONT_EXIT)) {
+ WARN_ON(vcpu->guest_debug & GVM_GUESTDBG_USE_HW_BP);
kvm_x86_ops->sync_dirty_debug_regs(vcpu);
kvm_update_dr0123(vcpu);
kvm_update_dr6(vcpu);
kvm_update_dr7(vcpu);
- vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
+ vcpu->arch.switch_db_regs &= ~GVM_DEBUGREG_RELOAD;
}
+#if 0
/*
* If the guest has used debug registers, at least dr7
* will be disabled while returning to the host.
@@ -6752,36 +4657,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
*/
if (hw_breakpoint_active())
hw_breakpoint_restore();
+#endif
vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+ kvm_save_guest_fpu(vcpu);
+
+	/*
+	 * Set cpu to -1 since we do not know when the Windows scheduler
+	 * may have moved us to another CPU.
+	 */
+	vcpu->cpu = -1;
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
kvm_put_guest_xcr0(vcpu);
+ kvm_x86_ops->load_host_state(vcpu);
+ kvm_x86_ops->vcpu_put(vcpu);
+#ifdef HOST_STAT_DEBUG
+ save_host_stat_full(exit);
+ BUG_ON(check_host_stat(enter, exit));
+#endif
kvm_x86_ops->handle_external_intr(vcpu);
++vcpu->stat.exits;
- guest_exit_irqoff();
-
local_irq_enable();
- preempt_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- /*
- * Profile KVM exit RIPs:
- */
- if (unlikely(prof_on == KVM_PROFILING)) {
- unsigned long rip = kvm_rip_read(vcpu);
- profile_hit(KVM_PROFILING, (void *)rip);
- }
-
- if (unlikely(vcpu->arch.tsc_always_catchup))
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
-
if (vcpu->arch.apic_attention)
kvm_lapic_sync_from_vapic(vcpu);
@@ -6790,7 +4693,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
cancel_injection:
kvm_x86_ops->cancel_injection(vcpu);
- if (unlikely(vcpu->arch.apic_attention))
+	if (vcpu->arch.apic_attention)
kvm_lapic_sync_from_vapic(vcpu);
out:
return r;
@@ -6798,29 +4701,23 @@ out:
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
- if (!kvm_arch_vcpu_runnable(vcpu) &&
- (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
+ if (!kvm_arch_vcpu_runnable(vcpu)) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- if (kvm_x86_ops->post_block)
- kvm_x86_ops->post_block(vcpu);
-
- if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+ if (!kvm_check_request(GVM_REQ_UNHALT, vcpu))
return 1;
}
kvm_apic_accept_events(vcpu);
switch(vcpu->arch.mp_state) {
- case KVM_MP_STATE_HALTED:
- vcpu->arch.pv.pv_unhalted = false;
+ case GVM_MP_STATE_HALTED:
vcpu->arch.mp_state =
- KVM_MP_STATE_RUNNABLE;
- case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.apf.halted = false;
+ GVM_MP_STATE_RUNNABLE;
+ case GVM_MP_STATE_RUNNABLE:
break;
- case KVM_MP_STATE_INIT_RECEIVED:
+ case GVM_MP_STATE_INIT_RECEIVED:
break;
default:
return -EINTR;
@@ -6831,8 +4728,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
{
- return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted);
+ return (vcpu->arch.mp_state == GVM_MP_STATE_RUNNABLE);
}
static int vcpu_run(struct kvm_vcpu *vcpu)
@@ -6852,31 +4748,21 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (r <= 0)
break;
- clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
+ clear_bit(GVM_REQ_PENDING_TIMER, &vcpu->requests);
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
if (dm_request_for_irq_injection(vcpu) &&
kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
r = 0;
- vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+ vcpu->run->exit_reason = GVM_EXIT_IRQ_WINDOW_OPEN;
++vcpu->stat.request_irq_exits;
break;
}
-
- kvm_check_async_pf_completion(vcpu);
-
- if (signal_pending(current)) {
- r = -EINTR;
- vcpu->run->exit_reason = KVM_EXIT_INTR;
- ++vcpu->stat.signal_exits;
+ if (test_and_clear_bit(0, (size_t *)&vcpu->run->user_event_pending)) {
+ vcpu->run->exit_reason = GVM_EXIT_INTR;
break;
}
- if (need_resched()) {
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- cond_resched();
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- }
}
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6925,6 +4811,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
struct kvm_mmio_fragment *frag;
unsigned len;
+ char *__data;
BUG_ON(!vcpu->mmio_needed);
@@ -6940,7 +4827,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
vcpu->mmio_cur_fragment++;
} else {
/* Go forward to the next mmio piece. */
- frag->data += len;
+ __data = frag->data;
+ __data += len;
+ frag->data = __data;
frag->gpa += len;
frag->len -= len;
}
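
The detour through __data above exists because frag->data is a void pointer: arithmetic on void * is a GCC extension, and MSVC (which this port targets) rejects it, so the byte-granular advance goes through a char * cast. The same idea in isolation (the helper name is illustrative, not from this patch):

/* Portable byte-wise pointer advance: cast void * to char * first. */
static inline void *ptr_advance(void *p, unsigned int bytes)
{
	return (char *)p + bytes;
}
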
@@ -6955,7 +4844,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return complete_emulated_io(vcpu);
}
- run->exit_reason = KVM_EXIT_MMIO;
+ run->exit_reason = GVM_EXIT_MMIO;
run->mmio.phys_addr = frag->gpa;
if (vcpu->mmio_is_write)
memcpy(run->mmio.data, frag->data, min(8u, frag->len));
@@ -6968,19 +4857,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
- struct fpu *fpu = &current->thread.fpu;
int r;
- sigset_t sigsaved;
-
- fpu__activate_curr(fpu);
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
- if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
+ if (unlikely(vcpu->arch.mp_state == GVM_MP_STATE_UNINITIALIZED)) {
kvm_vcpu_block(vcpu);
kvm_apic_accept_events(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ clear_bit(GVM_REQ_UNHALT, &vcpu->requests);
r = -EAGAIN;
goto out;
}
@@ -7006,9 +4888,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
out:
post_kvm_run_save(vcpu);
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
return r;
}
@@ -7079,7 +4958,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.exception.pending = false;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
return 0;
}
@@ -7092,7 +4971,6 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
*db = cs.db;
*l = cs.l;
}
-EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
@@ -7128,7 +5006,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
set_bit(vcpu->arch.interrupt.nr,
- (unsigned long *)sregs->interrupt_bitmap);
+ (size_t *)sregs->interrupt_bitmap);
return 0;
}
@@ -7137,11 +5015,7 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
kvm_apic_accept_events(vcpu);
- if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
- vcpu->arch.pv.pv_unhalted)
- mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
- else
- mp_state->mp_state = vcpu->arch.mp_state;
+ mp_state->mp_state = vcpu->arch.mp_state;
return 0;
}
@@ -7150,15 +5024,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
if (!lapic_in_kernel(vcpu) &&
- mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
+ mp_state->mp_state != GVM_MP_STATE_RUNNABLE)
return -EINVAL;
- if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
- vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
- set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
+ if (mp_state->mp_state == GVM_MP_STATE_SIPI_RECEIVED) {
+ vcpu->arch.mp_state = GVM_MP_STATE_INIT_RECEIVED;
+ set_bit(GVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
} else
vcpu->arch.mp_state = mp_state->mp_state;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
return 0;
}
@@ -7178,10 +5052,9 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
kvm_rip_write(vcpu, ctxt->eip);
kvm_set_rflags(vcpu, ctxt->eflags);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
return EMULATE_DONE;
}
-EXPORT_SYMBOL_GPL(kvm_task_switch);
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
@@ -7233,9 +5106,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
- max_bits = KVM_NR_INTERRUPTS;
+ max_bits = GVM_NR_INTERRUPTS;
pending_vec = find_first_bit(
- (const unsigned long *)sregs->interrupt_bitmap, max_bits);
+ (const size_t *)sregs->interrupt_bitmap, max_bits);
if (pending_vec < max_bits) {
kvm_queue_interrupt(vcpu, pending_vec, false);
pr_debug("Set back pending irq %d\n", pending_vec);
@@ -7257,9 +5130,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
!is_protmode(vcpu))
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ vcpu->arch.mp_state = GVM_MP_STATE_RUNNABLE;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
return 0;
}
@@ -7267,14 +5140,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- unsigned long rflags;
+ size_t rflags;
int i, r;
- if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
+ if (dbg->control & (GVM_GUESTDBG_INJECT_DB | GVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
goto out;
- if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+ if (dbg->control & GVM_GUESTDBG_INJECT_DB)
kvm_queue_exception(vcpu, DB_VECTOR);
else
kvm_queue_exception(vcpu, BP_VECTOR);
@@ -7287,20 +5160,20 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
rflags = kvm_get_rflags(vcpu);
vcpu->guest_debug = dbg->control;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+ if (!(vcpu->guest_debug & GVM_GUESTDBG_ENABLE))
vcpu->guest_debug = 0;
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
- for (i = 0; i < KVM_NR_DB_REGS; ++i)
+ if (vcpu->guest_debug & GVM_GUESTDBG_USE_HW_BP) {
+ for (i = 0; i < GVM_NR_DB_REGS; ++i)
vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
} else {
- for (i = 0; i < KVM_NR_DB_REGS; i++)
+ for (i = 0; i < GVM_NR_DB_REGS; i++)
vcpu->arch.eff_db[i] = vcpu->arch.db[i];
}
kvm_update_dr7(vcpu);
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ if (vcpu->guest_debug & GVM_GUESTDBG_SINGLESTEP)
vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
get_segment_base(vcpu, VCPU_SREG_CS);
@@ -7325,7 +5198,7 @@ out:
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
- unsigned long vaddr = tr->linear_address;
+ size_t vaddr = tr->linear_address;
gpa_t gpa;
int idx;
@@ -7343,7 +5216,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxregs_state *fxsave =
- &vcpu->arch.guest_fpu.state.fxsave;
+ &vcpu->arch.guest_fpu.fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
@@ -7360,7 +5233,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxregs_state *fxsave =
- &vcpu->arch.guest_fpu.state.fxsave;
+ &vcpu->arch.guest_fpu.fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
@@ -7374,11 +5247,28 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
+static inline void fpstate_init_fxstate(struct fxregs_state *fx)
+{
+ fx->cwd = 0x37f;
+ fx->mxcsr = 0x1f80;
+}
+
+static void fpstate_init(union fpu_state *state)
+{
+ memset(state, 0, PAGE_SIZE);
+
+#if 0
+ if (static_cpu_has(X86_FEATURE_XSAVES))
+ fpstate_init_xstate(&state->xsave);
+#endif
+ fpstate_init_fxstate(&state->fxsave);
+}
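
The two constants written by fpstate_init_fxstate() match the architectural init values (FNINIT for the x87 control word, the reset value for MXCSR); decoded here for reference, not part of the patch:

/*
 * FCW   0x037f: all x87 exceptions masked, extended precision,
 *               round-to-nearest.
 * MXCSR 0x1f80: all SSE exceptions masked, round-to-nearest,
 *               FTZ and DAZ disabled.
 */
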
+
static void fx_init(struct kvm_vcpu *vcpu)
{
- fpstate_init(&vcpu->arch.guest_fpu.state);
+ fpstate_init(&vcpu->arch.guest_fpu);
if (boot_cpu_has(X86_FEATURE_XSAVES))
- vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
+ vcpu->arch.guest_fpu.xsave.header.xcomp_bv =
host_xcr0 | XSTATE_COMPACTION_ENABLED;
/*
@@ -7389,54 +5279,78 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
+/*
+ * Save and restore FPU state through the legacy FXSAVE/FXRSTOR area.
+ * XSAVE handling is currently compiled out, so only the fxsave portion
+ * of union fpu_state is used. Both helpers must be called with
+ * interrupts disabled, as is the case around kvm_x86_ops->run().
+ */
+static inline void fpu_fxsave(union fpu_state *fpu)
{
- if (vcpu->guest_fpu_loaded)
- return;
+#if 0
+ if (likely(use_xsave())) {
+ copy_xregs_to_kernel(&fpu->state.xsave);
+ }
+#endif
- /*
- * Restore all possible states in the guest,
- * and assume host would use all available bits.
- * Guest xcr0 would be loaded later.
- */
- vcpu->guest_fpu_loaded = 1;
- __kernel_fpu_begin();
- __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
- trace_kvm_fpu(1);
+#ifdef _WIN64
+ _fxsave64(&fpu->fxsave);
+#else
+ _fxsave(&fpu->fxsave);
+#endif
}
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
+static inline void fpu_fxstore(union fpu_state *fpu)
{
- if (!vcpu->guest_fpu_loaded) {
- vcpu->fpu_counter = 0;
+#if 0
+ if (use_xsave()) {
+ copy_kernel_to_xregs(&fpstate->xsave, mask);
return;
}
+#endif
+#ifdef _WIN64
+ _fxrstor64(&fpu->fxsave);
+#else
+ _fxrstor(&fpu->fxsave);
+#endif
+}
- vcpu->guest_fpu_loaded = 0;
- copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
- __kernel_fpu_end();
- ++vcpu->stat.fpu_reload;
- /*
- * If using eager FPU mode, or if the guest is a frequent user
- * of the FPU, just leave the FPU active for next time.
- * Every 255 times fpu_counter rolls over to 0; a guest that uses
- * the FPU in bursts will revert to loading it on demand.
- */
- if (!use_eager_fpu()) {
- if (++vcpu->fpu_counter < 5)
- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
- }
- trace_kvm_fpu(0);
+void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
+{
+ uint64_t efer;
+
+ rdmsrl(MSR_EFER, efer);
+ wrmsrl(MSR_EFER, efer & ~EFER_FFXSR);
+
+ fpu_fxsave(&vcpu->arch.host_fpu);
+ fpu_fxstore(&vcpu->arch.guest_fpu);
+
+ if (efer & EFER_FFXSR)
+ wrmsrl(MSR_EFER, efer);
}
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+void kvm_save_guest_fpu(struct kvm_vcpu *vcpu)
{
- void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
+ uint64_t efer;
- kvmclock_reset(vcpu);
+ rdmsrl(MSR_EFER, efer);
+ if (efer & EFER_FFXSR)
+ wrmsrl(MSR_EFER, efer & ~EFER_FFXSR);
+ fpu_fxsave(&vcpu->arch.guest_fpu);
+ fpu_fxstore(&vcpu->arch.host_fpu);
+
+ if (efer & EFER_FFXSR)
+ wrmsrl(MSR_EFER, efer);
+}
+
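
kvm_load_guest_fpu() and kvm_save_guest_fpu() both clear EFER.FFXSR around the switch: with that bit set, FXSAVE/FXRSTOR executed at CPL 0 in 64-bit mode skip the XMM register image, which would leave guest (or host) vector state behind. A sketch of the same pattern factored into a helper (hypothetical name, not in the patch):

/* Run fn(vcpu) with fast FXSAVE/FXRSTOR disabled so XMM state is included. */
static void with_full_fxsave(void (*fn)(struct kvm_vcpu *), struct kvm_vcpu *vcpu)
{
	uint64_t efer;

	rdmsrl(MSR_EFER, efer);
	if (efer & EFER_FFXSR)
		wrmsrl(MSR_EFER, efer & ~EFER_FFXSR);

	fn(vcpu);

	if (efer & EFER_FFXSR)
		wrmsrl(MSR_EFER, efer);
}
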
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
kvm_x86_ops->vcpu_free(vcpu);
- free_cpumask_var(wbinvd_dirty_mask);
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -7456,47 +5370,25 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- int r;
-
kvm_vcpu_mtrr_init(vcpu);
- r = vcpu_load(vcpu);
- if (r)
- return r;
kvm_vcpu_reset(vcpu, false);
kvm_mmu_setup(vcpu);
- vcpu_put(vcpu);
- return r;
+ return 0;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
struct msr_data msr;
- struct kvm *kvm = vcpu->kvm;
- if (vcpu_load(vcpu))
- return;
msr.data = 0x0;
msr.index = MSR_IA32_TSC;
msr.host_initiated = true;
kvm_write_tsc(vcpu, &msr);
- vcpu_put(vcpu);
-
- if (!kvmclock_periodic_sync)
- return;
-
- schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
- KVMCLOCK_SYNC_PERIOD);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- int r;
- vcpu->arch.apf.msr_val = 0;
-
- r = vcpu_load(vcpu);
- BUG_ON(r);
kvm_mmu_unload(vcpu);
- vcpu_put(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
}
@@ -7521,18 +5413,10 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.cr2 = 0;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- vcpu->arch.apf.msr_val = 0;
- vcpu->arch.st.msr_val = 0;
-
- kvmclock_reset(vcpu);
-
- kvm_clear_async_pf_completion_queue(vcpu);
- kvm_async_pf_hash_reset(vcpu);
- vcpu->arch.apf.halted = false;
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
if (!init_event) {
- kvm_pmu_reset(vcpu);
+ //kvm_pmu_reset(vcpu);
vcpu->arch.smbase = 0x30000;
}
@@ -7556,99 +5440,12 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
int kvm_arch_hardware_enable(void)
{
- struct kvm *kvm;
- struct kvm_vcpu *vcpu;
- int i;
- int ret;
- u64 local_tsc;
- u64 max_tsc = 0;
- bool stable, backwards_tsc = false;
-
- kvm_shared_msr_cpu_online();
- ret = kvm_x86_ops->hardware_enable();
- if (ret != 0)
- return ret;
-
- local_tsc = rdtsc();
- stable = !check_tsc_unstable();
- list_for_each_entry(kvm, &vm_list, vm_list) {
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!stable && vcpu->cpu == smp_processor_id())
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- if (stable && vcpu->arch.last_host_tsc > local_tsc) {
- backwards_tsc = true;
- if (vcpu->arch.last_host_tsc > max_tsc)
- max_tsc = vcpu->arch.last_host_tsc;
- }
- }
- }
-
- /*
- * Sometimes, even reliable TSCs go backwards. This happens on
- * platforms that reset TSC during suspend or hibernate actions, but
- * maintain synchronization. We must compensate. Fortunately, we can
- * detect that condition here, which happens early in CPU bringup,
- * before any KVM threads can be running. Unfortunately, we can't
- * bring the TSCs fully up to date with real time, as we aren't yet far
- * enough into CPU bringup that we know how much real time has actually
- * elapsed; our helper function, ktime_get_boot_ns() will be using boot
- * variables that haven't been updated yet.
- *
- * So we simply find the maximum observed TSC above, then record the
- * adjustment to TSC in each VCPU. When the VCPU later gets loaded,
- * the adjustment will be applied. Note that we accumulate
- * adjustments, in case multiple suspend cycles happen before some VCPU
- * gets a chance to run again. In the event that no KVM threads get a
- * chance to run, we will miss the entire elapsed period, as we'll have
- * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
- * loose cycle time. This isn't too big a deal, since the loss will be
- * uniform across all VCPUs (not to mention the scenario is extremely
- * unlikely). It is possible that a second hibernate recovery happens
- * much faster than a first, causing the observed TSC here to be
- * smaller; this would require additional padding adjustment, which is
- * why we set last_host_tsc to the local tsc observed here.
- *
- * N.B. - this code below runs only on platforms with reliable TSC,
- * as that is the only way backwards_tsc is set above. Also note
- * that this runs for ALL vcpus, which is not a bug; all VCPUs should
- * have the same delta_cyc adjustment applied if backwards_tsc
- * is detected. Note further, this adjustment is only done once,
- * as we reset last_host_tsc on all VCPUs to stop this from being
- * called multiple times (one for each physical CPU bringup).
- *
- * Platforms with unreliable TSCs don't have to deal with this, they
- * will be compensated by the logic in vcpu_load, which sets the TSC to
- * catchup mode. This will catchup all VCPUs to real time, but cannot
- * guarantee that they stay in perfect synchronization.
- */
- if (backwards_tsc) {
- u64 delta_cyc = max_tsc - local_tsc;
- backwards_tsc_observed = true;
- list_for_each_entry(kvm, &vm_list, vm_list) {
- kvm_for_each_vcpu(i, vcpu, kvm) {
- vcpu->arch.tsc_offset_adjustment += delta_cyc;
- vcpu->arch.last_host_tsc = local_tsc;
- kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
- }
-
- /*
- * We have to disable TSC offset matching.. if you were
- * booting a VM while issuing an S4 host suspend....
- * you may have some problem. Solving this issue is
- * left as an exercise to the reader.
- */
- kvm->arch.last_tsc_nsec = 0;
- kvm->arch.last_tsc_write = 0;
- }
-
- }
- return 0;
+ return kvm_x86_ops->hardware_enable();
}
void kvm_arch_hardware_disable(void)
{
kvm_x86_ops->hardware_disable();
- drop_user_return_notifiers();
}
int kvm_arch_hardware_setup(void)
@@ -7659,20 +5456,6 @@ int kvm_arch_hardware_setup(void)
if (r != 0)
return r;
- if (kvm_has_tsc_control) {
- /*
- * Make sure the user can only configure tsc_khz values that
- * fit into a signed integer.
- * A min value is not calculated needed because it will always
- * be 1 on all machines.
- */
- u64 max = min(0x7fffffffULL,
- __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
- kvm_max_guest_tsc_khz = max;
-
- kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
- }
-
kvm_init_msr_list();
return 0;
}
@@ -7691,19 +5474,16 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
}
-struct static_key kvm_no_apic_vcpu __read_mostly;
-EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
+int kvm_no_apic_vcpu = 1;
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
- struct page *page;
struct kvm *kvm;
int r;
@@ -7711,50 +5491,27 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm = vcpu->kvm;
vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
- vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.emulate_ctxt.ops = &emulate_ops;
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ vcpu->arch.mp_state = GVM_MP_STATE_RUNNABLE;
else
- vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
-
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page) {
- r = -ENOMEM;
- goto fail;
- }
- vcpu->arch.pio_data = page_address(page);
+ vcpu->arch.mp_state = GVM_MP_STATE_UNINITIALIZED;
- kvm_set_tsc_khz(vcpu, max_tsc_khz);
+ vcpu->arch.pio_data = (void *)((size_t)vcpu->run + PAGE_SIZE);
r = kvm_mmu_create(vcpu);
if (r < 0)
- goto fail_free_pio_data;
+ goto fail;
if (irqchip_in_kernel(kvm)) {
r = kvm_create_lapic(vcpu);
if (r < 0)
goto fail_mmu_destroy;
- } else
- static_key_slow_inc(&kvm_no_apic_vcpu);
-
- vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
- GFP_KERNEL);
- if (!vcpu->arch.mce_banks) {
- r = -ENOMEM;
- goto fail_free_lapic;
- }
- vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
-
- if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
- r = -ENOMEM;
- goto fail_free_mce_banks;
- }
+ }
fx_init(vcpu);
vcpu->arch.ia32_tsc_adjust_msr = 0x0;
- vcpu->arch.pv_time_enabled = false;
vcpu->arch.guest_supported_xcr0 = 0;
vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
@@ -7763,23 +5520,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
- kvm_async_pf_hash_reset(vcpu);
- kvm_pmu_init(vcpu);
+ //kvm_pmu_init(vcpu);
vcpu->arch.pending_external_vector = -1;
- kvm_hv_vcpu_init(vcpu);
-
return 0;
-fail_free_mce_banks:
- kfree(vcpu->arch.mce_banks);
-fail_free_lapic:
- kvm_free_lapic(vcpu);
fail_mmu_destroy:
kvm_mmu_destroy(vcpu);
-fail_free_pio_data:
- free_page((unsigned long)vcpu->arch.pio_data);
fail:
return r;
}
@@ -7788,24 +5536,14 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
int idx;
- kvm_hv_vcpu_uninit(vcpu);
- kvm_pmu_destroy(vcpu);
- kfree(vcpu->arch.mce_banks);
+ //kvm_pmu_destroy(vcpu);
kvm_free_lapic(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_mmu_destroy(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
- free_page((unsigned long)vcpu->arch.pio_data);
- if (!lapic_in_kernel(vcpu))
- static_key_slow_dec(&kvm_no_apic_vcpu);
-}
-
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
- kvm_x86_ops->sched_in(vcpu, cpu);
}
-int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+int kvm_arch_init_vm(struct kvm *kvm, size_t type)
{
if (type)
return -EINVAL;
@@ -7813,24 +5551,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
- INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
- atomic_set(&kvm->arch.noncoherent_dma_count, 0);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
- set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
- /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
- set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
- &kvm->arch.irq_sources_bitmap);
+ set_bit(GVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
- spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
-
- kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
- pvclock_update_vm_gtod_copy(kvm);
-
- INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
- INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm);
@@ -7843,11 +5569,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
- int r;
- r = vcpu_load(vcpu);
- BUG_ON(r);
kvm_mmu_unload(vcpu);
- vcpu_put(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -7859,7 +5581,6 @@ static void kvm_free_vcpus(struct kvm *kvm)
* Unpin any mmu pages first.
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
- kvm_clear_async_pf_completion_queue(vcpu);
kvm_unload_vcpu_mmu(vcpu);
}
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -7873,23 +5594,15 @@ static void kvm_free_vcpus(struct kvm *kvm)
mutex_unlock(&kvm->lock);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
- cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
- cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
- kvm_free_all_assigned_devices(kvm);
- kvm_free_pit(kvm);
-}
-
int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int i, r;
- unsigned long hva;
+ size_t hva;
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *slot, old;
/* Called with kvm->slots_lock held. */
- if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
+ if (WARN_ON(id >= GVM_MEM_SLOTS_NUM))
return -EINVAL;
slot = id_to_memslot(slots, id);
@@ -7913,7 +5626,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
}
old = *slot;
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++) {
struct kvm_userspace_memory_region m;
m.slot = id | (i << 16);
@@ -7933,7 +5646,6 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
return 0;
}
-EXPORT_SYMBOL_GPL(__x86_set_memory_region);
int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
@@ -7945,11 +5657,10 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
return r;
}
-EXPORT_SYMBOL_GPL(x86_set_memory_region);
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- if (current->mm == kvm->mm) {
+ if (IoGetCurrentProcess() == kvm->process) {
/*
* Free memory regions allocated on behalf of userspace,
* unless the the memory map has changed due to process exit
@@ -7961,82 +5672,31 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
}
if (kvm_x86_ops->vm_destroy)
kvm_x86_ops->vm_destroy(kvm);
- kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
- kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
+ kvfree(rcu_dereference(kvm->arch.apic_map));
kvm_mmu_uninit_vm(kvm);
+ kvm_page_track_destroy(kvm);
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
- int i;
-
- for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
- if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
- kvfree(free->arch.rmap[i]);
- free->arch.rmap[i] = NULL;
- }
- if (i == 0)
- continue;
-
- if (!dont || free->arch.lpage_info[i - 1] !=
- dont->arch.lpage_info[i - 1]) {
- kvfree(free->arch.lpage_info[i - 1]);
- free->arch.lpage_info[i - 1] = NULL;
- }
+ if (!dont || free->arch.rmap != dont->arch.rmap) {
+ kvfree(free->arch.rmap);
+ free->arch.rmap = NULL;
}
-
kvm_page_track_free_memslot(free, dont);
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
+ size_t npages)
{
- int i;
-
- for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
- struct kvm_lpage_info *linfo;
- unsigned long ugfn;
- int lpages;
- int level = i + 1;
-
- lpages = gfn_to_index(slot->base_gfn + npages - 1,
- slot->base_gfn, level) + 1;
-
- slot->arch.rmap[i] =
- kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
- if (!slot->arch.rmap[i])
- goto out_free;
- if (i == 0)
- continue;
-
- linfo = kvm_kvzalloc(lpages * sizeof(*linfo));
- if (!linfo)
- goto out_free;
-
- slot->arch.lpage_info[i - 1] = linfo;
-
- if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
- linfo[0].disallow_lpage = 1;
- if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
- linfo[lpages - 1].disallow_lpage = 1;
- ugfn = slot->userspace_addr >> PAGE_SHIFT;
- /*
- * If the gfn and userspace address are not aligned wrt each
- * other, or if explicitly asked to, disable large page
- * support for this slot
- */
- if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
- !kvm_largepages_enabled()) {
- unsigned long j;
-
- for (j = 0; j < lpages; ++j)
- linfo[j].disallow_lpage = 1;
- }
- }
+ slot->arch.rmap =
+ kvm_kvzalloc(npages * sizeof(*slot->arch.rmap));
+ if (!slot->arch.rmap)
+ goto out_free;
if (kvm_page_track_create_memslot(slot, npages))
goto out_free;
@@ -8044,15 +5704,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
return 0;
out_free:
- for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
- kvfree(slot->arch.rmap[i]);
- slot->arch.rmap[i] = NULL;
- if (i == 0)
- continue;
-
- kvfree(slot->arch.lpage_info[i - 1]);
- slot->arch.lpage_info[i - 1] = NULL;
- }
+ kvfree(slot->arch.rmap);
return -ENOMEM;
}
@@ -8077,7 +5729,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
struct kvm_memory_slot *new)
{
/* Still write protect RO slot */
- if (new->flags & KVM_MEM_READONLY) {
+ if (new->flags & GVM_MEM_READONLY) {
kvm_mmu_slot_remove_write_access(kvm, new);
return;
}
@@ -8087,8 +5739,8 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
*
* kvm_x86_ops->slot_disable_log_dirty is called when:
*
- * - KVM_MR_CREATE with dirty logging is disabled
- * - KVM_MR_FLAGS_ONLY with dirty logging is disabled in new flag
+ * - GVM_MR_CREATE with dirty logging is disabled
+ * - GVM_MR_FLAGS_ONLY with dirty logging is disabled in new flag
*
* The reason is, in case of PML, we need to set D-bit for any slots
* with dirty logging disabled in order to eliminate unnecessary GPA
@@ -8112,7 +5764,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
*
* See the comments in fast_page_fault().
*/
- if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (new->flags & GVM_MEM_LOG_DIRTY_PAGES) {
if (kvm_x86_ops->slot_enable_log_dirty)
kvm_x86_ops->slot_enable_log_dirty(kvm, new);
else
@@ -8149,22 +5801,22 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* which can be collapsed into a single large-page spte. Later
* page faults will create the large-page sptes.
*/
- if ((change != KVM_MR_DELETE) &&
- (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
- !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+ if ((change != GVM_MR_DELETE) &&
+ (old->flags & GVM_MEM_LOG_DIRTY_PAGES) &&
+ !(new->flags & GVM_MEM_LOG_DIRTY_PAGES))
kvm_mmu_zap_collapsible_sptes(kvm, new);
/*
* Set up write protection and/or dirty logging for the new slot.
*
- * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have
+ * For GVM_MR_DELETE and GVM_MR_MOVE, the shadow pages of old slot have
* been zapped so no dirty logging staff is needed for old slot. For
- * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the
+ * GVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the
* new and it's also covered when dealing with the new slot.
*
* FIXME: const-ify all uses of struct kvm_memory_slot.
*/
- if (change != KVM_MR_DELETE)
+ if (change != GVM_MR_DELETE)
kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
}
@@ -8181,28 +5833,19 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
{
- if (!list_empty_careful(&vcpu->async_pf.done))
- return true;
-
if (kvm_apic_has_events(vcpu))
return true;
- if (vcpu->arch.pv.pv_unhalted)
- return true;
-
if (atomic_read(&vcpu->arch.nmi_queued))
return true;
- if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+ if (test_bit(GVM_REQ_SMI, &vcpu->requests))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
kvm_cpu_has_interrupt(vcpu))
return true;
- if (kvm_hv_has_stimer_pending(vcpu))
- return true;
-
return false;
}
@@ -8224,295 +5867,45 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
return kvm_x86_ops->interrupt_allowed(vcpu);
}
-unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
+size_t kvm_get_linear_rip(struct kvm_vcpu *vcpu)
{
if (is_64_bit_mode(vcpu))
return kvm_rip_read(vcpu);
return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
kvm_rip_read(vcpu));
}
-EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
-bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, size_t linear_rip)
{
return kvm_get_linear_rip(vcpu) == linear_rip;
}
-EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
-unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
+size_t kvm_get_rflags(struct kvm_vcpu *vcpu)
{
- unsigned long rflags;
+ size_t rflags;
rflags = kvm_x86_ops->get_rflags(vcpu);
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ if (vcpu->guest_debug & GVM_GUESTDBG_SINGLESTEP)
rflags &= ~X86_EFLAGS_TF;
return rflags;
}
-EXPORT_SYMBOL_GPL(kvm_get_rflags);
-static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+static void __kvm_set_rflags(struct kvm_vcpu *vcpu, size_t rflags)
{
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
+ if (vcpu->guest_debug & GVM_GUESTDBG_SINGLESTEP &&
kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
rflags |= X86_EFLAGS_TF;
kvm_x86_ops->set_rflags(vcpu, rflags);
}
-void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+void kvm_set_rflags(struct kvm_vcpu *vcpu, size_t rflags)
{
__kvm_set_rflags(vcpu, rflags);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_set_rflags);
-
-void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
-{
- int r;
-
- if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
- work->wakeup_all)
- return;
-
- r = kvm_mmu_reload(vcpu);
- if (unlikely(r))
- return;
-
- if (!vcpu->arch.mmu.direct_map &&
- work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
- return;
-
- vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
-}
-
-static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
-{
- return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
-}
-
-static inline u32 kvm_async_pf_next_probe(u32 key)
-{
- return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
-}
-
-static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
- u32 key = kvm_async_pf_hash_fn(gfn);
-
- while (vcpu->arch.apf.gfns[key] != ~0)
- key = kvm_async_pf_next_probe(key);
-
- vcpu->arch.apf.gfns[key] = gfn;
-}
-
-static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
- int i;
- u32 key = kvm_async_pf_hash_fn(gfn);
-
- for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
- (vcpu->arch.apf.gfns[key] != gfn &&
- vcpu->arch.apf.gfns[key] != ~0); i++)
- key = kvm_async_pf_next_probe(key);
-
- return key;
-}
-
-bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
- return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
-}
-
-static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
- u32 i, j, k;
-
- i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
- while (true) {
- vcpu->arch.apf.gfns[i] = ~0;
- do {
- j = kvm_async_pf_next_probe(j);
- if (vcpu->arch.apf.gfns[j] == ~0)
- return;
- k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
- /*
- * k lies cyclically in ]i,j]
- * | i.k.j |
- * |....j i.k.| or |.k..j i...|
- */
- } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
- vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
- i = j;
- }
-}
-
-static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
-{
-
- return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
- sizeof(val));
-}
-
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
- struct kvm_async_pf *work)
-{
- struct x86_exception fault;
-
- trace_kvm_async_pf_not_present(work->arch.token, work->gva);
- kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
-
- if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
- (vcpu->arch.apf.send_user_only &&
- kvm_x86_ops->get_cpl(vcpu) == 0))
- kvm_make_request(KVM_REQ_APF_HALT, vcpu);
- else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
- fault.vector = PF_VECTOR;
- fault.error_code_valid = true;
- fault.error_code = 0;
- fault.nested_page_fault = false;
- fault.address = work->arch.token;
- kvm_inject_page_fault(vcpu, &fault);
- }
-}
-
-void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
- struct kvm_async_pf *work)
-{
- struct x86_exception fault;
-
- trace_kvm_async_pf_ready(work->arch.token, work->gva);
- if (work->wakeup_all)
- work->arch.token = ~0; /* broadcast wakeup */
- else
- kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
-
- if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
- !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
- fault.vector = PF_VECTOR;
- fault.error_code_valid = true;
- fault.error_code = 0;
- fault.nested_page_fault = false;
- fault.address = work->arch.token;
- kvm_inject_page_fault(vcpu, &fault);
- }
- vcpu->arch.apf.halted = false;
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-}
-
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
-{
- if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
- return true;
- else
- return !kvm_event_needs_reinjection(vcpu) &&
- kvm_x86_ops->interrupt_allowed(vcpu);
-}
-
-void kvm_arch_start_assignment(struct kvm *kvm)
-{
- atomic_inc(&kvm->arch.assigned_device_count);
-}
-EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
-
-void kvm_arch_end_assignment(struct kvm *kvm)
-{
- atomic_dec(&kvm->arch.assigned_device_count);
-}
-EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
-
-bool kvm_arch_has_assigned_device(struct kvm *kvm)
-{
- return atomic_read(&kvm->arch.assigned_device_count);
-}
-EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
-
-void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
-{
- atomic_inc(&kvm->arch.noncoherent_dma_count);
-}
-EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
-
-void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
-{
- atomic_dec(&kvm->arch.noncoherent_dma_count);
-}
-EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
-
-bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
-{
- return atomic_read(&kvm->arch.noncoherent_dma_count);
-}
-EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
-
-bool kvm_arch_has_irq_bypass(void)
-{
- return kvm_x86_ops->update_pi_irte != NULL;
-}
-
-int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
- struct irq_bypass_producer *prod)
-{
- struct kvm_kernel_irqfd *irqfd =
- container_of(cons, struct kvm_kernel_irqfd, consumer);
-
- irqfd->producer = prod;
-
- return kvm_x86_ops->update_pi_irte(irqfd->kvm,
- prod->irq, irqfd->gsi, 1);
-}
-
-void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
- struct irq_bypass_producer *prod)
-{
- int ret;
- struct kvm_kernel_irqfd *irqfd =
- container_of(cons, struct kvm_kernel_irqfd, consumer);
-
- WARN_ON(irqfd->producer != prod);
- irqfd->producer = NULL;
-
- /*
- * When producer of consumer is unregistered, we change back to
- * remapped mode, so we can re-use the current implementation
- * when the irq is masked/disabled or the consumer side (KVM
- * int this case doesn't want to receive the interrupts.
- */
- ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
- if (ret)
- printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
- " fails: %d\n", irqfd->consumer.token, ret);
-}
-
-int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set)
-{
- if (!kvm_x86_ops->update_pi_irte)
- return -EINVAL;
-
- return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
+ kvm_make_request(GVM_REQ_EVENT, vcpu);
}
bool kvm_vector_hashing_enabled(void)
{
return vector_hashing;
}
-EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
-
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
+
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e8ff3e4..0b6b308 100644..100755
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -1,9 +1,14 @@
-#ifndef ARCH_X86_KVM_X86_H
-#define ARCH_X86_KVM_X86_H
+/*
+ * Copyright 2019 Google LLC
+ */
+
+#ifndef ARCH_X86_GVM_X86_H
+#define ARCH_X86_GVM_X86_H
#include <linux/kvm_host.h>
-#include <asm/pvclock.h>
+#include <gvm_types.h>
#include "kvm_cache_regs.h"
+#include <asm/msr-index.h>
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
@@ -67,17 +72,17 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
static inline int is_pae(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
+ return (int)kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
}
static inline int is_pse(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr4_bits(vcpu, X86_CR4_PSE);
+ return (int)kvm_read_cr4_bits(vcpu, X86_CR4_PSE);
}
static inline int is_paging(struct kvm_vcpu *vcpu)
{
- return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
+ return likely((int)kvm_read_cr0_bits(vcpu, X86_CR0_PG));
}
static inline u32 bit(int bitno)
@@ -113,7 +118,7 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
vcpu->arch.mmio_gva = 0;
}
-static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
+static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, size_t gva)
{
if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva &&
vcpu->arch.mmio_gva == (gva & PAGE_MASK))
@@ -131,21 +136,21 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
return false;
}
-static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu,
+static inline size_t kvm_register_readl(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
- unsigned long val = kvm_register_read(vcpu, reg);
+ size_t val = kvm_register_read(vcpu, reg);
return is_64_bit_mode(vcpu) ? val : (u32)val;
}
static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
enum kvm_reg reg,
- unsigned long val)
+ size_t val)
{
if (!is_64_bit_mode(vcpu))
val = (u32)val;
- return kvm_register_write(vcpu, reg, val);
+ kvm_register_write(vcpu, reg, val);
}
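
The widespread unsigned long -> size_t substitutions in this patch reflect the Windows x64 LLP64 ABI, where long stays 32 bits and can no longer hold a register or linear address; size_t remains pointer-sized on both LP64 and LLP64 targets. An illustrative compile-time check (not part of the patch):

/* size_t must be wide enough for a full register or linear address. */
_Static_assert(sizeof(size_t) == sizeof(void *),
	       "size_t expected to be pointer-sized");
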
static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
@@ -178,7 +183,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
bool kvm_vector_hashing_enabled(void);
-#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
+#define GVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU)
@@ -190,13 +195,7 @@ extern unsigned int min_timer_period_us;
extern unsigned int lapic_timer_advance_ns;
-extern struct static_key kvm_no_apic_vcpu;
-
-static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
-{
- return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
- vcpu->arch.virtual_tsc_shift);
-}
+extern int kvm_no_apic_vcpu;
/* Same "calling convention" as do_div:
* - divide (n << 32) by base