Diffstat (limited to 'virt')
-rw-r--r--               virt/kvm/Kconfig                        52
-rw-r--r--               virt/kvm/arm/aarch32.c                 152
-rw-r--r--               virt/kvm/arm/arch_timer.c              519
-rw-r--r--               virt/kvm/arm/hyp/timer-sr.c             68
-rw-r--r--               virt/kvm/arm/hyp/vgic-v2-sr.c          226
-rw-r--r--               virt/kvm/arm/hyp/vgic-v3-sr.c          328
-rw-r--r--               virt/kvm/arm/pmu.c                     543
-rw-r--r--               virt/kvm/arm/trace.h                    63
-rw-r--r--               virt/kvm/arm/vgic/vgic-init.c          445
-rw-r--r--               virt/kvm/arm/vgic/vgic-irqfd.c         126
-rw-r--r--               virt/kvm/arm/vgic/vgic-its.c          1570
-rw-r--r--               virt/kvm/arm/vgic/vgic-kvm-device.c    474
-rw-r--r--               virt/kvm/arm/vgic/vgic-mmio-v2.c       456
-rw-r--r--               virt/kvm/arm/vgic/vgic-mmio-v3.c       656
-rw-r--r--               virt/kvm/arm/vgic/vgic-mmio.c          583
-rw-r--r--               virt/kvm/arm/vgic/vgic-mmio.h          171
-rw-r--r--               virt/kvm/arm/vgic/vgic-v2.c            379
-rw-r--r--               virt/kvm/arm/vgic/vgic-v3.c            363
-rw-r--r--               virt/kvm/arm/vgic/vgic.c               731
-rw-r--r--               virt/kvm/arm/vgic/vgic.h               123
-rw-r--r--               virt/kvm/async_pf.c                    246
-rw-r--r--               virt/kvm/async_pf.h                     36
-rw-r--r--               virt/kvm/coalesced_mmio.c              183
-rw-r--r--               virt/kvm/coalesced_mmio.h               38
-rw-r--r--               virt/kvm/eventfd.c                     956
-rwxr-xr-x [-rw-r--r--]  virt/kvm/irqchip.c                      51
-rwxr-xr-x [-rw-r--r--]  virt/kvm/kvm_main.c                   2482
-rw-r--r--               virt/kvm/vfio.c                        295
-rw-r--r--               virt/kvm/vfio.h                         17
29 files changed, 688 insertions, 11644 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
deleted file mode 100644
index b0cc1a3..0000000
--- a/virt/kvm/Kconfig
+++ /dev/null
@@ -1,52 +0,0 @@
-# KVM common configuration items and defaults
-
-config HAVE_KVM
- bool
-
-config HAVE_KVM_IRQCHIP
- bool
-
-config HAVE_KVM_IRQFD
- bool
-
-config HAVE_KVM_IRQ_ROUTING
- bool
-
-config HAVE_KVM_EVENTFD
- bool
- select EVENTFD
-
-config KVM_MMIO
- bool
-
-config KVM_ASYNC_PF
- bool
-
-# Toggle to switch between direct notification and batch job
-config KVM_ASYNC_PF_SYNC
- bool
-
-config HAVE_KVM_MSI
- bool
-
-config HAVE_KVM_CPU_RELAX_INTERCEPT
- bool
-
-config KVM_VFIO
- bool
-
-config HAVE_KVM_ARCH_TLB_FLUSH_ALL
- bool
-
-config HAVE_KVM_INVALID_WAKEUPS
- bool
-
-config KVM_GENERIC_DIRTYLOG_READ_PROTECT
- bool
-
-config KVM_COMPAT
- def_bool y
- depends on KVM && COMPAT && !S390
-
-config HAVE_KVM_IRQ_BYPASS
- bool
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
deleted file mode 100644
index 528af4b..0000000
--- a/virt/kvm/arm/aarch32.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * (not much of an) Emulation layer for 32bit guests.
- *
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * based on arch/arm/kvm/emulate.c
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/kvm_host.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-
-#ifndef CONFIG_ARM64
-#define COMPAT_PSR_T_BIT PSR_T_BIT
-#define COMPAT_PSR_IT_MASK PSR_IT_MASK
-#endif
-
-/*
- * stolen from arch/arm/kernel/opcodes.c
- *
- * condition code lookup table
- * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
- *
- * bit position in short is condition code: NZCV
- */
-static const unsigned short cc_map[16] = {
- 0xF0F0, /* EQ == Z set */
- 0x0F0F, /* NE */
- 0xCCCC, /* CS == C set */
- 0x3333, /* CC */
- 0xFF00, /* MI == N set */
- 0x00FF, /* PL */
- 0xAAAA, /* VS == V set */
- 0x5555, /* VC */
- 0x0C0C, /* HI == C set && Z clear */
- 0xF3F3, /* LS == C clear || Z set */
- 0xAA55, /* GE == (N==V) */
- 0x55AA, /* LT == (N!=V) */
- 0x0A05, /* GT == (!Z && (N==V)) */
- 0xF5FA, /* LE == (Z || (N!=V)) */
- 0xFFFF, /* AL always */
- 0 /* NV */
-};
-
-/*
- * Check if a trapped instruction should have been executed or not.
- */
-bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
-{
- unsigned long cpsr;
- u32 cpsr_cond;
- int cond;
-
- /* Top two bits non-zero? Unconditional. */
- if (kvm_vcpu_get_hsr(vcpu) >> 30)
- return true;
-
- /* Is condition field valid? */
- cond = kvm_vcpu_get_condition(vcpu);
- if (cond == 0xE)
- return true;
-
- cpsr = *vcpu_cpsr(vcpu);
-
- if (cond < 0) {
- /* This can happen in Thumb mode: examine IT state. */
- unsigned long it;
-
- it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
-
- /* it == 0 => unconditional. */
- if (it == 0)
- return true;
-
- /* The cond for this insn works out as the top 4 bits. */
- cond = (it >> 4);
- }
-
- cpsr_cond = cpsr >> 28;
-
- if (!((cc_map[cond] >> cpsr_cond) & 1))
- return false;
-
- return true;
-}
-
-/**
- * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
- * @vcpu: The VCPU pointer
- *
- * When exceptions occur while instructions are executed in Thumb IF-THEN
- * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
- * to do this little bit of work manually. The fields map like this:
- *
- * IT[7:0] -> CPSR[26:25],CPSR[15:10]
- */
-static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu)
-{
- unsigned long itbits, cond;
- unsigned long cpsr = *vcpu_cpsr(vcpu);
- bool is_arm = !(cpsr & COMPAT_PSR_T_BIT);
-
- if (is_arm || !(cpsr & COMPAT_PSR_IT_MASK))
- return;
-
- cond = (cpsr & 0xe000) >> 13;
- itbits = (cpsr & 0x1c00) >> (10 - 2);
- itbits |= (cpsr & (0x3 << 25)) >> 25;
-
- /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */
- if ((itbits & 0x7) == 0)
- itbits = cond = 0;
- else
- itbits = (itbits << 1) & 0x1f;
-
- cpsr &= ~COMPAT_PSR_IT_MASK;
- cpsr |= cond << 13;
- cpsr |= (itbits & 0x1c) << (10 - 2);
- cpsr |= (itbits & 0x3) << 25;
- *vcpu_cpsr(vcpu) = cpsr;
-}
-
-/**
- * kvm_skip_instr - skip a trapped instruction and proceed to the next
- * @vcpu: The vcpu pointer
- */
-void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
-{
- bool is_thumb;
-
- is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT);
- if (is_thumb && !is_wide_instr)
- *vcpu_pc(vcpu) += 2;
- else
- *vcpu_pc(vcpu) += 4;
- kvm_adjust_itstate(vcpu);
-}
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
deleted file mode 100644
index 27a1f63..0000000
--- a/virt/kvm/arm/arch_timer.c
+++ /dev/null
@@ -1,519 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/cpu.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-
-#include <clocksource/arm_arch_timer.h>
-#include <asm/arch_timer.h>
-
-#include <kvm/arm_vgic.h>
-#include <kvm/arm_arch_timer.h>
-
-#include "trace.h"
-
-static struct timecounter *timecounter;
-static unsigned int host_vtimer_irq;
-static u32 host_vtimer_irq_flags;
-
-void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.timer_cpu.active_cleared_last = false;
-}
-
-static cycle_t kvm_phys_timer_read(void)
-{
- return timecounter->cc->read(timecounter->cc);
-}
-
-static bool timer_is_armed(struct arch_timer_cpu *timer)
-{
- return timer->armed;
-}
-
-/* timer_arm: as in "arm the timer", not as in ARM the company */
-static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
-{
- timer->armed = true;
- hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
- HRTIMER_MODE_ABS);
-}
-
-static void timer_disarm(struct arch_timer_cpu *timer)
-{
- if (timer_is_armed(timer)) {
- hrtimer_cancel(&timer->timer);
- cancel_work_sync(&timer->expired);
- timer->armed = false;
- }
-}
-
-static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
-{
- struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
-
- /*
- * We disable the timer in the world switch and let it be
- * handled by kvm_timer_sync_hwstate(). Getting a timer
- * interrupt at this point is a sure sign of some major
- * breakage.
- */
- pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
- return IRQ_HANDLED;
-}
-
-/*
- * Work function for handling the backup timer that we schedule when a vcpu is
- * no longer running, but had a timer programmed to fire in the future.
- */
-static void kvm_timer_inject_irq_work(struct work_struct *work)
-{
- struct kvm_vcpu *vcpu;
-
- vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
- vcpu->arch.timer_cpu.armed = false;
-
- WARN_ON(!kvm_timer_should_fire(vcpu));
-
- /*
- * If the vcpu is blocked we want to wake it up so that it will see
- * the timer has expired when entering the guest.
- */
- kvm_vcpu_kick(vcpu);
-}
-
-static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
-{
- cycle_t cval, now;
-
- cval = vcpu->arch.timer_cpu.cntv_cval;
- now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
- if (now < cval) {
- u64 ns;
-
- ns = cyclecounter_cyc2ns(timecounter->cc,
- cval - now,
- timecounter->mask,
- &timecounter->frac);
- return ns;
- }
-
- return 0;
-}
-
-static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
-{
- struct arch_timer_cpu *timer;
- struct kvm_vcpu *vcpu;
- u64 ns;
-
- timer = container_of(hrt, struct arch_timer_cpu, timer);
- vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
-
- /*
- * Check that the timer has really expired from the guest's
- * PoV (NTP on the host may have forced it to expire
- * early). If we should have slept longer, restart it.
- */
- ns = kvm_timer_compute_delta(vcpu);
- if (unlikely(ns)) {
- hrtimer_forward_now(hrt, ns_to_ktime(ns));
- return HRTIMER_RESTART;
- }
-
- schedule_work(&timer->expired);
- return HRTIMER_NORESTART;
-}
-
-static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
- (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
-}
-
-bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- cycle_t cval, now;
-
- if (!kvm_timer_irq_can_fire(vcpu))
- return false;
-
- cval = timer->cntv_cval;
- now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
- return cval <= now;
-}
-
-static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
-{
- int ret;
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- BUG_ON(!vgic_initialized(vcpu->kvm));
-
- timer->active_cleared_last = false;
- timer->irq.level = new_level;
- trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq,
- timer->irq.level);
- ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
- timer->irq.irq,
- timer->irq.level);
- WARN_ON(ret);
-}
-
-/*
- * Check if there was a change in the timer state (should we raise or lower
- * the line level to the GIC).
- */
-static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- /*
- * If userspace modified the timer registers via SET_ONE_REG before
- * the vgic was initialized, we mustn't set the timer->irq.level value
- * because the guest would never see the interrupt. Instead wait
- * until we call this function from kvm_timer_flush_hwstate.
- */
- if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
- return -ENODEV;
-
- if (kvm_timer_should_fire(vcpu) != timer->irq.level)
- kvm_timer_update_irq(vcpu, !timer->irq.level);
-
- return 0;
-}
-
-/*
- * Schedule the background timer before calling kvm_vcpu_block, so that this
- * thread is removed from its waitqueue and made runnable when there's a timer
- * interrupt to handle.
- */
-void kvm_timer_schedule(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- BUG_ON(timer_is_armed(timer));
-
- /*
- * No need to schedule a background timer if the guest timer has
- * already expired, because kvm_vcpu_block will return before putting
- * the thread to sleep.
- */
- if (kvm_timer_should_fire(vcpu))
- return;
-
- /*
- * If the timer is not capable of raising interrupts (disabled or
- * masked), then there's no more work for us to do.
- */
- if (!kvm_timer_irq_can_fire(vcpu))
- return;
-
- /* The timer has not yet expired, schedule a background timer */
- timer_arm(timer, kvm_timer_compute_delta(vcpu));
-}
-
-void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- timer_disarm(timer);
-}
-
-/**
- * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
- * @vcpu: The vcpu pointer
- *
- * Check if the virtual timer has expired while we were running in the host,
- * and inject an interrupt if that was the case.
- */
-void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- bool phys_active;
- int ret;
-
- if (kvm_timer_update_state(vcpu))
- return;
-
- /*
- * If we enter the guest with the virtual input level to the VGIC
- * asserted, then we have already told the VGIC what we need to, and
- * we don't need to exit from the guest until the guest deactivates
- * the already injected interrupt, so therefore we should set the
- * hardware active state to prevent unnecessary exits from the guest.
- *
- * Also, if we enter the guest with the virtual timer interrupt active,
- * then it must be active on the physical distributor, because we set
- * the HW bit and the guest must be able to deactivate the virtual and
- * physical interrupt at the same time.
- *
- * Conversely, if the virtual input level is deasserted and the virtual
- * interrupt is not active, then always clear the hardware active state
- * to ensure that hardware interrupts from the timer triggers a guest
- * exit.
- */
- phys_active = timer->irq.level ||
- kvm_vgic_map_is_active(vcpu, timer->irq.irq);
-
- /*
- * We want to avoid hitting the (re)distributor as much as
- * possible, as this is a potentially expensive MMIO access
- * (not to mention locks in the irq layer), and a solution for
- * this is to cache the "active" state in memory.
- *
- * Things to consider: we cannot cache an "active set" state,
- * because the HW can change this behind our back (it becomes
- * "clear" in the HW). We must then restrict the caching to
- * the "clear" state.
- *
- * The cache is invalidated on:
- * - vcpu put, indicating that the HW cannot be trusted to be
- * in a sane state on the next vcpu load,
- * - any change in the interrupt state
- *
- * Usage conditions:
- * - cached value is "active clear"
- * - value to be programmed is "active clear"
- */
- if (timer->active_cleared_last && !phys_active)
- return;
-
- ret = irq_set_irqchip_state(host_vtimer_irq,
- IRQCHIP_STATE_ACTIVE,
- phys_active);
- WARN_ON(ret);
-
- timer->active_cleared_last = !phys_active;
-}
-
-/**
- * kvm_timer_sync_hwstate - sync timer state from cpu
- * @vcpu: The vcpu pointer
- *
- * Check if the virtual timer has expired while we were running in the guest,
- * and inject an interrupt if that was the case.
- */
-void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- BUG_ON(timer_is_armed(timer));
-
- /*
- * The guest could have modified the timer registers or the timer
- * could have expired, update the timer state.
- */
- kvm_timer_update_state(vcpu);
-}
-
-int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *irq)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- /*
- * The vcpu timer irq number cannot be determined in
- * kvm_timer_vcpu_init() because it is called much before
- * kvm_vcpu_set_target(). To handle this, we determine
- * vcpu timer irq number when the vcpu is reset.
- */
- timer->irq.irq = irq->irq;
-
- /*
- * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
- * and to 0 for ARMv7. We provide an implementation that always
- * resets the timer to be disabled and unmasked and is compliant with
- * the ARMv7 architecture.
- */
- timer->cntv_ctl = 0;
- kvm_timer_update_state(vcpu);
-
- return 0;
-}
-
-void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
- hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- timer->timer.function = kvm_timer_expire;
-}
-
-static void kvm_timer_init_interrupt(void *info)
-{
- enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
-}
-
-int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- switch (regid) {
- case KVM_REG_ARM_TIMER_CTL:
- timer->cntv_ctl = value;
- break;
- case KVM_REG_ARM_TIMER_CNT:
- vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
- break;
- case KVM_REG_ARM_TIMER_CVAL:
- timer->cntv_cval = value;
- break;
- default:
- return -1;
- }
-
- kvm_timer_update_state(vcpu);
- return 0;
-}
-
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- switch (regid) {
- case KVM_REG_ARM_TIMER_CTL:
- return timer->cntv_ctl;
- case KVM_REG_ARM_TIMER_CNT:
- return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
- case KVM_REG_ARM_TIMER_CVAL:
- return timer->cntv_cval;
- }
- return (u64)-1;
-}
-
-static int kvm_timer_starting_cpu(unsigned int cpu)
-{
- kvm_timer_init_interrupt(NULL);
- return 0;
-}
-
-static int kvm_timer_dying_cpu(unsigned int cpu)
-{
- disable_percpu_irq(host_vtimer_irq);
- return 0;
-}
-
-int kvm_timer_hyp_init(void)
-{
- struct arch_timer_kvm_info *info;
- int err;
-
- info = arch_timer_get_kvm_info();
- timecounter = &info->timecounter;
-
- if (info->virtual_irq <= 0) {
- kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
- info->virtual_irq);
- return -ENODEV;
- }
- host_vtimer_irq = info->virtual_irq;
-
- host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
- if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
- host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
- kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
- host_vtimer_irq);
- host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
- }
-
- err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
- "kvm guest timer", kvm_get_running_vcpus());
- if (err) {
- kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
- host_vtimer_irq, err);
- return err;
- }
-
- kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
-
- cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
- "AP_KVM_ARM_TIMER_STARTING", kvm_timer_starting_cpu,
- kvm_timer_dying_cpu);
- return err;
-}
-
-void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- timer_disarm(timer);
- kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq);
-}
-
-int kvm_timer_enable(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- struct irq_desc *desc;
- struct irq_data *data;
- int phys_irq;
- int ret;
-
- if (timer->enabled)
- return 0;
-
- /*
- * Find the physical IRQ number corresponding to the host_vtimer_irq
- */
- desc = irq_to_desc(host_vtimer_irq);
- if (!desc) {
- kvm_err("%s: no interrupt descriptor\n", __func__);
- return -EINVAL;
- }
-
- data = irq_desc_get_irq_data(desc);
- while (data->parent_data)
- data = data->parent_data;
-
- phys_irq = data->hwirq;
-
- /*
- * Tell the VGIC that the virtual interrupt is tied to a
- * physical interrupt. We do that once per VCPU.
- */
- ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq);
- if (ret)
- return ret;
-
-
- /*
- * There is a potential race here between VCPUs starting for the first
- * time, which may be enabling the timer multiple times. That doesn't
- * hurt though, because we're just setting a variable to the same
- * variable that it already was. The important thing is that all
- * VCPUs have the enabled variable set, before entering the guest, if
- * the arch timers are enabled.
- */
- if (timecounter)
- timer->enabled = 1;
-
- return 0;
-}
-
-void kvm_timer_init(struct kvm *kvm)
-{
- kvm->arch.timer.cntvoff = kvm_phys_timer_read();
-}
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
deleted file mode 100644
index 798866a..0000000
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <clocksource/arm_arch_timer.h>
-#include <linux/compiler.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_hyp.h>
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- u64 val;
-
- if (timer->enabled) {
- timer->cntv_ctl = read_sysreg_el0(cntv_ctl);
- timer->cntv_cval = read_sysreg_el0(cntv_cval);
- }
-
- /* Disable the virtual timer */
- write_sysreg_el0(0, cntv_ctl);
-
- /* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
- write_sysreg(val, cnthctl_el2);
-
- /* Clear cntvoff for the host */
- write_sysreg(0, cntvoff_el2);
-}
-
-void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- u64 val;
-
- /*
- * Disallow physical timer access for the guest
- * Physical counter access is allowed
- */
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
-
- if (timer->enabled) {
- write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
- write_sysreg_el0(timer->cntv_cval, cntv_cval);
- isb();
- write_sysreg_el0(timer->cntv_ctl, cntv_ctl);
- }
-}
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
deleted file mode 100644
index c8aeb7b..0000000
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/compiler.h>
-#include <linux/irqchip/arm-gic.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-
-static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
- void __iomem *base)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
- u32 eisr0, eisr1;
- int i;
- bool expect_mi;
-
- expect_mi = !!(cpu_if->vgic_hcr & GICH_HCR_UIE);
-
- for (i = 0; i < nr_lr; i++) {
- if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
- continue;
-
- expect_mi |= (!(cpu_if->vgic_lr[i] & GICH_LR_HW) &&
- (cpu_if->vgic_lr[i] & GICH_LR_EOI));
- }
-
- if (expect_mi) {
- cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
-
- if (cpu_if->vgic_misr & GICH_MISR_EOI) {
- eisr0 = readl_relaxed(base + GICH_EISR0);
- if (unlikely(nr_lr > 32))
- eisr1 = readl_relaxed(base + GICH_EISR1);
- else
- eisr1 = 0;
- } else {
- eisr0 = eisr1 = 0;
- }
- } else {
- cpu_if->vgic_misr = 0;
- eisr0 = eisr1 = 0;
- }
-
-#ifdef CONFIG_CPU_BIG_ENDIAN
- cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1;
-#else
- cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0;
-#endif
-}
-
-static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
- u32 elrsr0, elrsr1;
-
- elrsr0 = readl_relaxed(base + GICH_ELRSR0);
- if (unlikely(nr_lr > 32))
- elrsr1 = readl_relaxed(base + GICH_ELRSR1);
- else
- elrsr1 = 0;
-
-#ifdef CONFIG_CPU_BIG_ENDIAN
- cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
-#else
- cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-#endif
-}
-
-static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
- int i;
-
- for (i = 0; i < nr_lr; i++) {
- if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
- continue;
-
- if (cpu_if->vgic_elrsr & (1UL << i))
- cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
- else
- cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
-
- writel_relaxed(0, base + GICH_LR0 + (i * 4));
- }
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
-
- if (!base)
- return;
-
- cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
-
- if (vcpu->arch.vgic_cpu.live_lrs) {
- cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
-
- save_maint_int_state(vcpu, base);
- save_elrsr(vcpu, base);
- save_lrs(vcpu, base);
-
- writel_relaxed(0, base + GICH_HCR);
-
- vcpu->arch.vgic_cpu.live_lrs = 0;
- } else {
- cpu_if->vgic_eisr = 0;
- cpu_if->vgic_elrsr = ~0UL;
- cpu_if->vgic_misr = 0;
- cpu_if->vgic_apr = 0;
- }
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
- int i;
- u64 live_lrs = 0;
-
- if (!base)
- return;
-
-
- for (i = 0; i < nr_lr; i++)
- if (cpu_if->vgic_lr[i] & GICH_LR_STATE)
- live_lrs |= 1UL << i;
-
- if (live_lrs) {
- writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
- writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
- for (i = 0; i < nr_lr; i++) {
- if (!(live_lrs & (1UL << i)))
- continue;
-
- writel_relaxed(cpu_if->vgic_lr[i],
- base + GICH_LR0 + (i * 4));
- }
- }
-
- writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
- vcpu->arch.vgic_cpu.live_lrs = live_lrs;
-}
-
-#ifdef CONFIG_ARM64
-/*
- * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
- * guest.
- *
- * @vcpu: the offending vcpu
- *
- * Returns:
- * 1: GICV access successfully performed
- * 0: Not a GICV access
- * -1: Illegal GICV access
- */
-int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_dist *vgic = &kvm->arch.vgic;
- phys_addr_t fault_ipa;
- void __iomem *addr;
- int rd;
-
- /* Build the full address */
- fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
- fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
-
- /* If not for GICV, move on */
- if (fault_ipa < vgic->vgic_cpu_base ||
- fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE))
- return 0;
-
- /* Reject anything but a 32bit access */
- if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
- return -1;
-
- /* Not aligned? Don't bother */
- if (fault_ipa & 3)
- return -1;
-
- rd = kvm_vcpu_dabt_get_rd(vcpu);
- addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va);
- addr += fault_ipa - vgic->vgic_cpu_base;
-
- if (kvm_vcpu_dabt_iswrite(vcpu)) {
- u32 data = vcpu_data_guest_to_host(vcpu,
- vcpu_get_reg(vcpu, rd),
- sizeof(u32));
- writel_relaxed(data, addr);
- } else {
- u32 data = readl_relaxed(addr);
- vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data,
- sizeof(u32)));
- }
-
- return 1;
-}
-#endif
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
deleted file mode 100644
index 3947095..0000000
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/compiler.h>
-#include <linux/irqchip/arm-gic-v3.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_hyp.h>
-
-#define vtr_to_max_lr_idx(v) ((v) & 0xf)
-#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1)
-
-static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
-{
- switch (lr & 0xf) {
- case 0:
- return read_gicreg(ICH_LR0_EL2);
- case 1:
- return read_gicreg(ICH_LR1_EL2);
- case 2:
- return read_gicreg(ICH_LR2_EL2);
- case 3:
- return read_gicreg(ICH_LR3_EL2);
- case 4:
- return read_gicreg(ICH_LR4_EL2);
- case 5:
- return read_gicreg(ICH_LR5_EL2);
- case 6:
- return read_gicreg(ICH_LR6_EL2);
- case 7:
- return read_gicreg(ICH_LR7_EL2);
- case 8:
- return read_gicreg(ICH_LR8_EL2);
- case 9:
- return read_gicreg(ICH_LR9_EL2);
- case 10:
- return read_gicreg(ICH_LR10_EL2);
- case 11:
- return read_gicreg(ICH_LR11_EL2);
- case 12:
- return read_gicreg(ICH_LR12_EL2);
- case 13:
- return read_gicreg(ICH_LR13_EL2);
- case 14:
- return read_gicreg(ICH_LR14_EL2);
- case 15:
- return read_gicreg(ICH_LR15_EL2);
- }
-
- unreachable();
-}
-
-static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
-{
- switch (lr & 0xf) {
- case 0:
- write_gicreg(val, ICH_LR0_EL2);
- break;
- case 1:
- write_gicreg(val, ICH_LR1_EL2);
- break;
- case 2:
- write_gicreg(val, ICH_LR2_EL2);
- break;
- case 3:
- write_gicreg(val, ICH_LR3_EL2);
- break;
- case 4:
- write_gicreg(val, ICH_LR4_EL2);
- break;
- case 5:
- write_gicreg(val, ICH_LR5_EL2);
- break;
- case 6:
- write_gicreg(val, ICH_LR6_EL2);
- break;
- case 7:
- write_gicreg(val, ICH_LR7_EL2);
- break;
- case 8:
- write_gicreg(val, ICH_LR8_EL2);
- break;
- case 9:
- write_gicreg(val, ICH_LR9_EL2);
- break;
- case 10:
- write_gicreg(val, ICH_LR10_EL2);
- break;
- case 11:
- write_gicreg(val, ICH_LR11_EL2);
- break;
- case 12:
- write_gicreg(val, ICH_LR12_EL2);
- break;
- case 13:
- write_gicreg(val, ICH_LR13_EL2);
- break;
- case 14:
- write_gicreg(val, ICH_LR14_EL2);
- break;
- case 15:
- write_gicreg(val, ICH_LR15_EL2);
- break;
- }
-}
-
-static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr)
-{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- int i;
- bool expect_mi;
-
- expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE);
-
- for (i = 0; i < nr_lr; i++) {
- if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
- continue;
-
- expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) &&
- (cpu_if->vgic_lr[i] & ICH_LR_EOI));
- }
-
- if (expect_mi) {
- cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
-
- if (cpu_if->vgic_misr & ICH_MISR_EOI)
- cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
- else
- cpu_if->vgic_eisr = 0;
- } else {
- cpu_if->vgic_misr = 0;
- cpu_if->vgic_eisr = 0;
- }
-}
-
-void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- u64 val;
-
- /*
- * Make sure stores to the GIC via the memory mapped interface
- * are now visible to the system register interface.
- */
- if (!cpu_if->vgic_sre)
- dsb(st);
-
- cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
-
- if (vcpu->arch.vgic_cpu.live_lrs) {
- int i;
- u32 max_lr_idx, nr_pri_bits;
-
- cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
-
- write_gicreg(0, ICH_HCR_EL2);
- val = read_gicreg(ICH_VTR_EL2);
- max_lr_idx = vtr_to_max_lr_idx(val);
- nr_pri_bits = vtr_to_nr_pri_bits(val);
-
- save_maint_int_state(vcpu, max_lr_idx + 1);
-
- for (i = 0; i <= max_lr_idx; i++) {
- if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
- continue;
-
- if (cpu_if->vgic_elrsr & (1 << i))
- cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
- else
- cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
-
- __gic_v3_set_lr(0, i);
- }
-
- switch (nr_pri_bits) {
- case 7:
- cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
- cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
- case 6:
- cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
- default:
- cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
- }
-
- switch (nr_pri_bits) {
- case 7:
- cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
- cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
- case 6:
- cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
- default:
- cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
- }
-
- vcpu->arch.vgic_cpu.live_lrs = 0;
- } else {
- cpu_if->vgic_misr = 0;
- cpu_if->vgic_eisr = 0;
- cpu_if->vgic_elrsr = 0xffff;
- cpu_if->vgic_ap0r[0] = 0;
- cpu_if->vgic_ap0r[1] = 0;
- cpu_if->vgic_ap0r[2] = 0;
- cpu_if->vgic_ap0r[3] = 0;
- cpu_if->vgic_ap1r[0] = 0;
- cpu_if->vgic_ap1r[1] = 0;
- cpu_if->vgic_ap1r[2] = 0;
- cpu_if->vgic_ap1r[3] = 0;
- }
-
- val = read_gicreg(ICC_SRE_EL2);
- write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
-
- if (!cpu_if->vgic_sre) {
- /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
- isb();
- write_gicreg(1, ICC_SRE_EL1);
- }
-}
-
-void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- u64 val;
- u32 max_lr_idx, nr_pri_bits;
- u16 live_lrs = 0;
- int i;
-
- /*
- * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
- * Group0 interrupt (as generated in GICv2 mode) to be
- * delivered as a FIQ to the guest, with potentially fatal
- * consequences. So we must make sure that ICC_SRE_EL1 has
- * been actually programmed with the value we want before
- * starting to mess with the rest of the GIC.
- */
- if (!cpu_if->vgic_sre) {
- write_gicreg(0, ICC_SRE_EL1);
- isb();
- }
-
- val = read_gicreg(ICH_VTR_EL2);
- max_lr_idx = vtr_to_max_lr_idx(val);
- nr_pri_bits = vtr_to_nr_pri_bits(val);
-
- for (i = 0; i <= max_lr_idx; i++) {
- if (cpu_if->vgic_lr[i] & ICH_LR_STATE)
- live_lrs |= (1 << i);
- }
-
- write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
-
- if (live_lrs) {
- write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
-
- switch (nr_pri_bits) {
- case 7:
- write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
- write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
- case 6:
- write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
- default:
- write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
- }
-
- switch (nr_pri_bits) {
- case 7:
- write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
- write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
- case 6:
- write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
- default:
- write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
- }
-
- for (i = 0; i <= max_lr_idx; i++) {
- if (!(live_lrs & (1 << i)))
- continue;
-
- __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
- }
- }
-
- /*
- * Ensures that the above will have reached the
- * (re)distributors. This ensure the guest will read the
- * correct values from the memory-mapped interface.
- */
- if (!cpu_if->vgic_sre) {
- isb();
- dsb(sy);
- }
- vcpu->arch.vgic_cpu.live_lrs = live_lrs;
-
- /*
- * Prevent the guest from touching the GIC system registers if
- * SRE isn't enabled for GICv3 emulation.
- */
- write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
- ICC_SRE_EL2);
-}
-
-void __hyp_text __vgic_v3_init_lrs(void)
-{
- int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2));
- int i;
-
- for (i = 0; i <= max_lr_idx; i++)
- __gic_v3_set_lr(0, i);
-}
-
-u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void)
-{
- return read_gicreg(ICH_VTR_EL2);
-}
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
deleted file mode 100644
index 69ccce3..0000000
--- a/virt/kvm/arm/pmu.c
+++ /dev/null
@@ -1,543 +0,0 @@
-/*
- * Copyright (C) 2015 Linaro Ltd.
- * Author: Shannon Zhao <shannon.zhao@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/cpu.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/perf_event.h>
-#include <linux/uaccess.h>
-#include <asm/kvm_emulate.h>
-#include <kvm/arm_pmu.h>
-#include <kvm/arm_vgic.h>
-
-/**
- * kvm_pmu_get_counter_value - get PMU counter value
- * @vcpu: The vcpu pointer
- * @select_idx: The counter index
- */
-u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
-{
- u64 counter, reg, enabled, running;
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc = &pmu->pmc[select_idx];
-
- reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
- ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
- counter = vcpu_sys_reg(vcpu, reg);
-
- /* The real counter value is equal to the value of counter register plus
- * the value perf event counts.
- */
- if (pmc->perf_event)
- counter += perf_event_read_value(pmc->perf_event, &enabled,
- &running);
-
- return counter & pmc->bitmask;
-}
-
-/**
- * kvm_pmu_set_counter_value - set PMU counter value
- * @vcpu: The vcpu pointer
- * @select_idx: The counter index
- * @val: The counter value
- */
-void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
-{
- u64 reg;
-
- reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
- ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
- vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
-}
-
-/**
- * kvm_pmu_stop_counter - stop PMU counter
- * @pmc: The PMU counter pointer
- *
- * If this counter has been configured to monitor some event, release it here.
- */
-static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
-{
- u64 counter, reg;
-
- if (pmc->perf_event) {
- counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
- reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
- ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
- vcpu_sys_reg(vcpu, reg) = counter;
- perf_event_disable(pmc->perf_event);
- perf_event_release_kernel(pmc->perf_event);
- pmc->perf_event = NULL;
- }
-}
-
-/**
- * kvm_pmu_vcpu_reset - reset pmu state for cpu
- * @vcpu: The vcpu pointer
- *
- */
-void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
-{
- int i;
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
-
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
- kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
- pmu->pmc[i].idx = i;
- pmu->pmc[i].bitmask = 0xffffffffUL;
- }
-}
-
-/**
- * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
- * @vcpu: The vcpu pointer
- *
- */
-void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- int i;
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
-
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
- struct kvm_pmc *pmc = &pmu->pmc[i];
-
- if (pmc->perf_event) {
- perf_event_disable(pmc->perf_event);
- perf_event_release_kernel(pmc->perf_event);
- pmc->perf_event = NULL;
- }
- }
-}
-
-u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
-{
- u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
-
- val &= ARMV8_PMU_PMCR_N_MASK;
- if (val == 0)
- return BIT(ARMV8_PMU_CYCLE_IDX);
- else
- return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
-}
-
-/**
- * kvm_pmu_enable_counter - enable selected PMU counter
- * @vcpu: The vcpu pointer
- * @val: the value guest writes to PMCNTENSET register
- *
- * Call perf_event_enable to start counting the perf event
- */
-void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
-{
- int i;
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc;
-
- if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
- return;
-
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
- if (!(val & BIT(i)))
- continue;
-
- pmc = &pmu->pmc[i];
- if (pmc->perf_event) {
- perf_event_enable(pmc->perf_event);
- if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
- kvm_debug("fail to enable perf event\n");
- }
- }
-}
-
-/**
- * kvm_pmu_disable_counter - disable selected PMU counter
- * @vcpu: The vcpu pointer
- * @val: the value guest writes to PMCNTENCLR register
- *
- * Call perf_event_disable to stop counting the perf event
- */
-void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val)
-{
- int i;
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc;
-
- if (!val)
- return;
-
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
- if (!(val & BIT(i)))
- continue;
-
- pmc = &pmu->pmc[i];
- if (pmc->perf_event)
- perf_event_disable(pmc->perf_event);
- }
-}
-
-static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
-{
- u64 reg = 0;
-
- if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
- reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0);
- reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
- reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1);
- reg &= kvm_pmu_valid_counter_mask(vcpu);
- }
-
- return reg;
-}
-
-/**
- * kvm_pmu_overflow_set - set PMU overflow interrupt
- * @vcpu: The vcpu pointer
- * @val: the value guest writes to PMOVSSET register
- */
-void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val)
-{
- u64 reg;
-
- if (val == 0)
- return;
-
- vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val;
- reg = kvm_pmu_overflow_status(vcpu);
- if (reg != 0)
- kvm_vcpu_kick(vcpu);
-}
-
-static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
-{
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- bool overflow;
-
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return;
-
- overflow = !!kvm_pmu_overflow_status(vcpu);
- if (pmu->irq_level != overflow) {
- pmu->irq_level = overflow;
- kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- pmu->irq_num, overflow);
- }
-}
-
-/**
- * kvm_pmu_flush_hwstate - flush pmu state to cpu
- * @vcpu: The vcpu pointer
- *
- * Check if the PMU has overflowed while we were running in the host, and inject
- * an interrupt if that was the case.
- */
-void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
-{
- kvm_pmu_update_state(vcpu);
-}
-
-/**
- * kvm_pmu_sync_hwstate - sync pmu state from cpu
- * @vcpu: The vcpu pointer
- *
- * Check if the PMU has overflowed while we were running in the guest, and
- * inject an interrupt if that was the case.
- */
-void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
-{
- kvm_pmu_update_state(vcpu);
-}
-
-static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu;
- struct kvm_vcpu_arch *vcpu_arch;
-
- pmc -= pmc->idx;
- pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
- vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
- return container_of(vcpu_arch, struct kvm_vcpu, arch);
-}
-
-/**
- * When perf event overflows, call kvm_pmu_overflow_set to set overflow status.
- */
-static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
-{
- struct kvm_pmc *pmc = perf_event->overflow_handler_context;
- struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
- int idx = pmc->idx;
-
- kvm_pmu_overflow_set(vcpu, BIT(idx));
-}
-
-/**
- * kvm_pmu_software_increment - do software increment
- * @vcpu: The vcpu pointer
- * @val: the value guest writes to PMSWINC register
- */
-void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
-{
- int i;
- u64 type, enable, reg;
-
- if (val == 0)
- return;
-
- enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
- for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
- if (!(val & BIT(i)))
- continue;
- type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
- & ARMV8_PMU_EVTYPE_EVENT;
- if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
- && (enable & BIT(i))) {
- reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
- reg = lower_32_bits(reg);
- vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
- if (!reg)
- kvm_pmu_overflow_set(vcpu, BIT(i));
- }
- }
-}
-
-/**
- * kvm_pmu_handle_pmcr - handle PMCR register
- * @vcpu: The vcpu pointer
- * @val: the value guest writes to PMCR register
- */
-void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
-{
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc;
- u64 mask;
- int i;
-
- mask = kvm_pmu_valid_counter_mask(vcpu);
- if (val & ARMV8_PMU_PMCR_E) {
- kvm_pmu_enable_counter(vcpu,
- vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
- } else {
- kvm_pmu_disable_counter(vcpu, mask);
- }
-
- if (val & ARMV8_PMU_PMCR_C)
- kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
-
- if (val & ARMV8_PMU_PMCR_P) {
- for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++)
- kvm_pmu_set_counter_value(vcpu, i, 0);
- }
-
- if (val & ARMV8_PMU_PMCR_LC) {
- pmc = &pmu->pmc[ARMV8_PMU_CYCLE_IDX];
- pmc->bitmask = 0xffffffffffffffffUL;
- }
-}
-
-static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
-{
- return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
- (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
-}
-
-/**
- * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
- * @vcpu: The vcpu pointer
- * @data: The data guest writes to PMXEVTYPER_EL0
- * @select_idx: The number of selected counter
- *
- * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
- * event with given hardware event number. Here we call perf_event API to
- * emulate this action and create a kernel perf event for it.
- */
-void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
- u64 select_idx)
-{
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc = &pmu->pmc[select_idx];
- struct perf_event *event;
- struct perf_event_attr attr;
- u64 eventsel, counter;
-
- kvm_pmu_stop_counter(vcpu, pmc);
- eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
-
- /* Software increment event does't need to be backed by a perf event */
- if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
- select_idx != ARMV8_PMU_CYCLE_IDX)
- return;
-
- memset(&attr, 0, sizeof(struct perf_event_attr));
- attr.type = PERF_TYPE_RAW;
- attr.size = sizeof(attr);
- attr.pinned = 1;
- attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, select_idx);
- attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
- attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
- attr.exclude_hv = 1; /* Don't count EL2 events */
- attr.exclude_host = 1; /* Don't count host events */
- attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
- ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
-
- counter = kvm_pmu_get_counter_value(vcpu, select_idx);
- /* The initial sample period (overflow count) of an event. */
- attr.sample_period = (-counter) & pmc->bitmask;
-
- event = perf_event_create_kernel_counter(&attr, -1, current,
- kvm_pmu_perf_overflow, pmc);
- if (IS_ERR(event)) {
- pr_err_once("kvm: pmu event creation failed %ld\n",
- PTR_ERR(event));
- return;
- }
-
- pmc->perf_event = event;
-}
-
-bool kvm_arm_support_pmu_v3(void)
-{
- /*
- * Check if HW_PERF_EVENTS are supported by checking the number of
- * hardware performance counters. This could ensure the presence of
- * a physical PMU and CONFIG_PERF_EVENT is selected.
- */
- return (perf_num_counters() > 0);
-}
-
-static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
-{
- if (!kvm_arm_support_pmu_v3())
- return -ENODEV;
-
- /*
- * We currently require an in-kernel VGIC to use the PMU emulation,
- * because we do not support forwarding PMU overflow interrupts to
- * userspace yet.
- */
- if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))
- return -ENODEV;
-
- if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) ||
- !kvm_arm_pmu_irq_initialized(vcpu))
- return -ENXIO;
-
- if (kvm_arm_pmu_v3_ready(vcpu))
- return -EBUSY;
-
- kvm_pmu_vcpu_reset(vcpu);
- vcpu->arch.pmu.ready = true;
-
- return 0;
-}
-
-#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
-
-/*
- * For one VM the interrupt type must be same for each vcpu.
- * As a PPI, the interrupt number is the same for all vcpus,
- * while as an SPI it must be a separate number per vcpu.
- */
-static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
-{
- int i;
- struct kvm_vcpu *vcpu;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!kvm_arm_pmu_irq_initialized(vcpu))
- continue;
-
- if (irq_is_ppi(irq)) {
- if (vcpu->arch.pmu.irq_num != irq)
- return false;
- } else {
- if (vcpu->arch.pmu.irq_num == irq)
- return false;
- }
- }
-
- return true;
-}
-
-int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
-{
- switch (attr->attr) {
- case KVM_ARM_VCPU_PMU_V3_IRQ: {
- int __user *uaddr = (int __user *)(long)attr->addr;
- int irq;
-
- if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
- return -ENODEV;
-
- if (get_user(irq, uaddr))
- return -EFAULT;
-
- /* The PMU overflow interrupt can be a PPI or a valid SPI. */
- if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq)))
- return -EINVAL;
-
- if (!pmu_irq_is_valid(vcpu->kvm, irq))
- return -EINVAL;
-
- if (kvm_arm_pmu_irq_initialized(vcpu))
- return -EBUSY;
-
- kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
- vcpu->arch.pmu.irq_num = irq;
- return 0;
- }
- case KVM_ARM_VCPU_PMU_V3_INIT:
- return kvm_arm_pmu_v3_init(vcpu);
- }
-
- return -ENXIO;
-}
-
-int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
-{
- switch (attr->attr) {
- case KVM_ARM_VCPU_PMU_V3_IRQ: {
- int __user *uaddr = (int __user *)(long)attr->addr;
- int irq;
-
- if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
- return -ENODEV;
-
- if (!kvm_arm_pmu_irq_initialized(vcpu))
- return -ENXIO;
-
- irq = vcpu->arch.pmu.irq_num;
- return put_user(irq, uaddr);
- }
- }
-
- return -ENXIO;
-}
-
-int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
-{
- switch (attr->attr) {
- case KVM_ARM_VCPU_PMU_V3_IRQ:
- case KVM_ARM_VCPU_PMU_V3_INIT:
- if (kvm_arm_support_pmu_v3() &&
- test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
- return 0;
- }
-
- return -ENXIO;
-}
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
deleted file mode 100644
index 37d8b98..0000000
--- a/virt/kvm/arm/trace.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_KVM_H
-
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kvm
-
-/*
- * Tracepoints for vgic
- */
-TRACE_EVENT(vgic_update_irq_pending,
- TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
- TP_ARGS(vcpu_id, irq, level),
-
- TP_STRUCT__entry(
- __field( unsigned long, vcpu_id )
- __field( __u32, irq )
- __field( bool, level )
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->irq = irq;
- __entry->level = level;
- ),
-
- TP_printk("VCPU: %ld, IRQ %d, level: %d",
- __entry->vcpu_id, __entry->irq, __entry->level)
-);
-
-/*
- * Tracepoints for arch_timer
- */
-TRACE_EVENT(kvm_timer_update_irq,
- TP_PROTO(unsigned long vcpu_id, __u32 irq, int level),
- TP_ARGS(vcpu_id, irq, level),
-
- TP_STRUCT__entry(
- __field( unsigned long, vcpu_id )
- __field( __u32, irq )
- __field( int, level )
- ),
-
- TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
- __entry->irq = irq;
- __entry->level = level;
- ),
-
- TP_printk("VCPU: %ld, IRQ %d, level %d",
- __entry->vcpu_id, __entry->irq, __entry->level)
-);
-
-#endif /* _TRACE_KVM_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE trace
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
deleted file mode 100644
index 8cebfbc..0000000
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * Copyright (C) 2015, 2016 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/uaccess.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/kvm_host.h>
-#include <kvm/arm_vgic.h>
-#include <asm/kvm_mmu.h>
-#include "vgic.h"
-
-/*
- * Initialization rules: there are multiple stages to the vgic
- * initialization, both for the distributor and the CPU interfaces.
- *
- * Distributor:
- *
- * - kvm_vgic_early_init(): initialization of static data that doesn't
- * depend on any sizing information or emulation type. No allocation
- * is allowed there.
- *
- * - vgic_init(): allocation and initialization of the generic data
- * structures that depend on sizing information (number of CPUs,
- * number of interrupts). Also initializes the vcpu specific data
- * structures. Can be executed lazily for GICv2.
- *
- * CPU Interface:
- *
- * - kvm_vgic_cpu_early_init(): initialization of static data that
- * doesn't depend on any sizing information or emulation type. No
- * allocation is allowed there.
- */
-
-/* EARLY INIT */
-
-/*
- * Those 2 functions should not be needed anymore but they
- * still are called from arm.c
- */
-void kvm_vgic_early_init(struct kvm *kvm)
-{
-}
-
-void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
-{
-}
-
-/* CREATION */
-
-/**
- * kvm_vgic_create: triggered by the instantiation of the VGIC device by
- * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only)
- * or through the generic KVM_CREATE_DEVICE API ioctl.
- * irqchip_in_kernel() tells you if this function succeeded or not.
- * @kvm: kvm struct pointer
- * @type: KVM_DEV_TYPE_ARM_VGIC_V[23]
- */
-int kvm_vgic_create(struct kvm *kvm, u32 type)
-{
- int i, vcpu_lock_idx = -1, ret;
- struct kvm_vcpu *vcpu;
-
- if (irqchip_in_kernel(kvm))
- return -EEXIST;
-
- /*
- * This function is also called by the KVM_CREATE_IRQCHIP handler,
- * which had no chance yet to check the availability of the GICv2
- * emulation. So check this here again. KVM_CREATE_DEVICE does
- * the proper checks already.
- */
- if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
- !kvm_vgic_global_state.can_emulate_gicv2)
- return -ENODEV;
-
- /*
- * Any time a vcpu is run, vcpu_load is called which tries to grab the
- * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
- * that no other VCPUs are run while we create the vgic.
- */
- ret = -EBUSY;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!mutex_trylock(&vcpu->mutex))
- goto out_unlock;
- vcpu_lock_idx = i;
- }
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (vcpu->arch.has_run_once)
- goto out_unlock;
- }
- ret = 0;
-
- if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
- kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
- else
- kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS;
-
- if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) {
- ret = -E2BIG;
- goto out_unlock;
- }
-
- kvm->arch.vgic.in_kernel = true;
- kvm->arch.vgic.vgic_model = type;
-
- /*
- * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init)
- * it is stored in distributor struct for asm save/restore purpose
- */
- kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base;
-
- kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
- kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
- kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
-
-out_unlock:
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&vcpu->mutex);
- }
- return ret;
-}
-
-/* INIT/DESTROY */
-
-/**
- * kvm_vgic_dist_init: initialize the dist data structures
- * @kvm: kvm struct pointer
- * @nr_spis: number of spis, frozen by caller
- */
-static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
- int i;
-
- INIT_LIST_HEAD(&dist->lpi_list_head);
- spin_lock_init(&dist->lpi_list_lock);
-
- dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
- if (!dist->spis)
- return -ENOMEM;
-
- /*
- * In the following code we do not take the irq struct lock since
- * no other action on irq structs can happen while the VGIC is
- * not initialized yet:
-	 * If someone wants to inject an interrupt or does an MMIO access, we
- * require prior initialization in case of a virtual GICv3 or trigger
- * initialization when using a virtual GICv2.
- */
- for (i = 0; i < nr_spis; i++) {
- struct vgic_irq *irq = &dist->spis[i];
-
- irq->intid = i + VGIC_NR_PRIVATE_IRQS;
- INIT_LIST_HEAD(&irq->ap_list);
- spin_lock_init(&irq->irq_lock);
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu0;
- kref_init(&irq->refcount);
- if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
- irq->targets = 0;
- else
- irq->mpidr = 0;
- }
- return 0;
-}
-
-/**
- * kvm_vgic_vcpu_init: initialize the vcpu data structures and
- * enable the VCPU interface
- * @vcpu: the VCPU whose VGIC should be initialized
- */
-static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- int i;
-
- INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
- spin_lock_init(&vgic_cpu->ap_list_lock);
-
- /*
- * Enable and configure all SGIs to be edge-triggered and
- * configure all PPIs as level-triggered.
- */
- for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
- struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
-
- INIT_LIST_HEAD(&irq->ap_list);
- spin_lock_init(&irq->irq_lock);
- irq->intid = i;
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu;
- irq->targets = 1U << vcpu->vcpu_id;
- kref_init(&irq->refcount);
- if (vgic_irq_is_sgi(i)) {
- /* SGIs */
- irq->enabled = 1;
- irq->config = VGIC_CONFIG_EDGE;
- } else {
- /* PPIs */
- irq->config = VGIC_CONFIG_LEVEL;
- }
- }
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_enable(vcpu);
- else
- vgic_v3_enable(vcpu);
-}
-
-/*
- * vgic_init: allocates and initializes dist and vcpu data structures
- * depending on two dimensioning parameters:
- * - the number of spis
- * - the number of vcpus
- * The function is generally called once nr_spis has been explicitly set
- * by userspace through the KVM DEVICE API. If not, nr_spis defaults to the
- * legacy value of 256 interrupts (i.e. 224 SPIs).
- * vgic_initialized() returns true when this function has succeeded.
- * Must be called with kvm->lock held!
- */
-int vgic_init(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu;
- int ret = 0, i;
-
- if (vgic_initialized(kvm))
- return 0;
-
- /* freeze the number of spis */
- if (!dist->nr_spis)
- dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
-
- ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
- if (ret)
- goto out;
-
- if (vgic_has_its(kvm))
- dist->msis_require_devid = true;
-
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vgic_vcpu_init(vcpu);
-
- ret = kvm_vgic_setup_default_irq_routing(kvm);
- if (ret)
- goto out;
-
- dist->initialized = true;
-out:
- return ret;
-}
-
-static void kvm_vgic_dist_destroy(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
-
- mutex_lock(&kvm->lock);
-
- dist->ready = false;
- dist->initialized = false;
-
- kfree(dist->spis);
- dist->nr_spis = 0;
-
- mutex_unlock(&kvm->lock);
-}
-
-void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-
- INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
-}
-
-void kvm_vgic_destroy(struct kvm *kvm)
-{
- struct kvm_vcpu *vcpu;
- int i;
-
- kvm_vgic_dist_destroy(kvm);
-
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vgic_vcpu_destroy(vcpu);
-}
-
-/**
- * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
- * is a GICv2. A GICv3 must be explicitly initialized by userspace using the
- * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
- * @kvm: kvm struct pointer
- */
-int vgic_lazy_init(struct kvm *kvm)
-{
- int ret = 0;
-
- if (unlikely(!vgic_initialized(kvm))) {
- /*
- * We only provide the automatic initialization of the VGIC
- * for the legacy case of a GICv2. Any other type must
-		 * be explicitly initialized once set up with the respective
- * KVM device call.
- */
- if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
- return -EBUSY;
-
- mutex_lock(&kvm->lock);
- ret = vgic_init(kvm);
- mutex_unlock(&kvm->lock);
- }
-
- return ret;
-}
-
-/* RESOURCE MAPPING */
-
-/**
- * Map the MMIO regions depending on the VGIC model exposed to the guest;
- * called on the first VCPU run.
- * Also map the virtual CPU interface into the VM.
- * v2/v3 derivatives call vgic_init if not already done.
- * vgic_ready() returns true if this function has succeeded.
- * @kvm: kvm struct pointer
- */
-int kvm_vgic_map_resources(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- int ret = 0;
-
- mutex_lock(&kvm->lock);
- if (!irqchip_in_kernel(kvm))
- goto out;
-
- if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
- ret = vgic_v2_map_resources(kvm);
- else
- ret = vgic_v3_map_resources(kvm);
-out:
- mutex_unlock(&kvm->lock);
- return ret;
-}
-
-/* GENERIC PROBE */
-
-static int vgic_init_cpu_starting(unsigned int cpu)
-{
- enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0);
- return 0;
-}
-
-
-static int vgic_init_cpu_dying(unsigned int cpu)
-{
- disable_percpu_irq(kvm_vgic_global_state.maint_irq);
- return 0;
-}
-
-static irqreturn_t vgic_maintenance_handler(int irq, void *data)
-{
- /*
- * We cannot rely on the vgic maintenance interrupt to be
- * delivered synchronously. This means we can only use it to
- * exit the VM, and we perform the handling of EOIed
- * interrupts on the exit path (see vgic_process_maintenance).
- */
- return IRQ_HANDLED;
-}
-
-/**
- * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
- * according to the host GIC model. Accordingly calls either
- * vgic_v2/v3_probe which registers the KVM_DEVICE that can be
- * instantiated by userspace later on.
- */
-int kvm_vgic_hyp_init(void)
-{
- const struct gic_kvm_info *gic_kvm_info;
- int ret;
-
- gic_kvm_info = gic_get_kvm_info();
- if (!gic_kvm_info)
- return -ENODEV;
-
- if (!gic_kvm_info->maint_irq) {
- kvm_err("No vgic maintenance irq\n");
- return -ENXIO;
- }
-
- switch (gic_kvm_info->type) {
- case GIC_V2:
- ret = vgic_v2_probe(gic_kvm_info);
- break;
- case GIC_V3:
- ret = vgic_v3_probe(gic_kvm_info);
- if (!ret) {
- static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif);
- kvm_info("GIC system register CPU interface enabled\n");
- }
- break;
- default:
- ret = -ENODEV;
-	}
-
- if (ret)
- return ret;
-
- kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
- ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
- vgic_maintenance_handler,
- "vgic", kvm_get_running_vcpus());
- if (ret) {
- kvm_err("Cannot register interrupt %d\n",
- kvm_vgic_global_state.maint_irq);
- return ret;
- }
-
- ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
- "AP_KVM_ARM_VGIC_INIT_STARTING",
- vgic_init_cpu_starting, vgic_init_cpu_dying);
- if (ret) {
- kvm_err("Cannot register vgic CPU notifier\n");
- goto out_free_irq;
- }
-
- kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
- return 0;
-
-out_free_irq:
- free_percpu_irq(kvm_vgic_global_state.maint_irq,
- kvm_get_running_vcpus());
- return ret;
-}
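
Note on the init flow deleted above: only a GICv2 model is initialized lazily; a GICv3 (and any ITS) must be initialized explicitly through the KVM device API before the first vCPU run. A hedged userspace-side sketch of that explicit step is shown below (the ioctl and attribute constants are the existing UAPI; the helper name and fd variable are invented for illustration):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Illustrative sketch: explicitly initialize a GICv3 VGIC device.
 * vgic_dev_fd is assumed to come from a prior KVM_CREATE_DEVICE call. */
static int vgic_v3_explicit_init(int vgic_dev_fd)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
		.attr  = KVM_DEV_ARM_VGIC_CTRL_INIT,
	};

	return ioctl(vgic_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
}

Setting this attribute lands in vgic_set_common_attr() further down in this patch, which takes kvm->lock and calls vgic_init().
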
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
deleted file mode 100644
index d918dcf..0000000
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2015, 2016 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <trace/events/kvm.h>
-#include <kvm/arm_vgic.h>
-#include "vgic.h"
-
-/**
- * vgic_irqfd_set_irq: inject the IRQ corresponding to the
- * irqchip routing entry
- *
- * This is the entry point for irqfd IRQ injection
- */
-static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
- struct kvm *kvm, int irq_source_id,
- int level, bool line_status)
-{
- unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS;
-
- if (!vgic_valid_spi(kvm, spi_id))
- return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, spi_id, level);
-}
-
-/**
- * kvm_set_routing_entry: populate a kvm routing entry
- * from a user routing entry
- *
- * @kvm: the VM this entry is applied to
- * @e: kvm kernel routing entry handle
- * @ue: user api routing entry handle
- * return 0 on success, -EINVAL on errors.
- */
-int kvm_set_routing_entry(struct kvm *kvm,
- struct kvm_kernel_irq_routing_entry *e,
- const struct kvm_irq_routing_entry *ue)
-{
- int r = -EINVAL;
-
- switch (ue->type) {
- case KVM_IRQ_ROUTING_IRQCHIP:
- e->set = vgic_irqfd_set_irq;
- e->irqchip.irqchip = ue->u.irqchip.irqchip;
- e->irqchip.pin = ue->u.irqchip.pin;
- if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) ||
- (e->irqchip.irqchip >= KVM_NR_IRQCHIPS))
- goto out;
- break;
- case KVM_IRQ_ROUTING_MSI:
- e->set = kvm_set_msi;
- e->msi.address_lo = ue->u.msi.address_lo;
- e->msi.address_hi = ue->u.msi.address_hi;
- e->msi.data = ue->u.msi.data;
- e->msi.flags = ue->flags;
- e->msi.devid = ue->u.msi.devid;
- break;
- default:
- goto out;
- }
- r = 0;
-out:
- return r;
-}
-
-/**
- * kvm_set_msi: inject the MSI corresponding to the
- * MSI routing entry
- *
- * This is the entry point for irqfd MSI injection
- * and userspace MSI injection.
- */
-int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
- struct kvm *kvm, int irq_source_id,
- int level, bool line_status)
-{
- struct kvm_msi msi;
-
- msi.address_lo = e->msi.address_lo;
- msi.address_hi = e->msi.address_hi;
- msi.data = e->msi.data;
- msi.flags = e->msi.flags;
- msi.devid = e->msi.devid;
-
- if (!vgic_has_its(kvm))
- return -ENODEV;
-
- return vgic_its_inject_msi(kvm, &msi);
-}
-
-int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
-{
- struct kvm_irq_routing_entry *entries;
- struct vgic_dist *dist = &kvm->arch.vgic;
- u32 nr = dist->nr_spis;
- int i, ret;
-
- entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry),
- GFP_KERNEL);
- if (!entries)
- return -ENOMEM;
-
- for (i = 0; i < nr; i++) {
- entries[i].gsi = i;
- entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
- entries[i].u.irqchip.irqchip = 0;
- entries[i].u.irqchip.pin = i;
- }
- ret = kvm_set_irq_routing(kvm, entries, nr, 0);
- kfree(entries);
- return ret;
-}
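
For context on the routing code deleted above: an MSI routing entry aimed at the ITS must carry KVM_MSI_VALID_DEVID, otherwise vgic_its_inject_msi() rejects it. A hedged userspace-side sketch of such an entry follows (the structure, type and flag are the existing UAPI; every numeric value is an invented example):

#include <linux/kvm.h>
#include <string.h>

/* Illustrative sketch: fill one MSI routing entry of the kind
 * kvm_set_routing_entry() above translates into a kernel-side entry. */
static void fill_its_msi_route(struct kvm_irq_routing_entry *e, unsigned int gsi)
{
	memset(e, 0, sizeof(*e));
	e->gsi = gsi;				/* example GSI number */
	e->type = KVM_IRQ_ROUTING_MSI;
	e->flags = KVM_MSI_VALID_DEVID;		/* required on the ITS path */
	e->u.msi.address_lo = 0x08080040;	/* example GITS_TRANSLATER doorbell */
	e->u.msi.address_hi = 0x0;
	e->u.msi.data = 7;			/* event ID */
	e->u.msi.devid = 0x10;			/* example device ID */
}

Entries like this are wrapped in a struct kvm_irq_routing and installed with the KVM_SET_GSI_ROUTING ioctl on the VM file descriptor; kvm_set_routing_entry() above then copies the fields into the kernel entry.
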
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
deleted file mode 100644
index 4660a7d..0000000
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ /dev/null
@@ -1,1570 +0,0 @@
-/*
- * GICv3 ITS emulation
- *
- * Copyright (C) 2015,2016 ARM Ltd.
- * Author: Andre Przywara <andre.przywara@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/cpu.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/interrupt.h>
-#include <linux/list.h>
-#include <linux/uaccess.h>
-
-#include <linux/irqchip/arm-gic-v3.h>
-
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_mmu.h>
-
-#include "vgic.h"
-#include "vgic-mmio.h"
-
-/*
- * Creates a new (reference to a) struct vgic_irq for a given LPI.
- * If this LPI is already mapped on another ITS, we increase its refcount
- * and return a pointer to the existing structure.
- * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq.
- * This function returns a pointer to the _unlocked_ structure.
- */
-static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq;
-
- /* In this case there is no put, since we keep the reference. */
- if (irq)
- return irq;
-
- irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
- if (!irq)
- return ERR_PTR(-ENOMEM);
-
- INIT_LIST_HEAD(&irq->lpi_list);
- INIT_LIST_HEAD(&irq->ap_list);
- spin_lock_init(&irq->irq_lock);
-
- irq->config = VGIC_CONFIG_EDGE;
- kref_init(&irq->refcount);
- irq->intid = intid;
-
- spin_lock(&dist->lpi_list_lock);
-
- /*
- * There could be a race with another vgic_add_lpi(), so we need to
- * check that we don't add a second list entry with the same LPI.
- */
- list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) {
- if (oldirq->intid != intid)
- continue;
-
-		/* Someone was faster with adding this LPI, let's use that. */
- kfree(irq);
- irq = oldirq;
-
- /*
- * This increases the refcount, the caller is expected to
- * call vgic_put_irq() on the returned pointer once it's
- * finished with the IRQ.
- */
- vgic_get_irq_kref(irq);
-
- goto out_unlock;
- }
-
- list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
- dist->lpi_list_count++;
-
-out_unlock:
- spin_unlock(&dist->lpi_list_lock);
-
- return irq;
-}
-
-struct its_device {
- struct list_head dev_list;
-
- /* the head for the list of ITTEs */
- struct list_head itt_head;
- u32 device_id;
-};
-
-#define COLLECTION_NOT_MAPPED ((u32)~0)
-
-struct its_collection {
- struct list_head coll_list;
-
- u32 collection_id;
- u32 target_addr;
-};
-
-#define its_is_collection_mapped(coll) ((coll) && \
- ((coll)->target_addr != COLLECTION_NOT_MAPPED))
-
-struct its_itte {
- struct list_head itte_list;
-
- struct vgic_irq *irq;
- struct its_collection *collection;
- u32 lpi;
- u32 event_id;
-};
-
-/*
- * Finds and returns a device in the device table for an ITS.
- * Must be called with the its_lock mutex held.
- */
-static struct its_device *find_its_device(struct vgic_its *its, u32 device_id)
-{
- struct its_device *device;
-
- list_for_each_entry(device, &its->device_list, dev_list)
- if (device_id == device->device_id)
- return device;
-
- return NULL;
-}
-
-/*
- * Finds and returns an interrupt translation table entry (ITTE) for a given
- * Device ID/Event ID pair on an ITS.
- * Must be called with the its_lock mutex held.
- */
-static struct its_itte *find_itte(struct vgic_its *its, u32 device_id,
- u32 event_id)
-{
- struct its_device *device;
- struct its_itte *itte;
-
- device = find_its_device(its, device_id);
- if (device == NULL)
- return NULL;
-
- list_for_each_entry(itte, &device->itt_head, itte_list)
- if (itte->event_id == event_id)
- return itte;
-
- return NULL;
-}
-
-/* Meant to be used as an iterator, this macro deliberately omits the enclosing parentheses */
-#define for_each_lpi_its(dev, itte, its) \
- list_for_each_entry(dev, &(its)->device_list, dev_list) \
- list_for_each_entry(itte, &(dev)->itt_head, itte_list)
-
-/*
- * We only implement 48 bits of PA at the moment, although the ITS
- * supports more. Let's be restrictive here.
- */
-#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16))
-#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12))
-#define PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16))
-#define PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12))
-
-#define GIC_LPI_OFFSET 8192
-
-/*
- * Finds and returns a collection in the ITS collection table.
- * Must be called with the its_lock mutex held.
- */
-static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
-{
- struct its_collection *collection;
-
- list_for_each_entry(collection, &its->collection_list, coll_list) {
- if (coll_id == collection->collection_id)
- return collection;
- }
-
- return NULL;
-}
-
-#define LPI_PROP_ENABLE_BIT(p) ((p) & LPI_PROP_ENABLED)
-#define LPI_PROP_PRIORITY(p) ((p) & 0xfc)
-
-/*
- * Reads the configuration data for a given LPI from guest memory and
- * updates the fields in struct vgic_irq.
- * If filter_vcpu is not NULL, applies only if the IRQ is targeting this
- * VCPU. Unconditionally applies if filter_vcpu is NULL.
- */
-static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
- struct kvm_vcpu *filter_vcpu)
-{
- u64 propbase = PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
- u8 prop;
- int ret;
-
- ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
- &prop, 1);
-
- if (ret)
- return ret;
-
- spin_lock(&irq->irq_lock);
-
- if (!filter_vcpu || filter_vcpu == irq->target_vcpu) {
- irq->priority = LPI_PROP_PRIORITY(prop);
- irq->enabled = LPI_PROP_ENABLE_BIT(prop);
-
- vgic_queue_irq_unlock(kvm, irq);
- } else {
- spin_unlock(&irq->irq_lock);
- }
-
- return 0;
-}
-
-/*
- * Create a snapshot of the current LPI list, so that we can enumerate all
- * LPIs without holding any lock.
- * Returns the array length and puts the kmalloc'ed array into intid_ptr.
- */
-static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_irq *irq;
- u32 *intids;
- int irq_count = dist->lpi_list_count, i = 0;
-
- /*
- * We use the current value of the list length, which may change
- * after the kmalloc. We don't care, because the guest shouldn't
- * change anything while the command handling is still running,
- * and in the worst case we would miss a new IRQ, which one wouldn't
- * expect to be covered by this command anyway.
- */
- intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL);
- if (!intids)
- return -ENOMEM;
-
- spin_lock(&dist->lpi_list_lock);
- list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
- /* We don't need to "get" the IRQ, as we hold the list lock. */
- intids[i] = irq->intid;
- if (++i == irq_count)
- break;
- }
- spin_unlock(&dist->lpi_list_lock);
-
- *intid_ptr = intids;
- return irq_count;
-}
-
-/*
- * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI
- * is targeting) to the VGIC's view, which deals with target VCPUs.
- * Needs to be called whenever either the collection for an LPI has
- * changed or the collection itself got retargeted.
- */
-static void update_affinity_itte(struct kvm *kvm, struct its_itte *itte)
-{
- struct kvm_vcpu *vcpu;
-
- if (!its_is_collection_mapped(itte->collection))
- return;
-
- vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
-
- spin_lock(&itte->irq->irq_lock);
- itte->irq->target_vcpu = vcpu;
- spin_unlock(&itte->irq->irq_lock);
-}
-
-/*
- * Updates the target VCPU for every LPI targeting this collection.
- * Must be called with the its_lock mutex held.
- */
-static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its,
- struct its_collection *coll)
-{
- struct its_device *device;
- struct its_itte *itte;
-
- for_each_lpi_its(device, itte, its) {
- if (!itte->collection || coll != itte->collection)
- continue;
-
- update_affinity_itte(kvm, itte);
- }
-}
-
-static u32 max_lpis_propbaser(u64 propbaser)
-{
- int nr_idbits = (propbaser & 0x1f) + 1;
-
- return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS);
-}
-
-/*
- * Scan the whole LPI pending table and sync the pending bit in there
- * with our own data structures. This relies on the LPI being
- * mapped before.
- */
-static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
-{
- gpa_t pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
- struct vgic_irq *irq;
- int last_byte_offset = -1;
- int ret = 0;
- u32 *intids;
- int nr_irqs, i;
-
- nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids);
- if (nr_irqs < 0)
- return nr_irqs;
-
- for (i = 0; i < nr_irqs; i++) {
- int byte_offset, bit_nr;
- u8 pendmask;
-
- byte_offset = intids[i] / BITS_PER_BYTE;
- bit_nr = intids[i] % BITS_PER_BYTE;
-
- /*
- * For contiguously allocated LPIs chances are we just read
- * this very same byte in the last iteration. Reuse that.
- */
- if (byte_offset != last_byte_offset) {
- ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset,
- &pendmask, 1);
- if (ret) {
- kfree(intids);
- return ret;
- }
- last_byte_offset = byte_offset;
- }
-
- irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
- spin_lock(&irq->irq_lock);
- irq->pending = pendmask & (1U << bit_nr);
- vgic_queue_irq_unlock(vcpu->kvm, irq);
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- kfree(intids);
-
- return ret;
-}
-
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *kvm,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- u32 reg = 0;
-
- mutex_lock(&its->cmd_lock);
- if (its->creadr == its->cwriter)
- reg |= GITS_CTLR_QUIESCENT;
- if (its->enabled)
- reg |= GITS_CTLR_ENABLE;
- mutex_unlock(&its->cmd_lock);
-
- return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
-static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- u64 reg = GITS_TYPER_PLPIS;
-
- /*
- * We use linear CPU numbers for redistributor addressing,
- * so GITS_TYPER.PTA is 0.
- * Also we force all PROPBASER registers to be the same, so
- * CommonLPIAff is 0 as well.
- * To avoid memory waste in the guest, we keep the number of IDBits and
-	 * DevBits low, at least for the time being.
- */
- reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT;
- reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT;
-
- return extract_bytes(reg, addr & 7, len);
-}
-
-static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
-}
-
-static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- switch (addr & 0xffff) {
- case GITS_PIDR0:
- return 0x92; /* part number, bits[7:0] */
- case GITS_PIDR1:
- return 0xb4; /* part number, bits[11:8] */
- case GITS_PIDR2:
- return GIC_PIDR2_ARCH_GICv3 | 0x0b;
- case GITS_PIDR4:
- return 0x40; /* This is a 64K software visible page */
- /* The following are the ID registers for (any) GIC. */
- case GITS_CIDR0:
- return 0x0d;
- case GITS_CIDR1:
- return 0xf0;
- case GITS_CIDR2:
- return 0x05;
- case GITS_CIDR3:
- return 0xb1;
- }
-
- return 0;
-}
-
-/*
- * Find the target VCPU and the LPI number for a given devid/eventid pair
- * and make this IRQ pending, possibly injecting it.
- * Must be called with the its_lock mutex held.
- * Returns 0 on success, a positive error value for any ITS mapping
- * related errors and negative error values for generic errors.
- */
-static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
- u32 devid, u32 eventid)
-{
- struct kvm_vcpu *vcpu;
- struct its_itte *itte;
-
- if (!its->enabled)
- return -EBUSY;
-
- itte = find_itte(its, devid, eventid);
- if (!itte || !its_is_collection_mapped(itte->collection))
- return E_ITS_INT_UNMAPPED_INTERRUPT;
-
- vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
- if (!vcpu)
- return E_ITS_INT_UNMAPPED_INTERRUPT;
-
- if (!vcpu->arch.vgic_cpu.lpis_enabled)
- return -EBUSY;
-
- spin_lock(&itte->irq->irq_lock);
- itte->irq->pending = true;
- vgic_queue_irq_unlock(kvm, itte->irq);
-
- return 0;
-}
-
-static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
-{
- struct vgic_io_device *iodev;
-
- if (dev->ops != &kvm_io_gic_ops)
- return NULL;
-
- iodev = container_of(dev, struct vgic_io_device, dev);
-
- if (iodev->iodev_type != IODEV_ITS)
- return NULL;
-
- return iodev;
-}
-
-/*
- * Queries the KVM IO bus framework to get the ITS pointer from the given
- * doorbell address.
- * We then call vgic_its_trigger_msi() with the decoded data.
- * According to the KVM_SIGNAL_MSI API description, this returns 1 on success.
- */
-int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
-{
- u64 address;
- struct kvm_io_device *kvm_io_dev;
- struct vgic_io_device *iodev;
- int ret;
-
- if (!vgic_has_its(kvm))
- return -ENODEV;
-
- if (!(msi->flags & KVM_MSI_VALID_DEVID))
- return -EINVAL;
-
- address = (u64)msi->address_hi << 32 | msi->address_lo;
-
- kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
- if (!kvm_io_dev)
- return -EINVAL;
-
- iodev = vgic_get_its_iodev(kvm_io_dev);
- if (!iodev)
- return -EINVAL;
-
- mutex_lock(&iodev->its->its_lock);
- ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
- mutex_unlock(&iodev->its->its_lock);
-
- if (ret < 0)
- return ret;
-
- /*
- * KVM_SIGNAL_MSI demands a return value > 0 for success and 0
- * if the guest has blocked the MSI. So we map any LPI mapping
- * related error to that.
- */
- if (ret)
- return 0;
- else
- return 1;
-}
-
-/* Requires the its_lock to be held. */
-static void its_free_itte(struct kvm *kvm, struct its_itte *itte)
-{
- list_del(&itte->itte_list);
-
- /* This put matches the get in vgic_add_lpi. */
- if (itte->irq)
- vgic_put_irq(kvm, itte->irq);
-
- kfree(itte);
-}
-
-static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
-{
- return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT_ULL(size) - 1);
-}
-
-#define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8)
-#define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32)
-#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32)
-#define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32)
-#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16)
-#define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32)
-#define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1)
-
-/*
- * The DISCARD command frees an Interrupt Translation Table Entry (ITTE).
- * Must be called with the its_lock mutex held.
- */
-static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- u32 device_id = its_cmd_get_deviceid(its_cmd);
- u32 event_id = its_cmd_get_id(its_cmd);
- struct its_itte *itte;
-
-
- itte = find_itte(its, device_id, event_id);
- if (itte && itte->collection) {
- /*
- * Though the spec talks about removing the pending state, we
- * don't bother here since we clear the ITTE anyway and the
- * pending state is a property of the ITTE struct.
- */
- its_free_itte(kvm, itte);
- return 0;
- }
-
- return E_ITS_DISCARD_UNMAPPED_INTERRUPT;
-}
-
-/*
- * The MOVI command moves an ITTE to a different collection.
- * Must be called with the its_lock mutex held.
- */
-static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- u32 device_id = its_cmd_get_deviceid(its_cmd);
- u32 event_id = its_cmd_get_id(its_cmd);
- u32 coll_id = its_cmd_get_collection(its_cmd);
- struct kvm_vcpu *vcpu;
- struct its_itte *itte;
- struct its_collection *collection;
-
- itte = find_itte(its, device_id, event_id);
- if (!itte)
- return E_ITS_MOVI_UNMAPPED_INTERRUPT;
-
- if (!its_is_collection_mapped(itte->collection))
- return E_ITS_MOVI_UNMAPPED_COLLECTION;
-
- collection = find_collection(its, coll_id);
- if (!its_is_collection_mapped(collection))
- return E_ITS_MOVI_UNMAPPED_COLLECTION;
-
- itte->collection = collection;
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
-
- spin_lock(&itte->irq->irq_lock);
- itte->irq->target_vcpu = vcpu;
- spin_unlock(&itte->irq->irq_lock);
-
- return 0;
-}
-
-/*
- * Check whether an ID can be stored into the corresponding guest table.
- * For a direct table this is pretty easy, but gets a bit nasty for
- * indirect tables. We check whether the resulting guest physical address
- * is actually valid (covered by a memslot and guest accessible).
- * For this we have to read the respective first level entry.
- */
-static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
-{
- int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
- int index;
- u64 indirect_ptr;
- gfn_t gfn;
-
- if (!(baser & GITS_BASER_INDIRECT)) {
- phys_addr_t addr;
-
- if (id >= (l1_tbl_size / GITS_BASER_ENTRY_SIZE(baser)))
- return false;
-
- addr = BASER_ADDRESS(baser) + id * GITS_BASER_ENTRY_SIZE(baser);
- gfn = addr >> PAGE_SHIFT;
-
- return kvm_is_visible_gfn(its->dev->kvm, gfn);
- }
-
- /* calculate and check the index into the 1st level */
- index = id / (SZ_64K / GITS_BASER_ENTRY_SIZE(baser));
- if (index >= (l1_tbl_size / sizeof(u64)))
- return false;
-
- /* Each 1st level entry is represented by a 64-bit value. */
- if (kvm_read_guest(its->dev->kvm,
- BASER_ADDRESS(baser) + index * sizeof(indirect_ptr),
- &indirect_ptr, sizeof(indirect_ptr)))
- return false;
-
- indirect_ptr = le64_to_cpu(indirect_ptr);
-
- /* check the valid bit of the first level entry */
- if (!(indirect_ptr & BIT_ULL(63)))
- return false;
-
- /*
- * Mask the guest physical address and calculate the frame number.
- * Any address beyond our supported 48 bits of PA will be caught
- * by the actual check in the final step.
- */
- indirect_ptr &= GENMASK_ULL(51, 16);
-
- /* Find the address of the actual entry */
- index = id % (SZ_64K / GITS_BASER_ENTRY_SIZE(baser));
- indirect_ptr += index * GITS_BASER_ENTRY_SIZE(baser);
- gfn = indirect_ptr >> PAGE_SHIFT;
-
- return kvm_is_visible_gfn(its->dev->kvm, gfn);
-}
-
-static int vgic_its_alloc_collection(struct vgic_its *its,
- struct its_collection **colp,
- u32 coll_id)
-{
- struct its_collection *collection;
-
- if (!vgic_its_check_id(its, its->baser_coll_table, coll_id))
- return E_ITS_MAPC_COLLECTION_OOR;
-
-	collection = kzalloc(sizeof(*collection), GFP_KERNEL);
-	if (!collection)
-		return -ENOMEM;
-
- collection->collection_id = coll_id;
- collection->target_addr = COLLECTION_NOT_MAPPED;
-
- list_add_tail(&collection->coll_list, &its->collection_list);
- *colp = collection;
-
- return 0;
-}
-
-static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
-{
- struct its_collection *collection;
- struct its_device *device;
- struct its_itte *itte;
-
- /*
- * Clearing the mapping for that collection ID removes the
- * entry from the list. If there wasn't any before, we can
- * go home early.
- */
- collection = find_collection(its, coll_id);
- if (!collection)
- return;
-
- for_each_lpi_its(device, itte, its)
- if (itte->collection &&
- itte->collection->collection_id == coll_id)
- itte->collection = NULL;
-
- list_del(&collection->coll_list);
- kfree(collection);
-}
-
-/*
- * The MAPTI and MAPI commands map LPIs to ITTEs.
- * Must be called with its_lock mutex held.
- */
-static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- u32 device_id = its_cmd_get_deviceid(its_cmd);
- u32 event_id = its_cmd_get_id(its_cmd);
- u32 coll_id = its_cmd_get_collection(its_cmd);
- struct its_itte *itte;
- struct its_device *device;
- struct its_collection *collection, *new_coll = NULL;
- int lpi_nr;
- struct vgic_irq *irq;
-
- device = find_its_device(its, device_id);
- if (!device)
- return E_ITS_MAPTI_UNMAPPED_DEVICE;
-
- if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI)
- lpi_nr = its_cmd_get_physical_id(its_cmd);
- else
- lpi_nr = event_id;
- if (lpi_nr < GIC_LPI_OFFSET ||
- lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser))
- return E_ITS_MAPTI_PHYSICALID_OOR;
-
- /* If there is an existing mapping, behavior is UNPREDICTABLE. */
- if (find_itte(its, device_id, event_id))
- return 0;
-
- collection = find_collection(its, coll_id);
- if (!collection) {
- int ret = vgic_its_alloc_collection(its, &collection, coll_id);
- if (ret)
- return ret;
- new_coll = collection;
- }
-
- itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
- if (!itte) {
- if (new_coll)
- vgic_its_free_collection(its, coll_id);
- return -ENOMEM;
- }
-
- itte->event_id = event_id;
- list_add_tail(&itte->itte_list, &device->itt_head);
-
- itte->collection = collection;
- itte->lpi = lpi_nr;
-
- irq = vgic_add_lpi(kvm, lpi_nr);
- if (IS_ERR(irq)) {
- if (new_coll)
- vgic_its_free_collection(its, coll_id);
- its_free_itte(kvm, itte);
- return PTR_ERR(irq);
- }
- itte->irq = irq;
-
- update_affinity_itte(kvm, itte);
-
- /*
-	 * We "cache" the configuration table entries in our struct vgic_irq's.
- * However we only have those structs for mapped IRQs, so we read in
- * the respective config data from memory here upon mapping the LPI.
- */
- update_lpi_config(kvm, itte->irq, NULL);
-
- return 0;
-}
-
-/* Requires the its_lock to be held. */
-static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device)
-{
- struct its_itte *itte, *temp;
-
- /*
-	 * The spec says that unmapping a device that still has valid
-	 * ITTEs associated with it is UNPREDICTABLE. We remove all ITTEs,
- * since we cannot leave the memory unreferenced.
- */
- list_for_each_entry_safe(itte, temp, &device->itt_head, itte_list)
- its_free_itte(kvm, itte);
-
- list_del(&device->dev_list);
- kfree(device);
-}
-
-/*
- * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs).
- * Must be called with the its_lock mutex held.
- */
-static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- u32 device_id = its_cmd_get_deviceid(its_cmd);
- bool valid = its_cmd_get_validbit(its_cmd);
- struct its_device *device;
-
- if (!vgic_its_check_id(its, its->baser_device_table, device_id))
- return E_ITS_MAPD_DEVICE_OOR;
-
- device = find_its_device(its, device_id);
-
- /*
- * The spec says that calling MAPD on an already mapped device
- * invalidates all cached data for this device. We implement this
- * by removing the mapping and re-establishing it.
- */
- if (device)
- vgic_its_unmap_device(kvm, device);
-
- /*
- * The spec does not say whether unmapping a not-mapped device
- * is an error, so we are done in any case.
- */
- if (!valid)
- return 0;
-
- device = kzalloc(sizeof(struct its_device), GFP_KERNEL);
- if (!device)
- return -ENOMEM;
-
- device->device_id = device_id;
- INIT_LIST_HEAD(&device->itt_head);
-
- list_add_tail(&device->dev_list, &its->device_list);
-
- return 0;
-}
-
-/*
- * The MAPC command maps collection IDs to redistributors.
- * Must be called with the its_lock mutex held.
- */
-static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- u16 coll_id;
- u32 target_addr;
- struct its_collection *collection;
- bool valid;
-
- valid = its_cmd_get_validbit(its_cmd);
- coll_id = its_cmd_get_collection(its_cmd);
- target_addr = its_cmd_get_target_addr(its_cmd);
-
- if (target_addr >= atomic_read(&kvm->online_vcpus))
- return E_ITS_MAPC_PROCNUM_OOR;
-
- if (!valid) {
- vgic_its_free_collection(its, coll_id);
- } else {
- collection = find_collection(its, coll_id);
-
- if (!collection) {
- int ret;
-
- ret = vgic_its_alloc_collection(its, &collection,
- coll_id);
- if (ret)
- return ret;
- collection->target_addr = target_addr;
- } else {
- collection->target_addr = target_addr;
- update_affinity_collection(kvm, its, collection);
- }
- }
-
- return 0;
-}
-
-/*
- * The CLEAR command removes the pending state for a particular LPI.
- * Must be called with the its_lock mutex held.
- */
-static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- u32 device_id = its_cmd_get_deviceid(its_cmd);
- u32 event_id = its_cmd_get_id(its_cmd);
- struct its_itte *itte;
-
-
- itte = find_itte(its, device_id, event_id);
- if (!itte)
- return E_ITS_CLEAR_UNMAPPED_INTERRUPT;
-
- itte->irq->pending = false;
-
- return 0;
-}
-
-/*
- * The INV command syncs the configuration bits from the memory table.
- * Must be called with the its_lock mutex held.
- */
-static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- u32 device_id = its_cmd_get_deviceid(its_cmd);
- u32 event_id = its_cmd_get_id(its_cmd);
- struct its_itte *itte;
-
-
- itte = find_itte(its, device_id, event_id);
- if (!itte)
- return E_ITS_INV_UNMAPPED_INTERRUPT;
-
- return update_lpi_config(kvm, itte->irq, NULL);
-}
-
-/*
- * The INVALL command requests flushing of all IRQ data in this collection.
- * Find the VCPU mapped to that collection, then iterate over the VM's list
- * of mapped LPIs and update the configuration for each IRQ which targets
- * the specified vcpu. The configuration will be read from the in-memory
- * configuration table.
- * Must be called with the its_lock mutex held.
- */
-static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- u32 coll_id = its_cmd_get_collection(its_cmd);
- struct its_collection *collection;
- struct kvm_vcpu *vcpu;
- struct vgic_irq *irq;
- u32 *intids;
- int irq_count, i;
-
- collection = find_collection(its, coll_id);
- if (!its_is_collection_mapped(collection))
- return E_ITS_INVALL_UNMAPPED_COLLECTION;
-
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
-
- irq_count = vgic_copy_lpi_list(kvm, &intids);
- if (irq_count < 0)
- return irq_count;
-
- for (i = 0; i < irq_count; i++) {
- irq = vgic_get_irq(kvm, NULL, intids[i]);
- if (!irq)
- continue;
- update_lpi_config(kvm, irq, vcpu);
- vgic_put_irq(kvm, irq);
- }
-
- kfree(intids);
-
- return 0;
-}
-
-/*
- * The MOVALL command moves the pending state of all IRQs targeting one
- * redistributor to another. We don't hold the pending state in the VCPUs,
- * but in the IRQs instead, so there is really not much to do for us here.
- * However the spec says that no IRQ must target the old redistributor
- * afterwards, so we make sure that no LPI is using the associated target_vcpu.
- * This command affects all LPIs in the system that target that redistributor.
- */
-static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- u32 target1_addr = its_cmd_get_target_addr(its_cmd);
- u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
- struct kvm_vcpu *vcpu1, *vcpu2;
- struct vgic_irq *irq;
-
- if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
- target2_addr >= atomic_read(&kvm->online_vcpus))
- return E_ITS_MOVALL_PROCNUM_OOR;
-
- if (target1_addr == target2_addr)
- return 0;
-
- vcpu1 = kvm_get_vcpu(kvm, target1_addr);
- vcpu2 = kvm_get_vcpu(kvm, target2_addr);
-
- spin_lock(&dist->lpi_list_lock);
-
- list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
- spin_lock(&irq->irq_lock);
-
- if (irq->target_vcpu == vcpu1)
- irq->target_vcpu = vcpu2;
-
- spin_unlock(&irq->irq_lock);
- }
-
- spin_unlock(&dist->lpi_list_lock);
-
- return 0;
-}
-
-/*
- * The INT command injects the LPI associated with that DevID/EvID pair.
- * Must be called with the its_lock mutex held.
- */
-static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- u32 msi_data = its_cmd_get_id(its_cmd);
- u64 msi_devid = its_cmd_get_deviceid(its_cmd);
-
- return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
-}
-
-/*
- * This function is called with the its_cmd lock held, but the ITS data
- * structure lock dropped.
- */
-static int vgic_its_handle_command(struct kvm *kvm, struct vgic_its *its,
- u64 *its_cmd)
-{
- int ret = -ENODEV;
-
- mutex_lock(&its->its_lock);
- switch (its_cmd_get_command(its_cmd)) {
- case GITS_CMD_MAPD:
- ret = vgic_its_cmd_handle_mapd(kvm, its, its_cmd);
- break;
- case GITS_CMD_MAPC:
- ret = vgic_its_cmd_handle_mapc(kvm, its, its_cmd);
- break;
- case GITS_CMD_MAPI:
- ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd);
- break;
- case GITS_CMD_MAPTI:
- ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd);
- break;
- case GITS_CMD_MOVI:
- ret = vgic_its_cmd_handle_movi(kvm, its, its_cmd);
- break;
- case GITS_CMD_DISCARD:
- ret = vgic_its_cmd_handle_discard(kvm, its, its_cmd);
- break;
- case GITS_CMD_CLEAR:
- ret = vgic_its_cmd_handle_clear(kvm, its, its_cmd);
- break;
- case GITS_CMD_MOVALL:
- ret = vgic_its_cmd_handle_movall(kvm, its, its_cmd);
- break;
- case GITS_CMD_INT:
- ret = vgic_its_cmd_handle_int(kvm, its, its_cmd);
- break;
- case GITS_CMD_INV:
- ret = vgic_its_cmd_handle_inv(kvm, its, its_cmd);
- break;
- case GITS_CMD_INVALL:
- ret = vgic_its_cmd_handle_invall(kvm, its, its_cmd);
- break;
- case GITS_CMD_SYNC:
- /* we ignore this command: we are in sync all of the time */
- ret = 0;
- break;
- }
- mutex_unlock(&its->its_lock);
-
- return ret;
-}
-
-static u64 vgic_sanitise_its_baser(u64 reg)
-{
- reg = vgic_sanitise_field(reg, GITS_BASER_SHAREABILITY_MASK,
- GITS_BASER_SHAREABILITY_SHIFT,
- vgic_sanitise_shareability);
- reg = vgic_sanitise_field(reg, GITS_BASER_INNER_CACHEABILITY_MASK,
- GITS_BASER_INNER_CACHEABILITY_SHIFT,
- vgic_sanitise_inner_cacheability);
- reg = vgic_sanitise_field(reg, GITS_BASER_OUTER_CACHEABILITY_MASK,
- GITS_BASER_OUTER_CACHEABILITY_SHIFT,
- vgic_sanitise_outer_cacheability);
-
- /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */
- reg &= ~GENMASK_ULL(15, 12);
-
- /* We support only one (ITS) page size: 64K */
- reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K;
-
- return reg;
-}
-
-static u64 vgic_sanitise_its_cbaser(u64 reg)
-{
- reg = vgic_sanitise_field(reg, GITS_CBASER_SHAREABILITY_MASK,
- GITS_CBASER_SHAREABILITY_SHIFT,
- vgic_sanitise_shareability);
- reg = vgic_sanitise_field(reg, GITS_CBASER_INNER_CACHEABILITY_MASK,
- GITS_CBASER_INNER_CACHEABILITY_SHIFT,
- vgic_sanitise_inner_cacheability);
- reg = vgic_sanitise_field(reg, GITS_CBASER_OUTER_CACHEABILITY_MASK,
- GITS_CBASER_OUTER_CACHEABILITY_SHIFT,
- vgic_sanitise_outer_cacheability);
-
- /*
- * Sanitise the physical address to be 64k aligned.
- * Also limit the physical addresses to 48 bits.
- */
- reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12));
-
- return reg;
-}
-
-static unsigned long vgic_mmio_read_its_cbaser(struct kvm *kvm,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- return extract_bytes(its->cbaser, addr & 7, len);
-}
-
-static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- /* When GITS_CTLR.Enable is 1, this register is RO. */
- if (its->enabled)
- return;
-
- mutex_lock(&its->cmd_lock);
- its->cbaser = update_64bit_reg(its->cbaser, addr & 7, len, val);
- its->cbaser = vgic_sanitise_its_cbaser(its->cbaser);
- its->creadr = 0;
- /*
- * CWRITER is architecturally UNKNOWN on reset, but we need to reset
- * it to CREADR to make sure we start with an empty command buffer.
- */
- its->cwriter = its->creadr;
- mutex_unlock(&its->cmd_lock);
-}
-
-#define ITS_CMD_BUFFER_SIZE(baser) ((((baser) & 0xff) + 1) << 12)
-#define ITS_CMD_SIZE 32
-#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5))
-
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- gpa_t cbaser;
- u64 cmd_buf[4];
- u32 reg;
-
- if (!its)
- return;
-
- mutex_lock(&its->cmd_lock);
-
- reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
- reg = ITS_CMD_OFFSET(reg);
- if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
- mutex_unlock(&its->cmd_lock);
- return;
- }
-
- its->cwriter = reg;
- cbaser = CBASER_ADDRESS(its->cbaser);
-
- while (its->cwriter != its->creadr) {
- int ret = kvm_read_guest(kvm, cbaser + its->creadr,
- cmd_buf, ITS_CMD_SIZE);
- /*
- * If kvm_read_guest() fails, this could be due to the guest
- * programming a bogus value in CBASER or something else going
- * wrong from which we cannot easily recover.
- * According to section 6.3.2 in the GICv3 spec we can just
- * ignore that command then.
- */
- if (!ret)
- vgic_its_handle_command(kvm, its, cmd_buf);
-
- its->creadr += ITS_CMD_SIZE;
- if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
- its->creadr = 0;
- }
-
- mutex_unlock(&its->cmd_lock);
-}
-
-static unsigned long vgic_mmio_read_its_cwriter(struct kvm *kvm,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- return extract_bytes(its->cwriter, addr & 0x7, len);
-}
-
-static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- return extract_bytes(its->creadr, addr & 0x7, len);
-}
-
-#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7)
-static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- u64 reg;
-
- switch (BASER_INDEX(addr)) {
- case 0:
- reg = its->baser_device_table;
- break;
- case 1:
- reg = its->baser_coll_table;
- break;
- default:
- reg = 0;
- break;
- }
-
- return extract_bytes(reg, addr & 7, len);
-}
-
-#define GITS_BASER_RO_MASK (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56))
-static void vgic_mmio_write_its_baser(struct kvm *kvm,
- struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u64 entry_size, device_type;
- u64 reg, *regptr, clearbits = 0;
-
- /* When GITS_CTLR.Enable is 1, we ignore write accesses. */
- if (its->enabled)
- return;
-
- switch (BASER_INDEX(addr)) {
- case 0:
- regptr = &its->baser_device_table;
- entry_size = 8;
- device_type = GITS_BASER_TYPE_DEVICE;
- break;
- case 1:
- regptr = &its->baser_coll_table;
- entry_size = 8;
- device_type = GITS_BASER_TYPE_COLLECTION;
- clearbits = GITS_BASER_INDIRECT;
- break;
- default:
- return;
- }
-
- reg = update_64bit_reg(*regptr, addr & 7, len, val);
- reg &= ~GITS_BASER_RO_MASK;
- reg &= ~clearbits;
-
- reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT;
- reg |= device_type << GITS_BASER_TYPE_SHIFT;
- reg = vgic_sanitise_its_baser(reg);
-
- *regptr = reg;
-}
-
-#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \
-{ \
- .reg_offset = off, \
- .len = length, \
- .access_flags = acc, \
- .its_read = rd, \
- .its_write = wr, \
-}
-
-static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len, unsigned long val)
-{
- /* Ignore */
-}
-
-static struct vgic_register_region its_registers[] = {
- REGISTER_ITS_DESC(GITS_CTLR,
- vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4,
- VGIC_ACCESS_32bit),
- REGISTER_ITS_DESC(GITS_IIDR,
- vgic_mmio_read_its_iidr, its_mmio_write_wi, 4,
- VGIC_ACCESS_32bit),
- REGISTER_ITS_DESC(GITS_TYPER,
- vgic_mmio_read_its_typer, its_mmio_write_wi, 8,
- VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
- REGISTER_ITS_DESC(GITS_CBASER,
- vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8,
- VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
- REGISTER_ITS_DESC(GITS_CWRITER,
- vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8,
- VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
- REGISTER_ITS_DESC(GITS_CREADR,
- vgic_mmio_read_its_creadr, its_mmio_write_wi, 8,
- VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
- REGISTER_ITS_DESC(GITS_BASER,
- vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40,
- VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
- REGISTER_ITS_DESC(GITS_IDREGS_BASE,
- vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30,
- VGIC_ACCESS_32bit),
-};
-
-/* This is called on setting the LPI enable bit in the redistributor. */
-void vgic_enable_lpis(struct kvm_vcpu *vcpu)
-{
- if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ))
- its_sync_lpi_pending_table(vcpu);
-}
-
-static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
-{
- struct vgic_io_device *iodev = &its->iodev;
- int ret;
-
- if (!its->initialized)
- return -EBUSY;
-
- if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
- return -ENXIO;
-
- iodev->regions = its_registers;
- iodev->nr_regions = ARRAY_SIZE(its_registers);
- kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops);
-
- iodev->base_addr = its->vgic_its_base;
- iodev->iodev_type = IODEV_ITS;
- iodev->its = its;
- mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr,
- KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
- mutex_unlock(&kvm->slots_lock);
-
- return ret;
-}
-
-#define INITIAL_BASER_VALUE \
- (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \
- GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \
- GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \
- ((8ULL - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | \
- GITS_BASER_PAGE_SIZE_64K)
-
-#define INITIAL_PROPBASER_VALUE \
- (GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) | \
- GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, SameAsInner) | \
- GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable))
-
-static int vgic_its_create(struct kvm_device *dev, u32 type)
-{
- struct vgic_its *its;
-
- if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
- return -ENODEV;
-
- its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL);
- if (!its)
- return -ENOMEM;
-
- mutex_init(&its->its_lock);
- mutex_init(&its->cmd_lock);
-
- its->vgic_its_base = VGIC_ADDR_UNDEF;
-
- INIT_LIST_HEAD(&its->device_list);
- INIT_LIST_HEAD(&its->collection_list);
-
- dev->kvm->arch.vgic.has_its = true;
- its->initialized = false;
- its->enabled = false;
- its->dev = dev;
-
- its->baser_device_table = INITIAL_BASER_VALUE |
- ((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT);
- its->baser_coll_table = INITIAL_BASER_VALUE |
- ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT);
- dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE;
-
- dev->private = its;
-
- return 0;
-}
-
-static void vgic_its_destroy(struct kvm_device *kvm_dev)
-{
- struct kvm *kvm = kvm_dev->kvm;
- struct vgic_its *its = kvm_dev->private;
- struct its_device *dev;
- struct its_itte *itte;
- struct list_head *dev_cur, *dev_temp;
- struct list_head *cur, *temp;
-
- /*
- * We may end up here without the lists ever having been initialized.
- * Check this and bail out early to avoid dereferencing a NULL pointer.
- */
- if (!its->device_list.next)
- return;
-
- mutex_lock(&its->its_lock);
- list_for_each_safe(dev_cur, dev_temp, &its->device_list) {
- dev = container_of(dev_cur, struct its_device, dev_list);
- list_for_each_safe(cur, temp, &dev->itt_head) {
- itte = (container_of(cur, struct its_itte, itte_list));
- its_free_itte(kvm, itte);
- }
- list_del(dev_cur);
- kfree(dev);
- }
-
- list_for_each_safe(cur, temp, &its->collection_list) {
- list_del(cur);
- kfree(container_of(cur, struct its_collection, coll_list));
- }
- mutex_unlock(&its->its_lock);
-
- kfree(its);
-}
-
-static int vgic_its_has_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_ADDR:
- switch (attr->attr) {
- case KVM_VGIC_ITS_ADDR_TYPE:
- return 0;
- }
- break;
- case KVM_DEV_ARM_VGIC_GRP_CTRL:
- switch (attr->attr) {
- case KVM_DEV_ARM_VGIC_CTRL_INIT:
- return 0;
- }
- break;
- }
- return -ENXIO;
-}
-
-static int vgic_its_set_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- struct vgic_its *its = dev->private;
- int ret;
-
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_ADDR: {
- u64 __user *uaddr = (u64 __user *)(long)attr->addr;
- unsigned long type = (unsigned long)attr->attr;
- u64 addr;
-
- if (type != KVM_VGIC_ITS_ADDR_TYPE)
- return -ENODEV;
-
- if (copy_from_user(&addr, uaddr, sizeof(addr)))
- return -EFAULT;
-
- ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base,
- addr, SZ_64K);
- if (ret)
- return ret;
-
- its->vgic_its_base = addr;
-
- return 0;
- }
- case KVM_DEV_ARM_VGIC_GRP_CTRL:
- switch (attr->attr) {
- case KVM_DEV_ARM_VGIC_CTRL_INIT:
- its->initialized = true;
-
- return 0;
- }
- break;
- }
- return -ENXIO;
-}
-
-static int vgic_its_get_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_ADDR: {
- struct vgic_its *its = dev->private;
- u64 addr = its->vgic_its_base;
- u64 __user *uaddr = (u64 __user *)(long)attr->addr;
- unsigned long type = (unsigned long)attr->attr;
-
- if (type != KVM_VGIC_ITS_ADDR_TYPE)
- return -ENODEV;
-
- if (copy_to_user(uaddr, &addr, sizeof(addr)))
- return -EFAULT;
-		break;
-	}
-	default:
-		return -ENXIO;
-	}
-
- return 0;
-}
-
-static struct kvm_device_ops kvm_arm_vgic_its_ops = {
- .name = "kvm-arm-vgic-its",
- .create = vgic_its_create,
- .destroy = vgic_its_destroy,
- .set_attr = vgic_its_set_attr,
- .get_attr = vgic_its_get_attr,
- .has_attr = vgic_its_has_attr,
-};
-
-int kvm_vgic_register_its_device(void)
-{
- return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
- KVM_DEV_TYPE_ARM_VGIC_ITS);
-}
-
-/*
- * Registers all ITSes with the kvm_io_bus framework.
- * To follow the existing VGIC initialization sequence, this has to be
- * done as late as possible, just before the first VCPU runs.
- */
-int vgic_register_its_iodevs(struct kvm *kvm)
-{
- struct kvm_device *dev;
- int ret = 0;
-
- list_for_each_entry(dev, &kvm->devices, vm_node) {
- if (dev->ops != &kvm_arm_vgic_its_ops)
- continue;
-
- ret = vgic_register_its_iodev(kvm, dev->private);
- if (ret)
- return ret;
- /*
- * We don't need to care about tearing down previously
- * registered ITSes, as the kvm_io_bus framework removes
- * them for us if the VM gets destroyed.
- */
- }
-
- return ret;
-}
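
As a reference for the command handling deleted above: each ITS command is 32 bytes, and the its_cmd_get_*() macros pull fields from fixed bit positions in the little-endian command words. Below is a hedged sketch of how a guest would assemble a MAPTI command in that layout (all IDs are invented sample values):

#include <linux/types.h>
#include <asm/byteorder.h>
#include <linux/irqchip/arm-gic-v3.h>

/* Illustrative sketch: build one MAPTI command in the layout decoded by
 * its_cmd_mask_field() and friends above. */
static void build_mapti_example(u64 cmd[4])
{
	u32 device_id = 0x10;		/* sample DeviceID */
	u32 event_id = 7;		/* sample EventID */
	u32 lpi_nr = 8192;		/* first valid LPI (GIC_LPI_OFFSET) */
	u16 coll_id = 3;		/* sample collection ID */

	cmd[0] = cpu_to_le64(((u64)device_id << 32) | GITS_CMD_MAPTI);
	cmd[1] = cpu_to_le64(((u64)lpi_nr << 32) | event_id);
	cmd[2] = cpu_to_le64(coll_id);
	cmd[3] = 0;
}

The guest places such commands in the buffer described by GITS_CBASER and advances GITS_CWRITER; vgic_mmio_write_its_cwriter() above then reads each command with kvm_read_guest() and dispatches it through vgic_its_handle_command().
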
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
deleted file mode 100644
index ce1f4ed..0000000
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ /dev/null
@@ -1,474 +0,0 @@
-/*
- * VGIC: KVM DEVICE API
- *
- * Copyright (C) 2015 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-#include <linux/kvm_host.h>
-#include <kvm/arm_vgic.h>
-#include <linux/uaccess.h>
-#include <asm/kvm_mmu.h>
-#include "vgic.h"
-
-/* common helpers */
-
-int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
- phys_addr_t addr, phys_addr_t alignment)
-{
- if (addr & ~KVM_PHYS_MASK)
- return -E2BIG;
-
- if (!IS_ALIGNED(addr, alignment))
- return -EINVAL;
-
- if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
- return -EEXIST;
-
- return 0;
-}
-
-/**
- * kvm_vgic_addr - set or get vgic VM base addresses
- * @kvm: pointer to the vm struct
- * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
- * @addr: pointer to address value
- * @write: if true set the address in the VM address space, if false read the
- * address
- *
- * Set or get the vgic base addresses for the distributor and the virtual CPU
- * interface in the VM physical address space. These addresses are properties
- * of the emulated core/SoC and therefore user space initially knows this
- * information.
- * Check them for sanity (alignment, double assignment). We can't check for
- * overlapping regions in case of a virtual GICv3 here, since we don't know
- * the number of VCPUs yet, so we defer this check to map_resources().
- */
-int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
-{
- int r = 0;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- int type_needed;
- phys_addr_t *addr_ptr, alignment;
-
- mutex_lock(&kvm->lock);
- switch (type) {
- case KVM_VGIC_V2_ADDR_TYPE_DIST:
- type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
- addr_ptr = &vgic->vgic_dist_base;
- alignment = SZ_4K;
- break;
- case KVM_VGIC_V2_ADDR_TYPE_CPU:
- type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
- addr_ptr = &vgic->vgic_cpu_base;
- alignment = SZ_4K;
- break;
- case KVM_VGIC_V3_ADDR_TYPE_DIST:
- type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
- addr_ptr = &vgic->vgic_dist_base;
- alignment = SZ_64K;
- break;
- case KVM_VGIC_V3_ADDR_TYPE_REDIST:
- type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
- addr_ptr = &vgic->vgic_redist_base;
- alignment = SZ_64K;
- break;
- default:
- r = -ENODEV;
- goto out;
- }
-
- if (vgic->vgic_model != type_needed) {
- r = -ENODEV;
- goto out;
- }
-
- if (write) {
- r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment);
- if (!r)
- *addr_ptr = *addr;
- } else {
- *addr = *addr_ptr;
- }
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
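
For context, a minimal userspace-side sketch of how this handler is reached might look like the following; it is not part of this patch. The vgic device fd (assumed to come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_ARM_VGIC_V2) and the 0x08000000 base address are illustrative assumptions, not values taken from this code.

/*
 * Hypothetical userspace sketch: program the GICv2 distributor base via the
 * device-attribute interface serviced by kvm_vgic_addr() above. attr.addr
 * carries a userspace pointer to a u64 holding the guest-physical address,
 * matching the copy_from_user() in vgic_set_common_attr().
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_v2_dist_base(int vgic_fd)
{
	uint64_t base = 0x08000000;	/* example: 4K aligned, within KVM_PHYS_MASK */
	struct kvm_device_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.group = KVM_DEV_ARM_VGIC_GRP_ADDR;
	attr.attr  = KVM_VGIC_V2_ADDR_TYPE_DIST;
	attr.addr  = (uint64_t)(unsigned long)&base;

	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}
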
-
-static int vgic_set_common_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- int r;
-
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_ADDR: {
- u64 __user *uaddr = (u64 __user *)(long)attr->addr;
- u64 addr;
- unsigned long type = (unsigned long)attr->attr;
-
- if (copy_from_user(&addr, uaddr, sizeof(addr)))
- return -EFAULT;
-
- r = kvm_vgic_addr(dev->kvm, type, &addr, true);
- return (r == -ENODEV) ? -ENXIO : r;
- }
- case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
- u32 val;
- int ret = 0;
-
- if (get_user(val, uaddr))
- return -EFAULT;
-
- /*
- * We require:
- * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
- * - at most 1024 interrupts
- * - a multiple of 32 interrupts
- */
- if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
- val > VGIC_MAX_RESERVED ||
- (val & 31))
- return -EINVAL;
-
- mutex_lock(&dev->kvm->lock);
-
- if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis)
- ret = -EBUSY;
- else
- dev->kvm->arch.vgic.nr_spis =
- val - VGIC_NR_PRIVATE_IRQS;
-
- mutex_unlock(&dev->kvm->lock);
-
- return ret;
- }
- case KVM_DEV_ARM_VGIC_GRP_CTRL: {
- switch (attr->attr) {
- case KVM_DEV_ARM_VGIC_CTRL_INIT:
- mutex_lock(&dev->kvm->lock);
- r = vgic_init(dev->kvm);
- mutex_unlock(&dev->kvm->lock);
- return r;
- }
- break;
- }
- }
-
- return -ENXIO;
-}
-
-static int vgic_get_common_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- int r = -ENXIO;
-
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_ADDR: {
- u64 __user *uaddr = (u64 __user *)(long)attr->addr;
- u64 addr;
- unsigned long type = (unsigned long)attr->attr;
-
- r = kvm_vgic_addr(dev->kvm, type, &addr, false);
- if (r)
- return (r == -ENODEV) ? -ENXIO : r;
-
- if (copy_to_user(uaddr, &addr, sizeof(addr)))
- return -EFAULT;
- break;
- }
- case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
-
- r = put_user(dev->kvm->arch.vgic.nr_spis +
- VGIC_NR_PRIVATE_IRQS, uaddr);
- break;
- }
- }
-
- return r;
-}
-
-static int vgic_create(struct kvm_device *dev, u32 type)
-{
- return kvm_vgic_create(dev->kvm, type);
-}
-
-static void vgic_destroy(struct kvm_device *dev)
-{
- kfree(dev);
-}
-
-int kvm_register_vgic_device(unsigned long type)
-{
- int ret = -ENODEV;
-
- switch (type) {
- case KVM_DEV_TYPE_ARM_VGIC_V2:
- ret = kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
- KVM_DEV_TYPE_ARM_VGIC_V2);
- break;
- case KVM_DEV_TYPE_ARM_VGIC_V3:
- ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
- KVM_DEV_TYPE_ARM_VGIC_V3);
-
-#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS
- if (ret)
- break;
- ret = kvm_vgic_register_its_device();
-#endif
- break;
- }
-
- return ret;
-}
-
-struct vgic_reg_attr {
- struct kvm_vcpu *vcpu;
- gpa_t addr;
-};
-
-static int parse_vgic_v2_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr,
- struct vgic_reg_attr *reg_attr)
-{
- int cpuid;
-
- cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
- KVM_DEV_ARM_VGIC_CPUID_SHIFT;
-
- if (cpuid >= atomic_read(&dev->kvm->online_vcpus))
- return -EINVAL;
-
- reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid);
- reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
-
- return 0;
-}
-
-/* unlocks vcpus from @vcpu_lock_idx and smaller */
-static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
-{
- struct kvm_vcpu *tmp_vcpu;
-
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&tmp_vcpu->mutex);
- }
-}
-
-static void unlock_all_vcpus(struct kvm *kvm)
-{
- unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
-}
-
-/* Returns true if all vcpus were locked, false otherwise */
-static bool lock_all_vcpus(struct kvm *kvm)
-{
- struct kvm_vcpu *tmp_vcpu;
- int c;
-
- /*
- * Any time a vcpu is run, vcpu_load is called which tries to grab the
- * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
- * that no other VCPUs are run and fiddle with the vgic state while we
- * access it.
- */
- kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
- if (!mutex_trylock(&tmp_vcpu->mutex)) {
- unlock_vcpus(kvm, c - 1);
- return false;
- }
- }
-
- return true;
-}
-
-/**
- * vgic_attr_regs_access_v2 - allows user space to access VGIC v2 state
- *
- * @dev: kvm device handle
- * @attr: kvm device attribute
- * @reg: address the value is read or written
- * @is_write: true if userspace is writing a register
- */
-static int vgic_attr_regs_access_v2(struct kvm_device *dev,
- struct kvm_device_attr *attr,
- u32 *reg, bool is_write)
-{
- struct vgic_reg_attr reg_attr;
- gpa_t addr;
- struct kvm_vcpu *vcpu;
- int ret;
-
- ret = parse_vgic_v2_attr(dev, attr, &reg_attr);
- if (ret)
- return ret;
-
- vcpu = reg_attr.vcpu;
- addr = reg_attr.addr;
-
- mutex_lock(&dev->kvm->lock);
-
- ret = vgic_init(dev->kvm);
- if (ret)
- goto out;
-
- if (!lock_all_vcpus(dev->kvm)) {
- ret = -EBUSY;
- goto out;
- }
-
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
- ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg);
- break;
- case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg);
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- unlock_all_vcpus(dev->kvm);
-out:
- mutex_unlock(&dev->kvm->lock);
- return ret;
-}
-
-static int vgic_v2_set_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- int ret;
-
- ret = vgic_set_common_attr(dev, attr);
- if (ret != -ENXIO)
- return ret;
-
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
- u32 reg;
-
- if (get_user(reg, uaddr))
- return -EFAULT;
-
- return vgic_attr_regs_access_v2(dev, attr, &reg, true);
- }
- }
-
- return -ENXIO;
-}
-
-static int vgic_v2_get_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- int ret;
-
- ret = vgic_get_common_attr(dev, attr);
- if (ret != -ENXIO)
- return ret;
-
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
- u32 reg = 0;
-
- ret = vgic_attr_regs_access_v2(dev, attr, &reg, false);
- if (ret)
- return ret;
- return put_user(reg, uaddr);
- }
- }
-
- return -ENXIO;
-}
-
-static int vgic_v2_has_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_ADDR:
- switch (attr->attr) {
- case KVM_VGIC_V2_ADDR_TYPE_DIST:
- case KVM_VGIC_V2_ADDR_TYPE_CPU:
- return 0;
- }
- break;
- case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
- return vgic_v2_has_attr_regs(dev, attr);
- case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
- return 0;
- case KVM_DEV_ARM_VGIC_GRP_CTRL:
- switch (attr->attr) {
- case KVM_DEV_ARM_VGIC_CTRL_INIT:
- return 0;
- }
- }
- return -ENXIO;
-}
-
-struct kvm_device_ops kvm_arm_vgic_v2_ops = {
- .name = "kvm-arm-vgic-v2",
- .create = vgic_create,
- .destroy = vgic_destroy,
- .set_attr = vgic_v2_set_attr,
- .get_attr = vgic_v2_get_attr,
- .has_attr = vgic_v2_has_attr,
-};
-
-static int vgic_v3_set_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- return vgic_set_common_attr(dev, attr);
-}
-
-static int vgic_v3_get_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- return vgic_get_common_attr(dev, attr);
-}
-
-static int vgic_v3_has_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_ADDR:
- switch (attr->attr) {
- case KVM_VGIC_V3_ADDR_TYPE_DIST:
- case KVM_VGIC_V3_ADDR_TYPE_REDIST:
- return 0;
- }
- break;
- case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
- return 0;
- case KVM_DEV_ARM_VGIC_GRP_CTRL:
- switch (attr->attr) {
- case KVM_DEV_ARM_VGIC_CTRL_INIT:
- return 0;
- }
- }
- return -ENXIO;
-}
-
-struct kvm_device_ops kvm_arm_vgic_v3_ops = {
- .name = "kvm-arm-vgic-v3",
- .create = vgic_create,
- .destroy = vgic_destroy,
- .set_attr = vgic_v3_set_attr,
- .get_attr = vgic_v3_get_attr,
- .has_attr = vgic_v3_has_attr,
-};
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
deleted file mode 100644
index b44b359..0000000
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * VGICv2 MMIO handling functions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/irqchip/arm-gic.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <kvm/iodev.h>
-#include <kvm/arm_vgic.h>
-
-#include "vgic.h"
-#include "vgic-mmio.h"
-
-static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 value;
-
- switch (addr & 0x0c) {
- case GIC_DIST_CTRL:
- value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0;
- break;
- case GIC_DIST_CTR:
- value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
- value = (value >> 5) - 1;
- value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
- break;
- case GIC_DIST_IIDR:
- value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
- break;
- default:
- return 0;
- }
-
- return value;
-}
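
A quick worked example of the GIC_DIST_CTR arithmetic above, as standalone C; the IRQ and VCPU counts are made-up illustration values, not taken from this code.

#include <stdio.h>
#include <stdint.h>

/*
 * Assume nr_spis = 32 (64 IRQs in total) and 4 online VCPUs.
 * ITLinesNumber = 64/32 - 1 = 1, CPU count field = 3, giving 0x61.
 */
int main(void)
{
	uint32_t nr_spis = 32, nr_private = 32, nr_vcpus = 4;
	uint32_t value = nr_spis + nr_private;

	value = (value >> 5) - 1;		/* ITLinesNumber */
	value |= (nr_vcpus - 1) << 5;		/* CPU number field */

	printf("GIC_DIST_CTR = 0x%x\n", value);	/* prints 0x61 */
	return 0;
}
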
-
-static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- bool was_enabled = dist->enabled;
-
- switch (addr & 0x0c) {
- case GIC_DIST_CTRL:
- dist->enabled = val & GICD_ENABLE;
- if (!was_enabled && dist->enabled)
- vgic_kick_vcpus(vcpu->kvm);
- break;
- case GIC_DIST_CTR:
- case GIC_DIST_IIDR:
- /* Nothing to do */
- return;
- }
-}
-
-static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- int nr_vcpus = atomic_read(&source_vcpu->kvm->online_vcpus);
- int intid = val & 0xf;
- int targets = (val >> 16) & 0xff;
- int mode = (val >> 24) & 0x03;
- int c;
- struct kvm_vcpu *vcpu;
-
- switch (mode) {
- case 0x0: /* as specified by targets */
- break;
- case 0x1:
- targets = (1U << nr_vcpus) - 1; /* all, ... */
- targets &= ~(1U << source_vcpu->vcpu_id); /* but self */
- break;
- case 0x2: /* this very vCPU only */
- targets = (1U << source_vcpu->vcpu_id);
- break;
- case 0x3: /* reserved */
- return;
- }
-
- kvm_for_each_vcpu(c, vcpu, source_vcpu->kvm) {
- struct vgic_irq *irq;
-
- if (!(targets & (1U << c)))
- continue;
-
- irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid);
-
- spin_lock(&irq->irq_lock);
- irq->pending = true;
- irq->source |= 1U << source_vcpu->vcpu_id;
-
- vgic_queue_irq_unlock(source_vcpu->kvm, irq);
- vgic_put_irq(source_vcpu->kvm, irq);
- }
-}
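
The same GIC_DIST_SOFTINT (GICD_SGIR) decode, pulled out as a standalone snippet; the register value is an invented example, not from this code.

#include <stdio.h>
#include <stdint.h>

/*
 * 0x000A0003 requests SGI 3 with mode 0 ("as specified by targets") and a
 * target list of 0x0a, i.e. VCPUs 1 and 3.
 */
int main(void)
{
	uint32_t val = 0x000A0003;
	int intid   = val & 0xf;
	int targets = (val >> 16) & 0xff;
	int mode    = (val >> 24) & 0x03;

	printf("SGI %d, target mask 0x%02x, mode %d\n", intid, targets, mode);
	return 0;
}
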
-
-static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
- int i;
- u64 val = 0;
-
- for (i = 0; i < len; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- val |= (u64)irq->targets << (i * 8);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return val;
-}
-
-static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
- int i;
-
- /* GICD_ITARGETSR[0-7] are read-only */
- if (intid < VGIC_NR_PRIVATE_IRQS)
- return;
-
- for (i = 0; i < len; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i);
- int target;
-
- spin_lock(&irq->irq_lock);
-
- irq->targets = (val >> (i * 8)) & 0xff;
- target = irq->targets ? __ffs(irq->targets) : 0;
- irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = addr & 0x0f;
- int i;
- u64 val = 0;
-
- for (i = 0; i < len; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- val |= (u64)irq->source << (i * 8);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
- return val;
-}
-
-static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = addr & 0x0f;
- int i;
-
- for (i = 0; i < len; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- spin_lock(&irq->irq_lock);
-
- irq->source &= ~((val >> (i * 8)) & 0xff);
- if (!irq->source)
- irq->pending = false;
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = addr & 0x0f;
- int i;
-
- for (i = 0; i < len; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- spin_lock(&irq->irq_lock);
-
- irq->source |= (val >> (i * 8)) & 0xff;
-
- if (irq->source) {
- irq->pending = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq);
- } else {
- spin_unlock(&irq->irq_lock);
- }
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_set_vmcr(vcpu, vmcr);
- else
- vgic_v3_set_vmcr(vcpu, vmcr);
-}
-
-static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_get_vmcr(vcpu, vmcr);
- else
- vgic_v3_get_vmcr(vcpu, vmcr);
-}
-
-#define GICC_ARCH_VERSION_V2 0x2
-
-/* These are for userland accesses only; there is no guest-facing emulation. */
-static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- struct vgic_vmcr vmcr;
- u32 val;
-
- vgic_get_vmcr(vcpu, &vmcr);
-
- switch (addr & 0xff) {
- case GIC_CPU_CTRL:
- val = vmcr.ctlr;
- break;
- case GIC_CPU_PRIMASK:
- val = vmcr.pmr;
- break;
- case GIC_CPU_BINPOINT:
- val = vmcr.bpr;
- break;
- case GIC_CPU_ALIAS_BINPOINT:
- val = vmcr.abpr;
- break;
- case GIC_CPU_IDENT:
- val = ((PRODUCT_ID_KVM << 20) |
- (GICC_ARCH_VERSION_V2 << 16) |
- IMPLEMENTER_ARM);
- break;
- default:
- return 0;
- }
-
- return val;
-}
-
-static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- struct vgic_vmcr vmcr;
-
- vgic_get_vmcr(vcpu, &vmcr);
-
- switch (addr & 0xff) {
- case GIC_CPU_CTRL:
- vmcr.ctlr = val;
- break;
- case GIC_CPU_PRIMASK:
- vmcr.pmr = val;
- break;
- case GIC_CPU_BINPOINT:
- vmcr.bpr = val;
- break;
- case GIC_CPU_ALIAS_BINPOINT:
- vmcr.abpr = val;
- break;
- }
-
- vgic_set_vmcr(vcpu, &vmcr);
-}
-
-static const struct vgic_register_region vgic_v2_dist_registers[] = {
- REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,
- vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
- vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
- vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR,
- vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
- vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
- vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
- vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR,
- vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,
- vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET,
- vgic_mmio_read_target, vgic_mmio_write_target, 8,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG,
- vgic_mmio_read_config, vgic_mmio_write_config, 2,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT,
- vgic_mmio_read_raz, vgic_mmio_write_sgir, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR,
- vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
- REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET,
- vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
-};
-
-static const struct vgic_register_region vgic_v2_cpu_registers[] = {
- REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL,
- vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK,
- vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT,
- vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT,
- vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 16,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT,
- vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
- VGIC_ACCESS_32bit),
-};
-
-unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
-{
- dev->regions = vgic_v2_dist_registers;
- dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
-
- kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
-
- return SZ_4K;
-}
-
-int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
-{
- int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
- const struct vgic_register_region *regions;
- gpa_t addr;
- int nr_regions, i, len;
-
- addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
-
- switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- regions = vgic_v2_dist_registers;
- nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
- break;
- case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
- regions = vgic_v2_cpu_registers;
- nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
- break;
- default:
- return -ENXIO;
- }
-
- /* We only support aligned 32-bit accesses. */
- if (addr & 3)
- return -ENXIO;
-
- for (i = 0; i < nr_regions; i++) {
- if (regions[i].bits_per_irq)
- len = (regions[i].bits_per_irq * nr_irqs) / 8;
- else
- len = regions[i].len;
-
- if (regions[i].reg_offset <= addr &&
- regions[i].reg_offset + len > addr)
- return 0;
- }
-
- return -ENXIO;
-}
-
-/*
- * When userland tries to access the VGIC register handlers, we need to
- * create a usable struct vgic_io_device to be passed to the handlers and we
- * have to set up a buffer similar to what would have happened if a guest MMIO
- * access occurred, including doing endian conversions on BE systems.
- */
-static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
- bool is_write, int offset, u32 *val)
-{
- unsigned int len = 4;
- u8 buf[4];
- int ret;
-
- if (is_write) {
- vgic_data_host_to_mmio_bus(buf, len, *val);
- ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf);
- } else {
- ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf);
- if (!ret)
- *val = vgic_data_mmio_bus_to_host(buf, len);
- }
-
- return ret;
-}
-
-int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
- int offset, u32 *val)
-{
- struct vgic_io_device dev = {
- .regions = vgic_v2_cpu_registers,
- .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers),
- .iodev_type = IODEV_CPUIF,
- };
-
- return vgic_uaccess(vcpu, &dev, is_write, offset, val);
-}
-
-int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
- int offset, u32 *val)
-{
- struct vgic_io_device dev = {
- .regions = vgic_v2_dist_registers,
- .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers),
- .iodev_type = IODEV_DIST,
- };
-
- return vgic_uaccess(vcpu, &dev, is_write, offset, val);
-}
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
deleted file mode 100644
index 0d3c76a..0000000
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ /dev/null
@@ -1,656 +0,0 @@
-/*
- * VGICv3 MMIO handling functions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/irqchip/arm-gic-v3.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <kvm/iodev.h>
-#include <kvm/arm_vgic.h>
-
-#include <asm/kvm_emulate.h>
-
-#include "vgic.h"
-#include "vgic-mmio.h"
-
-/* extract @num bytes at @offset bytes offset in data */
-unsigned long extract_bytes(u64 data, unsigned int offset,
- unsigned int num)
-{
- return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0);
-}
-
-/* allows updates of any half of a 64-bit register (or the whole thing) */
-u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len,
- unsigned long val)
-{
- int lower = (offset & 4) * 8;
- int upper = lower + 8 * len - 1;
-
- reg &= ~GENMASK_ULL(upper, lower);
- val &= GENMASK_ULL(len * 8 - 1, 0);
-
- return reg | ((u64)val << lower);
-}
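
A standalone demo of the two helpers above; GENMASK_ULL is open-coded here and the register value is illustrative.

#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

static unsigned long extract_bytes(uint64_t data, unsigned int offset,
				   unsigned int num)
{
	return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0);
}

static uint64_t update_64bit_reg(uint64_t reg, unsigned int offset,
				 unsigned int len, unsigned long val)
{
	int lower = (offset & 4) * 8;
	int upper = lower + 8 * len - 1;

	reg &= ~GENMASK_ULL(upper, lower);
	val &= GENMASK_ULL(len * 8 - 1, 0);

	return reg | ((uint64_t)val << lower);
}

int main(void)
{
	uint64_t reg = 0x1122334455667788ULL;	/* illustrative value */

	/* A 4-byte write at offset 4 replaces only the upper word. */
	reg = update_64bit_reg(reg, 4, 4, 0xdeadbeef);
	printf("reg   = 0x%llx\n", (unsigned long long)reg);	/* 0xdeadbeef55667788 */

	/* Read back the two bytes starting at byte offset 2. */
	printf("bytes = 0x%lx\n", extract_bytes(reg, 2, 2));	/* 0x5566 */
	return 0;
}
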
-
-#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS
-bool vgic_has_its(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
-
- if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
- return false;
-
- return dist->has_its;
-}
-#endif
-
-static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 value = 0;
-
- switch (addr & 0x0c) {
- case GICD_CTLR:
- if (vcpu->kvm->arch.vgic.enabled)
- value |= GICD_CTLR_ENABLE_SS_G1;
- value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
- break;
- case GICD_TYPER:
- value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
- value = (value >> 5) - 1;
- if (vgic_has_its(vcpu->kvm)) {
- value |= (INTERRUPT_ID_BITS_ITS - 1) << 19;
- value |= GICD_TYPER_LPIS;
- } else {
- value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19;
- }
- break;
- case GICD_IIDR:
- value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
- break;
- default:
- return 0;
- }
-
- return value;
-}
-
-static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- bool was_enabled = dist->enabled;
-
- switch (addr & 0x0c) {
- case GICD_CTLR:
- dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
-
- if (!was_enabled && dist->enabled)
- vgic_kick_vcpus(vcpu->kvm);
- break;
- case GICD_TYPER:
- case GICD_IIDR:
- return;
- }
-}
-
-static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- int intid = VGIC_ADDR_TO_INTID(addr, 64);
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
- unsigned long ret = 0;
-
- if (!irq)
- return 0;
-
- /* The upper word is RAZ for us. */
- if (!(addr & 4))
- ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
-
- vgic_put_irq(vcpu->kvm, irq);
- return ret;
-}
-
-static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- int intid = VGIC_ADDR_TO_INTID(addr, 64);
- struct vgic_irq *irq;
-
- /* The upper word is WI for us since we don't implement Aff3. */
- if (addr & 4)
- return;
-
- irq = vgic_get_irq(vcpu->kvm, NULL, intid);
-
- if (!irq)
- return;
-
- spin_lock(&irq->irq_lock);
-
- /* We only care about and preserve Aff0, Aff1 and Aff2. */
- irq->mpidr = val & GENMASK(23, 0);
- irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
-}
-
-static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-
- return vgic_cpu->lpis_enabled ? GICR_CTLR_ENABLE_LPIS : 0;
-}
-
-
-static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- bool was_enabled = vgic_cpu->lpis_enabled;
-
- if (!vgic_has_its(vcpu->kvm))
- return;
-
- vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS;
-
- if (!was_enabled && vgic_cpu->lpis_enabled)
- vgic_enable_lpis(vcpu);
-}
-
-static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
- int target_vcpu_id = vcpu->vcpu_id;
- u64 value;
-
- value = (u64)(mpidr & GENMASK(23, 0)) << 32;
- value |= ((target_vcpu_id & 0xffff) << 8);
- if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
- value |= GICR_TYPER_LAST;
- if (vgic_has_its(vcpu->kvm))
- value |= GICR_TYPER_PLPIS;
-
- return extract_bytes(value, addr & 7, len);
-}
-
-static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
-}
-
-static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- switch (addr & 0xffff) {
- case GICD_PIDR2:
- /* report a GICv3 compliant implementation */
- return 0x3b;
- }
-
- return 0;
-}
-
-/* We want to avoid outer shareable. */
-u64 vgic_sanitise_shareability(u64 field)
-{
- switch (field) {
- case GIC_BASER_OuterShareable:
- return GIC_BASER_InnerShareable;
- default:
- return field;
- }
-}
-
-/* Avoid any inner non-cacheable mapping. */
-u64 vgic_sanitise_inner_cacheability(u64 field)
-{
- switch (field) {
- case GIC_BASER_CACHE_nCnB:
- case GIC_BASER_CACHE_nC:
- return GIC_BASER_CACHE_RaWb;
- default:
- return field;
- }
-}
-
-/* Non-cacheable or same-as-inner are OK. */
-u64 vgic_sanitise_outer_cacheability(u64 field)
-{
- switch (field) {
- case GIC_BASER_CACHE_SameAsInner:
- case GIC_BASER_CACHE_nC:
- return field;
- default:
- return GIC_BASER_CACHE_nC;
- }
-}
-
-u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift,
- u64 (*sanitise_fn)(u64))
-{
- u64 field = (reg & field_mask) >> field_shift;
-
- field = sanitise_fn(field) << field_shift;
- return (reg & ~field_mask) | field;
-}
-
-#define PROPBASER_RES0_MASK \
- (GENMASK_ULL(63, 59) | GENMASK_ULL(55, 52) | GENMASK_ULL(6, 5))
-#define PENDBASER_RES0_MASK \
- (BIT_ULL(63) | GENMASK_ULL(61, 59) | GENMASK_ULL(55, 52) | \
- GENMASK_ULL(15, 12) | GENMASK_ULL(6, 0))
-
-static u64 vgic_sanitise_pendbaser(u64 reg)
-{
- reg = vgic_sanitise_field(reg, GICR_PENDBASER_SHAREABILITY_MASK,
- GICR_PENDBASER_SHAREABILITY_SHIFT,
- vgic_sanitise_shareability);
- reg = vgic_sanitise_field(reg, GICR_PENDBASER_INNER_CACHEABILITY_MASK,
- GICR_PENDBASER_INNER_CACHEABILITY_SHIFT,
- vgic_sanitise_inner_cacheability);
- reg = vgic_sanitise_field(reg, GICR_PENDBASER_OUTER_CACHEABILITY_MASK,
- GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT,
- vgic_sanitise_outer_cacheability);
-
- reg &= ~PENDBASER_RES0_MASK;
- reg &= ~GENMASK_ULL(51, 48);
-
- return reg;
-}
-
-static u64 vgic_sanitise_propbaser(u64 reg)
-{
- reg = vgic_sanitise_field(reg, GICR_PROPBASER_SHAREABILITY_MASK,
- GICR_PROPBASER_SHAREABILITY_SHIFT,
- vgic_sanitise_shareability);
- reg = vgic_sanitise_field(reg, GICR_PROPBASER_INNER_CACHEABILITY_MASK,
- GICR_PROPBASER_INNER_CACHEABILITY_SHIFT,
- vgic_sanitise_inner_cacheability);
- reg = vgic_sanitise_field(reg, GICR_PROPBASER_OUTER_CACHEABILITY_MASK,
- GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT,
- vgic_sanitise_outer_cacheability);
-
- reg &= ~PROPBASER_RES0_MASK;
- reg &= ~GENMASK_ULL(51, 48);
- return reg;
-}
-
-static unsigned long vgic_mmio_read_propbase(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- return extract_bytes(dist->propbaser, addr & 7, len);
-}
-
-static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- u64 old_propbaser, propbaser;
-
- /* Storing a value with LPIs already enabled is undefined */
- if (vgic_cpu->lpis_enabled)
- return;
-
- do {
- old_propbaser = dist->propbaser;
- propbaser = old_propbaser;
- propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
- propbaser = vgic_sanitise_propbaser(propbaser);
- } while (cmpxchg64(&dist->propbaser, old_propbaser,
- propbaser) != old_propbaser);
-}
-
-static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-
- return extract_bytes(vgic_cpu->pendbaser, addr & 7, len);
-}
-
-static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- u64 old_pendbaser, pendbaser;
-
- /* Storing a value with LPIs already enabled is undefined */
- if (vgic_cpu->lpis_enabled)
- return;
-
- do {
- old_pendbaser = vgic_cpu->pendbaser;
- pendbaser = old_pendbaser;
- pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
- pendbaser = vgic_sanitise_pendbaser(pendbaser);
- } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser,
- pendbaser) != old_pendbaser);
-}
-
-/*
- * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
- * redistributors, while SPIs are covered by registers in the distributor
- * block. Trying to set private IRQs in this block gets ignored.
- * We take some special care here to fix the calculation of the register
- * offset.
- */
-#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \
- { \
- .reg_offset = off, \
- .bits_per_irq = bpi, \
- .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \
- .access_flags = acc, \
- .read = vgic_mmio_read_raz, \
- .write = vgic_mmio_write_wi, \
- }, { \
- .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \
- .bits_per_irq = bpi, \
- .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8, \
- .access_flags = acc, \
- .read = rd, \
- .write = wr, \
- }
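
The offset split this macro performs can be worked through for a register with one bit per IRQ (such as GICD_ISENABLER); the numbers below are illustrative and only the private-IRQ count of 32 matches VGIC_NR_PRIVATE_IRQS.

#include <stdio.h>

int main(void)
{
	int bpi = 1, nr_private = 32, nr_total = 1024;

	int priv_len   = (bpi * nr_private) / 8;		/* 4 bytes, RAZ/WI */
	int shared_off = priv_len;				/* SPIs start at off + 4 */
	int shared_len = (bpi * (nr_total - nr_private)) / 8;	/* 124 bytes */

	printf("private: len %d; shared: offset +%d, len %d\n",
	       priv_len, shared_off, shared_len);
	return 0;
}
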
-
-static const struct vgic_register_region vgic_v3_dist_registers[] = {
- REGISTER_DESC_WITH_LENGTH(GICD_CTLR,
- vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
- vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
- vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER,
- vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
- vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
- vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
- vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 8,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR,
- vgic_mmio_read_config, vgic_mmio_write_config, 2,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER,
- vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64,
- VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICD_IDREGS,
- vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
- VGIC_ACCESS_32bit),
-};
-
-static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
- REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
- vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
- vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
- vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
- VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER,
- vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8,
- VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER,
- vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8,
- VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
- vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
- VGIC_ACCESS_32bit),
-};
-
-static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
- REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
- vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
- vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
- vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0,
- vgic_mmio_read_pending, vgic_mmio_write_spending, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0,
- vgic_mmio_read_pending, vgic_mmio_write_cpending, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0,
- vgic_mmio_read_active, vgic_mmio_write_sactive, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0,
- vgic_mmio_read_active, vgic_mmio_write_cactive, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
- vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0,
- vgic_mmio_read_config, vgic_mmio_write_config, 8,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_NSACR,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
- VGIC_ACCESS_32bit),
-};
-
-unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
-{
- dev->regions = vgic_v3_dist_registers;
- dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers);
-
- kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
-
- return SZ_64K;
-}
-
-int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address)
-{
- struct kvm_vcpu *vcpu;
- int c, ret = 0;
-
- kvm_for_each_vcpu(c, vcpu, kvm) {
- gpa_t rd_base = redist_base_address + c * SZ_64K * 2;
- gpa_t sgi_base = rd_base + SZ_64K;
- struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
- struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
-
- kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
- rd_dev->base_addr = rd_base;
- rd_dev->iodev_type = IODEV_REDIST;
- rd_dev->regions = vgic_v3_rdbase_registers;
- rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
- rd_dev->redist_vcpu = vcpu;
-
- mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
- SZ_64K, &rd_dev->dev);
- mutex_unlock(&kvm->slots_lock);
-
- if (ret)
- break;
-
- kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
- sgi_dev->base_addr = sgi_base;
- sgi_dev->iodev_type = IODEV_REDIST;
- sgi_dev->regions = vgic_v3_sgibase_registers;
- sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
- sgi_dev->redist_vcpu = vcpu;
-
- mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
- SZ_64K, &sgi_dev->dev);
- mutex_unlock(&kvm->slots_lock);
- if (ret) {
- kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
- &rd_dev->dev);
- break;
- }
- }
-
- if (ret) {
- /* The current c failed, so we start with the previous one. */
- for (c--; c >= 0; c--) {
- struct vgic_cpu *vgic_cpu;
-
- vcpu = kvm_get_vcpu(kvm, c);
- vgic_cpu = &vcpu->arch.vgic_cpu;
- kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
- &vgic_cpu->rd_iodev.dev);
- kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
- &vgic_cpu->sgi_iodev.dev);
- }
- }
-
- return ret;
-}
-
-/*
- * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
- * generation register ICC_SGI1R_EL1) with a given VCPU.
- * If the VCPU's MPIDR matches, return the level0 affinity, otherwise
- * return -1.
- */
-static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
-{
- unsigned long affinity;
- int level0;
-
- /*
- * Split the current VCPU's MPIDR into affinity level 0 and the
- * rest as this is what we have to compare against.
- */
- affinity = kvm_vcpu_get_mpidr_aff(vcpu);
- level0 = MPIDR_AFFINITY_LEVEL(affinity, 0);
- affinity &= ~MPIDR_LEVEL_MASK;
-
- /* bail out if the upper three levels don't match */
- if (sgi_aff != affinity)
- return -1;
-
- /* Is this VCPU's bit set in the mask? */
- if (!(sgi_cpu_mask & BIT(level0)))
- return -1;
-
- return level0;
-}
-
-/*
- * The ICC_SGI* registers encode the affinity differently from the MPIDR,
- * so provide a wrapper to use the existing defines to isolate a certain
- * affinity level.
- */
-#define SGI_AFFINITY_LEVEL(reg, level) \
- ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
- >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
-
-/**
- * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
- * @vcpu: The VCPU requesting a SGI
- * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
- *
- * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
- * This will trap in sys_regs.c and call this function.
- * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the
- * target processors as well as a bitmask of 16 Aff0 CPUs.
- * If the interrupt routing mode bit is not set, we iterate over all VCPUs to
- * check for matching ones. If this bit is set, we signal all, but not the
- * calling VCPU.
- */
-void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvm_vcpu *c_vcpu;
- u16 target_cpus;
- u64 mpidr;
- int sgi, c;
- int vcpu_id = vcpu->vcpu_id;
- bool broadcast;
-
- sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
- broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
- target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
- mpidr = SGI_AFFINITY_LEVEL(reg, 3);
- mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
- mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
-
- /*
- * We iterate over all VCPUs to find the MPIDRs matching the request.
- * If we have handled one CPU, we clear its bit to detect early
- * if we are already finished. This avoids iterating through all
- * VCPUs when most of the time we just signal a single VCPU.
- */
- kvm_for_each_vcpu(c, c_vcpu, kvm) {
- struct vgic_irq *irq;
-
- /* Exit early if we have dealt with all requested CPUs */
- if (!broadcast && target_cpus == 0)
- break;
-
- /* Don't signal the calling VCPU */
- if (broadcast && c == vcpu_id)
- continue;
-
- if (!broadcast) {
- int level0;
-
- level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
- if (level0 == -1)
- continue;
-
- /* remove this matching VCPU from the mask */
- target_cpus &= ~BIT(level0);
- }
-
- irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
-
- spin_lock(&irq->irq_lock);
- irq->pending = true;
-
- vgic_queue_irq_unlock(vcpu->kvm, irq);
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
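
As a standalone illustration of the decode performed above, the snippet below pulls the same fields out of an ICC_SGI1R_EL1 value using the field positions from the GICv3 architecture (TargetList [15:0], Aff1 [23:16], INTID [27:24], Aff2 [39:32], IRM bit 40, Aff3 [55:48]); the register value itself is invented.

#include <stdio.h>
#include <stdint.h>

/* Example: SGI 5 for Aff0 CPUs 0 and 2 under Aff2.Aff1 = 1.1, no broadcast. */
int main(void)
{
	uint64_t reg = (5ULL << 24) |	/* INTID (SGI number) */
		       (1ULL << 32) |	/* Aff2 = 1           */
		       (1ULL << 16) |	/* Aff1 = 1           */
		       0x5ULL;		/* TargetList: CPUs 0 and 2 */

	unsigned sgi     = (reg >> 24) & 0xf;
	unsigned targets =  reg        & 0xffff;
	unsigned aff1    = (reg >> 16) & 0xff;
	unsigned aff2    = (reg >> 32) & 0xff;
	unsigned aff3    = (reg >> 48) & 0xff;
	int      irm     = (reg >> 40) & 0x1;	/* 1 = all but self */

	printf("SGI %u, targets 0x%04x, Aff3.2.1 = %u.%u.%u, IRM %d\n",
	       sgi, targets, aff3, aff2, aff1, irm);
	return 0;
}
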
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
deleted file mode 100644
index ebe1b9f..0000000
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- * VGIC MMIO handling functions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/bitops.h>
-#include <linux/bsearch.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <kvm/iodev.h>
-#include <kvm/arm_vgic.h>
-
-#include "vgic.h"
-#include "vgic-mmio.h"
-
-unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- return 0;
-}
-
-unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- return -1UL;
-}
-
-void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
- unsigned int len, unsigned long val)
-{
- /* Ignore */
-}
-
-/*
- * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
- * of the enabled bit, so there is only one function for both here.
- */
-unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- u32 value = 0;
- int i;
-
- /* Loop over all IRQs affected by this read */
- for (i = 0; i < len * 8; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- if (irq->enabled)
- value |= (1U << i);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return value;
-}
-
-void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
-
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- spin_lock(&irq->irq_lock);
- irq->enabled = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
-
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- spin_lock(&irq->irq_lock);
-
- irq->enabled = false;
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- u32 value = 0;
- int i;
-
- /* Loop over all IRQs affected by this read */
- for (i = 0; i < len * 8; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- if (irq->pending)
- value |= (1U << i);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return value;
-}
-
-void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
-
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- spin_lock(&irq->irq_lock);
- irq->pending = true;
- if (irq->config == VGIC_CONFIG_LEVEL)
- irq->soft_pending = true;
-
- vgic_queue_irq_unlock(vcpu->kvm, irq);
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
-
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- spin_lock(&irq->irq_lock);
-
- if (irq->config == VGIC_CONFIG_LEVEL) {
- irq->soft_pending = false;
- irq->pending = irq->line_level;
- } else {
- irq->pending = false;
- }
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- u32 value = 0;
- int i;
-
- /* Loop over all IRQs affected by this read */
- for (i = 0; i < len * 8; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- if (irq->active)
- value |= (1U << i);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return value;
-}
-
-static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
- bool new_active_state)
-{
- spin_lock(&irq->irq_lock);
- /*
- * If this virtual IRQ was written into a list register, we
- * have to make sure the CPU that runs the VCPU thread has
- * synced back LR state to the struct vgic_irq. We can only
- * know this for sure, when either this irq is not assigned to
- * anyone's AP list anymore, or the VCPU thread is not
- * running on any CPUs.
- *
- * In the opposite case, we know the VCPU thread may be on its
- * way back from the guest and still has to sync back this
- * IRQ, so we release and re-acquire the spin_lock to let the
- * other thread sync back the IRQ.
- */
- while (irq->vcpu && /* IRQ may have state in an LR somewhere */
- irq->vcpu->cpu != -1) /* VCPU thread is running */
- cond_resched_lock(&irq->irq_lock);
-
- irq->active = new_active_state;
- if (new_active_state)
- vgic_queue_irq_unlock(vcpu->kvm, irq);
- else
- spin_unlock(&irq->irq_lock);
-}
-
-/*
- * If we are fiddling with an IRQ's active state, we have to make sure the IRQ
- * is not queued on some running VCPU's LRs, because then the change to the
- * active state can be overwritten when the VCPU's state is synced coming back
- * from the guest.
- *
- * For shared interrupts, we have to stop all the VCPUs because interrupts can
- * be migrated while we don't hold the IRQ locks and we don't want to be
- * chasing moving targets.
- *
- * For private interrupts, we only have to make sure the single and only VCPU
- * that can potentially queue the IRQ is stopped.
- */
-static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
-{
- if (intid < VGIC_NR_PRIVATE_IRQS)
- kvm_arm_halt_vcpu(vcpu);
- else
- kvm_arm_halt_guest(vcpu->kvm);
-}
-
-/* See vgic_change_active_prepare */
-static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
-{
- if (intid < VGIC_NR_PRIVATE_IRQS)
- kvm_arm_resume_vcpu(vcpu);
- else
- kvm_arm_resume_guest(vcpu->kvm);
-}
-
-void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
-
- vgic_change_active_prepare(vcpu, intid);
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- vgic_mmio_change_active(vcpu, irq, false);
- vgic_put_irq(vcpu->kvm, irq);
- }
- vgic_change_active_finish(vcpu, intid);
-}
-
-void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
-
- vgic_change_active_prepare(vcpu, intid);
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- vgic_mmio_change_active(vcpu, irq, true);
- vgic_put_irq(vcpu->kvm, irq);
- }
- vgic_change_active_finish(vcpu, intid);
-}
-
-unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
- int i;
- u64 val = 0;
-
- for (i = 0; i < len; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- val |= (u64)irq->priority << (i * 8);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return val;
-}
-
-/*
- * We currently don't handle changing the priority of an interrupt that
- * is already pending on a VCPU. If there is a need for this, we would
- * need to make this VCPU exit and re-evaluate the priorities, potentially
- * leading to this interrupt getting presented now to the guest (if it has
- * been masked by the priority mask before).
- */
-void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
- int i;
-
- for (i = 0; i < len; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- spin_lock(&irq->irq_lock);
- /* Narrow the priority range to what we actually support */
- irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
- spin_unlock(&irq->irq_lock);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
- u32 value = 0;
- int i;
-
- for (i = 0; i < len * 4; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- if (irq->config == VGIC_CONFIG_EDGE)
- value |= (2U << (i * 2));
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return value;
-}
-
-void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
- int i;
-
- for (i = 0; i < len * 4; i++) {
- struct vgic_irq *irq;
-
- /*
- * The configuration cannot be changed for SGIs in general;
- * for PPIs this is IMPLEMENTATION DEFINED. The arch timer
- * code relies on PPIs being level triggered, so we also
- * make them read-only here.
- */
- if (intid + i < VGIC_NR_PRIVATE_IRQS)
- continue;
-
- irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
-
- if (test_bit(i * 2 + 1, &val)) {
- irq->config = VGIC_CONFIG_EDGE;
- } else {
- irq->config = VGIC_CONFIG_LEVEL;
- irq->pending = irq->line_level | irq->soft_pending;
- }
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-static int match_region(const void *key, const void *elt)
-{
- const unsigned int offset = (unsigned long)key;
- const struct vgic_register_region *region = elt;
-
- if (offset < region->reg_offset)
- return -1;
-
- if (offset >= region->reg_offset + region->len)
- return 1;
-
- return 0;
-}
-
-/* Find the proper register handler entry given a certain address offset. */
-static const struct vgic_register_region *
-vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
- unsigned int offset)
-{
- return bsearch((void *)(uintptr_t)offset, region, nr_regions,
- sizeof(region[0]), match_region);
-}
-
-/*
- * kvm_mmio_read_buf() returns a value in a format where it can be converted
- * to a byte array and be directly observed as the guest wanted it to appear
- * in memory if it had done the store itself, which is LE for the GIC, as the
- * guest knows the GIC is always LE.
- *
- * We convert this value to the CPUs native format to deal with it as a data
- * value.
- */
-unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
-{
- unsigned long data = kvm_mmio_read_buf(val, len);
-
- switch (len) {
- case 1:
- return data;
- case 2:
- return le16_to_cpu(data);
- case 4:
- return le32_to_cpu(data);
- default:
- return le64_to_cpu(data);
- }
-}
-
-/*
- * kvm_mmio_write_buf() expects a value in a format such that if converted to
- * a byte array it is observed as the guest would see it if it could perform
- * the load directly. Since the GIC is LE, and the guest knows this, the
- * guest expects a value in little endian format.
- *
- * We convert the data value from the CPUs native format to LE so that the
- * value is returned in the proper format.
- */
-void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
- unsigned long data)
-{
- switch (len) {
- case 1:
- break;
- case 2:
- data = cpu_to_le16(data);
- break;
- case 4:
- data = cpu_to_le32(data);
- break;
- default:
- data = cpu_to_le64(data);
- }
-
- kvm_mmio_write_buf(buf, len, data);
-}
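
A small round-trip demo of the little-endian bus convention described above, written against plain byte buffers rather than the kvm_mmio_read_buf()/kvm_mmio_write_buf() helpers; the value is illustrative.

#include <stdio.h>
#include <stdint.h>

/*
 * On a big-endian host, the explicit byte-by-byte packing below is what
 * the cpu_to_le32()/le32_to_cpu() calls achieve.
 */
int main(void)
{
	uint32_t host_val = 0x12345678;	/* value as a handler would compute it */
	uint8_t buf[4];
	uint32_t readback = 0;
	int i;

	/* host -> bus: LSB first, as the LE guest expects to see it in memory */
	for (i = 0; i < 4; i++)
		buf[i] = (host_val >> (i * 8)) & 0xff;

	/* bus -> host: rebuild the native value from the LE byte stream */
	for (i = 0; i < 4; i++)
		readback |= (uint32_t)buf[i] << (i * 8);

	printf("buf = %02x %02x %02x %02x, readback = 0x%x\n",
	       buf[0], buf[1], buf[2], buf[3], readback);
	return 0;
}
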
-
-static
-struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
-{
- return container_of(dev, struct vgic_io_device, dev);
-}
-
-static bool check_region(const struct kvm *kvm,
- const struct vgic_register_region *region,
- gpa_t addr, int len)
-{
- int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
-
- switch (len) {
- case sizeof(u8):
- flags = VGIC_ACCESS_8bit;
- break;
- case sizeof(u32):
- flags = VGIC_ACCESS_32bit;
- break;
- case sizeof(u64):
- flags = VGIC_ACCESS_64bit;
- break;
- default:
- return false;
- }
-
- if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) {
- if (!region->bits_per_irq)
- return true;
-
- /* Do we access a non-allocated IRQ? */
- return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs;
- }
-
- return false;
-}
-
-static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
- gpa_t addr, int len, void *val)
-{
- struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
- const struct vgic_register_region *region;
- unsigned long data = 0;
-
- region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
- addr - iodev->base_addr);
- if (!region || !check_region(vcpu->kvm, region, addr, len)) {
- memset(val, 0, len);
- return 0;
- }
-
- switch (iodev->iodev_type) {
- case IODEV_CPUIF:
- data = region->read(vcpu, addr, len);
- break;
- case IODEV_DIST:
- data = region->read(vcpu, addr, len);
- break;
- case IODEV_REDIST:
- data = region->read(iodev->redist_vcpu, addr, len);
- break;
- case IODEV_ITS:
- data = region->its_read(vcpu->kvm, iodev->its, addr, len);
- break;
- }
-
- vgic_data_host_to_mmio_bus(val, len, data);
- return 0;
-}
-
-static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
- gpa_t addr, int len, const void *val)
-{
- struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
- const struct vgic_register_region *region;
- unsigned long data = vgic_data_mmio_bus_to_host(val, len);
-
- region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
- addr - iodev->base_addr);
- if (!region || !check_region(vcpu->kvm, region, addr, len))
- return 0;
-
- switch (iodev->iodev_type) {
- case IODEV_CPUIF:
- region->write(vcpu, addr, len, data);
- break;
- case IODEV_DIST:
- region->write(vcpu, addr, len, data);
- break;
- case IODEV_REDIST:
- region->write(iodev->redist_vcpu, addr, len, data);
- break;
- case IODEV_ITS:
- region->its_write(vcpu->kvm, iodev->its, addr, len, data);
- break;
- }
-
- return 0;
-}
-
-struct kvm_io_device_ops kvm_io_gic_ops = {
- .read = dispatch_mmio_read,
- .write = dispatch_mmio_write,
-};
-
-int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
- enum vgic_type type)
-{
- struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev;
- int ret = 0;
- unsigned int len;
-
- switch (type) {
- case VGIC_V2:
- len = vgic_v2_init_dist_iodev(io_device);
- break;
- case VGIC_V3:
- len = vgic_v3_init_dist_iodev(io_device);
- break;
- default:
- BUG_ON(1);
- }
-
- io_device->base_addr = dist_base_address;
- io_device->iodev_type = IODEV_DIST;
- io_device->redist_vcpu = NULL;
-
- mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
- len, &io_device->dev);
- mutex_unlock(&kvm->slots_lock);
-
- return ret;
-}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
deleted file mode 100644
index 84961b4..0000000
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (C) 2015, 2016 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __KVM_ARM_VGIC_MMIO_H__
-#define __KVM_ARM_VGIC_MMIO_H__
-
-struct vgic_register_region {
- unsigned int reg_offset;
- unsigned int len;
- unsigned int bits_per_irq;
- unsigned int access_flags;
- union {
- unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
- unsigned int len);
- unsigned long (*its_read)(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len);
- };
- union {
- void (*write)(struct kvm_vcpu *vcpu, gpa_t addr,
- unsigned int len, unsigned long val);
- void (*its_write)(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val);
- };
-};
-
-extern struct kvm_io_device_ops kvm_io_gic_ops;
-
-#define VGIC_ACCESS_8bit 1
-#define VGIC_ACCESS_32bit 2
-#define VGIC_ACCESS_64bit 4
-
-/*
- * Generate a mask that covers the number of bytes required to address
- * up to 1024 interrupts, each represented by <bits> bits. This assumes
- * that <bits> is a power of two.
- */
-#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
-
-/*
- * (addr & mask) gives us the _byte_ offset for the INT ID.
- * We multiply this by 8 to get the _bit_ offset, then divide this by
- * the number of bits to learn the actual INT ID.
- * But instead of a division (which requires a "long long div" implementation),
- * we shift by the binary logarithm of <bits>.
- * This assumes that <bits> is a power of two.
- */
-#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
- 8 >> ilog2(bits))
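To make the arithmetic concrete, a worked example (the values are picked for illustration and are not part of the original header):

	/*
	 * Example: the configuration registers use 2 bits per IRQ, so
	 * VGIC_ADDR_IRQ_MASK(2) == 0xff and a byte offset of 0x04 gives
	 * (0x04 * 8) >> ilog2(2) == 32 >> 1 == INTID 16.
	 * The enable registers use 1 bit per IRQ, so the same offset gives
	 * (0x04 * 8) >> 0 == INTID 32.
	 */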
-
-/*
- * Some VGIC registers store per-IRQ information, with a different number
- * of bits per IRQ. For those registers this macro is used.
- * The _WITH_LENGTH version instantiates registers with a fixed length
- * and is mutually exclusive with the _PER_IRQ version.
- */
-#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \
- { \
- .reg_offset = off, \
- .bits_per_irq = bpi, \
- .len = bpi * 1024 / 8, \
- .access_flags = acc, \
- .read = rd, \
- .write = wr, \
- }
-
-#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \
- { \
- .reg_offset = off, \
- .bits_per_irq = 0, \
- .len = length, \
- .access_flags = acc, \
- .read = rd, \
- .write = wr, \
- }
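For context, a sketch of what a register table entry built from these macros looks like, modelled on how the GICv2 distributor table in vgic-mmio-v2.c uses them (the array name here is made up; the handlers and access flags are the ones declared below):

	static const struct vgic_register_region example_dist_registers[] = {
		REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
			vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
			VGIC_ACCESS_32bit),
		REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,
			vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
			VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
	};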
-
-int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
- struct vgic_register_region *reg_desc,
- struct vgic_io_device *region,
- int nr_irqs, bool offset_private);
-
-unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
-
-void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
- unsigned long data);
-
-unsigned long extract_bytes(u64 data, unsigned int offset,
- unsigned int num);
-
-u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len,
- unsigned long val);
-
-unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len);
-
-unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len);
-
-void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
- unsigned int len, unsigned long val);
-
-unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len);
-
-void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
-
-void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
-
-unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len);
-
-void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
-
-void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
-
-unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len);
-
-void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
-
-void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
-
-unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len);
-
-void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
-
-unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len);
-
-void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
-
-unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
-
-unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev);
-
-u64 vgic_sanitise_outer_cacheability(u64 reg);
-u64 vgic_sanitise_inner_cacheability(u64 reg);
-u64 vgic_sanitise_shareability(u64 reg);
-u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift,
- u64 (*sanitise_fn)(u64));
-
-#endif
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
deleted file mode 100644
index 0a063af..0000000
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ /dev/null
@@ -1,379 +0,0 @@
-/*
- * Copyright (C) 2015, 2016 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/irqchip/arm-gic.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <kvm/arm_vgic.h>
-#include <asm/kvm_mmu.h>
-
-#include "vgic.h"
-
-/*
- * Call this function to convert a u64 value to an unsigned long * bitmask
- * in a way that works on both 32-bit and 64-bit LE and BE platforms.
- *
- * Warning: Calling this function may modify *val.
- */
-static unsigned long *u64_to_bitmask(u64 *val)
-{
-#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
- *val = (*val >> 32) | (*val << 32);
-#endif
- return (unsigned long *)val;
-}
-
-void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
-{
- struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
-
- if (cpuif->vgic_misr & GICH_MISR_EOI) {
- u64 eisr = cpuif->vgic_eisr;
- unsigned long *eisr_bmap = u64_to_bitmask(&eisr);
- int lr;
-
- for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) {
- u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
- WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE);
-
- kvm_notify_acked_irq(vcpu->kvm, 0,
- intid - VGIC_NR_PRIVATE_IRQS);
- }
- }
-
- /* check and disable underflow maintenance IRQ */
- cpuif->vgic_hcr &= ~GICH_HCR_UIE;
-
- /*
- * In the next iterations of the vcpu loop, if we sync the
- * vgic state after flushing it, but before entering the guest
- * (this happens for pending signals and vmid rollovers), then
- * make sure we don't pick up any old maintenance interrupts
- * here.
- */
- cpuif->vgic_eisr = 0;
-}
-
-void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
-{
- struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
-
- cpuif->vgic_hcr |= GICH_HCR_UIE;
-}
-
-/*
- * transfer the content of the LRs back into the corresponding ap_list:
- * - active bit is transferred as is
- * - pending bit is
- * - transferred as is in case of edge sensitive IRQs
- * - set to the line-level (resample time) for level sensitive IRQs
- */
-void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
-{
- struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
- int lr;
-
- for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
- u32 val = cpuif->vgic_lr[lr];
- u32 intid = val & GICH_LR_VIRTUALID;
- struct vgic_irq *irq;
-
- irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
-
- spin_lock(&irq->irq_lock);
-
- /* Always preserve the active bit */
- irq->active = !!(val & GICH_LR_ACTIVE_BIT);
-
- /* Edge is the only case where we preserve the pending bit */
- if (irq->config == VGIC_CONFIG_EDGE &&
- (val & GICH_LR_PENDING_BIT)) {
- irq->pending = true;
-
- if (vgic_irq_is_sgi(intid)) {
- u32 cpuid = val & GICH_LR_PHYSID_CPUID;
-
- cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
- irq->source |= (1 << cpuid);
- }
- }
-
- /*
- * Clear soft pending state when level irqs have been acked.
- * Always regenerate the pending state.
- */
- if (irq->config == VGIC_CONFIG_LEVEL) {
- if (!(val & GICH_LR_PENDING_BIT))
- irq->soft_pending = false;
-
- irq->pending = irq->line_level || irq->soft_pending;
- }
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-/*
- * Populates the particular LR with the state of a given IRQ:
- * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
- * - for a level sensitive IRQ the pending state value is unchanged;
- * it is dictated directly by the input level
- *
- * If @irq describes an SGI with multiple sources, we choose the
- * lowest-numbered source VCPU and clear that bit in the source bitmap.
- *
- * The irq_lock must be held by the caller.
- */
-void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
-{
- u32 val = irq->intid;
-
- if (irq->pending) {
- val |= GICH_LR_PENDING_BIT;
-
- if (irq->config == VGIC_CONFIG_EDGE)
- irq->pending = false;
-
- if (vgic_irq_is_sgi(irq->intid)) {
- u32 src = ffs(irq->source);
-
- BUG_ON(!src);
- val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
- irq->source &= ~(1 << (src - 1));
- if (irq->source)
- irq->pending = true;
- }
- }
-
- if (irq->active)
- val |= GICH_LR_ACTIVE_BIT;
-
- if (irq->hw) {
- val |= GICH_LR_HW;
- val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
- } else {
- if (irq->config == VGIC_CONFIG_LEVEL)
- val |= GICH_LR_EOI;
- }
-
- /* The GICv2 LR only holds five bits of priority. */
- val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
-
- vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
-}
-
-void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr)
-{
- vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0;
-}
-
-void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
-{
- u32 vmcr;
-
- vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
- vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) &
- GICH_VMCR_ALIAS_BINPOINT_MASK;
- vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
- GICH_VMCR_BINPOINT_MASK;
- vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
- GICH_VMCR_PRIMASK_MASK;
-
- vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
-}
-
-void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
-{
- u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
-
- vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >>
- GICH_VMCR_CTRL_SHIFT;
- vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >>
- GICH_VMCR_ALIAS_BINPOINT_SHIFT;
- vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
- GICH_VMCR_BINPOINT_SHIFT;
- vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
- GICH_VMCR_PRIMASK_SHIFT;
-}
-
-void vgic_v2_enable(struct kvm_vcpu *vcpu)
-{
- /*
- * By forcing VMCR to zero, the GIC will restore the binary
- * points to their reset values. Anything else resets to zero
- * anyway.
- */
- vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
- vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
-
- /* Get the show on the road... */
- vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
-}
-
-/* check for overlapping regions and for regions crossing the end of memory */
-static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base)
-{
- if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base)
- return false;
- if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base)
- return false;
-
- if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base)
- return true;
- if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base)
- return true;
-
- return false;
-}
-
-int vgic_v2_map_resources(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- int ret = 0;
-
- if (vgic_ready(kvm))
- goto out;
-
- if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
- IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
- kvm_err("Need to set vgic cpu and dist addresses first\n");
- ret = -ENXIO;
- goto out;
- }
-
- if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
- kvm_err("VGIC CPU and dist frames overlap\n");
- ret = -EINVAL;
- goto out;
- }
-
- /*
- * Initialize the vgic if this hasn't already been done on demand by
- * accessing the vgic state from userspace.
- */
- ret = vgic_init(kvm);
- if (ret) {
- kvm_err("Unable to initialize VGIC dynamic data structures\n");
- goto out;
- }
-
- ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2);
- if (ret) {
- kvm_err("Unable to register VGIC MMIO regions\n");
- goto out;
- }
-
- if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
- ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
- kvm_vgic_global_state.vcpu_base,
- KVM_VGIC_V2_CPU_SIZE, true);
- if (ret) {
- kvm_err("Unable to remap VGIC CPU to VCPU\n");
- goto out;
- }
- }
-
- dist->ready = true;
-
-out:
- if (ret)
- kvm_vgic_destroy(kvm);
- return ret;
-}
-
-DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap);
-
-/**
- * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
- * @info: pointer to the GIC description provided by the firmware (DT or ACPI)
- *
- * Returns 0 if a GICv2 has been found, an error code otherwise
- */
-int vgic_v2_probe(const struct gic_kvm_info *info)
-{
- int ret;
- u32 vtr;
-
- if (!info->vctrl.start) {
- kvm_err("GICH not present in the firmware table\n");
- return -ENXIO;
- }
-
- if (!PAGE_ALIGNED(info->vcpu.start) ||
- !PAGE_ALIGNED(resource_size(&info->vcpu))) {
- kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n");
- kvm_vgic_global_state.vcpu_base_va = ioremap(info->vcpu.start,
- resource_size(&info->vcpu));
- if (!kvm_vgic_global_state.vcpu_base_va) {
- kvm_err("Cannot ioremap GICV\n");
- return -ENOMEM;
- }
-
- ret = create_hyp_io_mappings(kvm_vgic_global_state.vcpu_base_va,
- kvm_vgic_global_state.vcpu_base_va + resource_size(&info->vcpu),
- info->vcpu.start);
- if (ret) {
- kvm_err("Cannot map GICV into hyp\n");
- goto out;
- }
-
- static_branch_enable(&vgic_v2_cpuif_trap);
- }
-
- kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start,
- resource_size(&info->vctrl));
- if (!kvm_vgic_global_state.vctrl_base) {
- kvm_err("Cannot ioremap GICH\n");
- ret = -ENOMEM;
- goto out;
- }
-
- vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR);
- kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1;
-
- ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base,
- kvm_vgic_global_state.vctrl_base +
- resource_size(&info->vctrl),
- info->vctrl.start);
- if (ret) {
- kvm_err("Cannot map VCTRL into hyp\n");
- goto out;
- }
-
- ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
- if (ret) {
- kvm_err("Cannot register GICv2 KVM device\n");
- goto out;
- }
-
- kvm_vgic_global_state.can_emulate_gicv2 = true;
- kvm_vgic_global_state.vcpu_base = info->vcpu.start;
- kvm_vgic_global_state.type = VGIC_V2;
- kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
-
- kvm_info("vgic-v2@%llx\n", info->vctrl.start);
-
- return 0;
-out:
- if (kvm_vgic_global_state.vctrl_base)
- iounmap(kvm_vgic_global_state.vctrl_base);
- if (kvm_vgic_global_state.vcpu_base_va)
- iounmap(kvm_vgic_global_state.vcpu_base_va);
-
- return ret;
-}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
deleted file mode 100644
index 9f0dae3..0000000
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/irqchip/arm-gic-v3.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <kvm/arm_vgic.h>
-#include <asm/kvm_mmu.h>
-#include <asm/kvm_asm.h>
-
-#include "vgic.h"
-
-void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
- u32 model = vcpu->kvm->arch.vgic.vgic_model;
-
- if (cpuif->vgic_misr & ICH_MISR_EOI) {
- unsigned long eisr_bmap = cpuif->vgic_eisr;
- int lr;
-
- for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) {
- u32 intid;
- u64 val = cpuif->vgic_lr[lr];
-
- if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
- intid = val & ICH_LR_VIRTUAL_ID_MASK;
- else
- intid = val & GICH_LR_VIRTUALID;
-
- WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE);
-
- kvm_notify_acked_irq(vcpu->kvm, 0,
- intid - VGIC_NR_PRIVATE_IRQS);
- }
-
- /*
- * In the next iterations of the vcpu loop, if we sync
- * the vgic state after flushing it, but before
- * entering the guest (this happens for pending
- * signals and vmid rollovers), then make sure we
- * don't pick up any old maintenance interrupts here.
- */
- cpuif->vgic_eisr = 0;
- }
-
- cpuif->vgic_hcr &= ~ICH_HCR_UIE;
-}
-
-void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
-
- cpuif->vgic_hcr |= ICH_HCR_UIE;
-}
-
-void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
- u32 model = vcpu->kvm->arch.vgic.vgic_model;
- int lr;
-
- for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
- u64 val = cpuif->vgic_lr[lr];
- u32 intid;
- struct vgic_irq *irq;
-
- if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
- intid = val & ICH_LR_VIRTUAL_ID_MASK;
- else
- intid = val & GICH_LR_VIRTUALID;
- irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
- if (!irq) /* An LPI could have been unmapped. */
- continue;
-
- spin_lock(&irq->irq_lock);
-
- /* Always preserve the active bit */
- irq->active = !!(val & ICH_LR_ACTIVE_BIT);
-
- /* Edge is the only case where we preserve the pending bit */
- if (irq->config == VGIC_CONFIG_EDGE &&
- (val & ICH_LR_PENDING_BIT)) {
- irq->pending = true;
-
- if (vgic_irq_is_sgi(intid) &&
- model == KVM_DEV_TYPE_ARM_VGIC_V2) {
- u32 cpuid = val & GICH_LR_PHYSID_CPUID;
-
- cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
- irq->source |= (1 << cpuid);
- }
- }
-
- /*
- * Clear soft pending state when level irqs have been acked.
- * Always regenerate the pending state.
- */
- if (irq->config == VGIC_CONFIG_LEVEL) {
- if (!(val & ICH_LR_PENDING_BIT))
- irq->soft_pending = false;
-
- irq->pending = irq->line_level || irq->soft_pending;
- }
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
- }
-}
-
-/* Requires the irq to be locked already */
-void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
-{
- u32 model = vcpu->kvm->arch.vgic.vgic_model;
- u64 val = irq->intid;
-
- if (irq->pending) {
- val |= ICH_LR_PENDING_BIT;
-
- if (irq->config == VGIC_CONFIG_EDGE)
- irq->pending = false;
-
- if (vgic_irq_is_sgi(irq->intid) &&
- model == KVM_DEV_TYPE_ARM_VGIC_V2) {
- u32 src = ffs(irq->source);
-
- BUG_ON(!src);
- val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
- irq->source &= ~(1 << (src - 1));
- if (irq->source)
- irq->pending = true;
- }
- }
-
- if (irq->active)
- val |= ICH_LR_ACTIVE_BIT;
-
- if (irq->hw) {
- val |= ICH_LR_HW;
- val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
- } else {
- if (irq->config == VGIC_CONFIG_LEVEL)
- val |= ICH_LR_EOI;
- }
-
- /*
- * We currently only support Group1 interrupts, which is a
- * known defect. This needs to be addressed at some point.
- */
- if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
- val |= ICH_LR_GROUP;
-
- val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
-
- vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
-}
-
-void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
-{
- vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0;
-}
-
-void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
-{
- u32 vmcr;
-
- vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
- vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
- vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
- vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
-
- vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
-}
-
-void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
-{
- u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
-
- vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
- vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
- vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
- vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
-}
-
-#define INITIAL_PENDBASER_VALUE \
- (GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) | \
- GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \
- GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable))
-
-void vgic_v3_enable(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
-
- /*
- * By forcing VMCR to zero, the GIC will restore the binary
- * points to their reset values. Anything else resets to zero
- * anyway.
- */
- vgic_v3->vgic_vmcr = 0;
- vgic_v3->vgic_elrsr = ~0;
-
- /*
-	 * If we are emulating a GICv3, we do it in a non-GICv2-compatible
- * way, so we force SRE to 1 to demonstrate this to the guest.
- * This goes with the spec allowing the value to be RAO/WI.
- */
- if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
- vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
- } else {
- vgic_v3->vgic_sre = 0;
- }
-
- /* Get the show on the road... */
- vgic_v3->vgic_hcr = ICH_HCR_EN;
-}
-
-/* check for overlapping regions and for regions crossing the end of memory */
-static bool vgic_v3_check_base(struct kvm *kvm)
-{
- struct vgic_dist *d = &kvm->arch.vgic;
- gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
-
- redist_size *= atomic_read(&kvm->online_vcpus);
-
- if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
- return false;
- if (d->vgic_redist_base + redist_size < d->vgic_redist_base)
- return false;
-
- if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base)
- return true;
- if (d->vgic_redist_base + redist_size <= d->vgic_dist_base)
- return true;
-
- return false;
-}
-
-int vgic_v3_map_resources(struct kvm *kvm)
-{
- int ret = 0;
- struct vgic_dist *dist = &kvm->arch.vgic;
-
- if (vgic_ready(kvm))
- goto out;
-
- if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
- IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
- kvm_err("Need to set vgic distributor addresses first\n");
- ret = -ENXIO;
- goto out;
- }
-
- if (!vgic_v3_check_base(kvm)) {
- kvm_err("VGIC redist and dist frames overlap\n");
- ret = -EINVAL;
- goto out;
- }
-
- /*
- * For a VGICv3 we require the userland to explicitly initialize
- * the VGIC before we need to use it.
- */
- if (!vgic_initialized(kvm)) {
- ret = -EBUSY;
- goto out;
- }
-
- ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3);
- if (ret) {
- kvm_err("Unable to register VGICv3 dist MMIO regions\n");
- goto out;
- }
-
- ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base);
- if (ret) {
- kvm_err("Unable to register VGICv3 redist MMIO regions\n");
- goto out;
- }
-
- if (vgic_has_its(kvm)) {
- ret = vgic_register_its_iodevs(kvm);
- if (ret) {
- kvm_err("Unable to register VGIC ITS MMIO regions\n");
- goto out;
- }
- }
-
- dist->ready = true;
-
-out:
- if (ret)
- kvm_vgic_destroy(kvm);
- return ret;
-}
-
-/**
- * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
- * @info: pointer to the GIC description provided by the firmware (DT or ACPI)
- *
- * Returns 0 if a GICv3 has been found, an error code otherwise
- */
-int vgic_v3_probe(const struct gic_kvm_info *info)
-{
- u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
- int ret;
-
- /*
-	 * The ListRegs field is 5 bits, but there is an architectural
- * maximum of 16 list registers. Just ignore bit 4...
- */
- kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
- kvm_vgic_global_state.can_emulate_gicv2 = false;
-
- if (!info->vcpu.start) {
- kvm_info("GICv3: no GICV resource entry\n");
- kvm_vgic_global_state.vcpu_base = 0;
- } else if (!PAGE_ALIGNED(info->vcpu.start)) {
- pr_warn("GICV physical address 0x%llx not page aligned\n",
- (unsigned long long)info->vcpu.start);
- kvm_vgic_global_state.vcpu_base = 0;
- } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
- pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
- (unsigned long long)resource_size(&info->vcpu),
- PAGE_SIZE);
- kvm_vgic_global_state.vcpu_base = 0;
- } else {
- kvm_vgic_global_state.vcpu_base = info->vcpu.start;
- kvm_vgic_global_state.can_emulate_gicv2 = true;
- ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
- if (ret) {
- kvm_err("Cannot register GICv2 KVM device.\n");
- return ret;
- }
- kvm_info("vgic-v2@%llx\n", info->vcpu.start);
- }
- ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
- if (ret) {
- kvm_err("Cannot register GICv3 KVM device.\n");
- kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2);
- return ret;
- }
-
- if (kvm_vgic_global_state.vcpu_base == 0)
- kvm_info("disabling GICv2 emulation\n");
-
- kvm_vgic_global_state.vctrl_base = NULL;
- kvm_vgic_global_state.type = VGIC_V3;
- kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
-
- return 0;
-}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
deleted file mode 100644
index 6440b56..0000000
--- a/virt/kvm/arm/vgic/vgic.c
+++ /dev/null
@@ -1,731 +0,0 @@
-/*
- * Copyright (C) 2015, 2016 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/list_sort.h>
-
-#include "vgic.h"
-
-#define CREATE_TRACE_POINTS
-#include "../trace.h"
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
-#else
-#define DEBUG_SPINLOCK_BUG_ON(p)
-#endif
-
-struct vgic_global __section(.hyp.text) kvm_vgic_global_state = {.gicv3_cpuif = STATIC_KEY_FALSE_INIT,};
-
-/*
- * Locking order is always:
- * its->cmd_lock (mutex)
- * its->its_lock (mutex)
- * vgic_cpu->ap_list_lock
- * kvm->lpi_list_lock
- * vgic_irq->irq_lock
- *
- * If you need to take multiple locks, always take the upper lock first,
- * then the lower ones, e.g. first take the its_lock, then the irq_lock.
- * If you are already holding a lock and need to take a higher one, you
- * have to drop the lower ranking lock first and re-acquire it after having
- * taken the upper one.
- *
- * When taking more than one ap_list_lock at the same time, always take the
- * lowest numbered VCPU's ap_list_lock first, so:
- * vcpuX->vcpu_id < vcpuY->vcpu_id:
- * spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
- * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
- */
-
-/*
- * Iterate over the VM's list of mapped LPIs to find the one with a
- * matching interrupt ID and return a reference to the IRQ structure.
- */
-static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_irq *irq = NULL;
-
- spin_lock(&dist->lpi_list_lock);
-
- list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
- if (irq->intid != intid)
- continue;
-
- /*
- * This increases the refcount, the caller is expected to
- * call vgic_put_irq() later once it's finished with the IRQ.
- */
- vgic_get_irq_kref(irq);
- goto out_unlock;
- }
- irq = NULL;
-
-out_unlock:
- spin_unlock(&dist->lpi_list_lock);
-
- return irq;
-}
-
-/*
- * This looks up the virtual interrupt ID to get the corresponding
- * struct vgic_irq. It also increases the refcount, so any caller is expected
- * to call vgic_put_irq() once it's finished with this IRQ.
- */
-struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
- u32 intid)
-{
- /* SGIs and PPIs */
- if (intid <= VGIC_MAX_PRIVATE)
- return &vcpu->arch.vgic_cpu.private_irqs[intid];
-
- /* SPIs */
- if (intid <= VGIC_MAX_SPI)
- return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
-
- /* LPIs */
- if (intid >= VGIC_MIN_LPI)
- return vgic_get_lpi(kvm, intid);
-
- WARN(1, "Looking up struct vgic_irq for reserved INTID");
- return NULL;
-}
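For reference, the standard GIC numbering behind the range checks above (these ranges correspond to VGIC_MAX_PRIVATE, VGIC_MAX_SPI and VGIC_MIN_LPI):

	/*
	 *    0..15    SGIs  (per-VCPU, "private")
	 *   16..31    PPIs  (per-VCPU, "private")
	 *   32..1019  SPIs  (shared)
	 *   8192..    LPIs  (GICv3 with an ITS only)
	 */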
-
-/*
- * We can't do anything in here, because we lack the kvm pointer to
- * lock and remove the item from the lpi_list. So we keep this function
- * empty and use the return value of kref_put() to trigger the freeing.
- */
-static void vgic_irq_release(struct kref *ref)
-{
-}
-
-void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
-
- if (irq->intid < VGIC_MIN_LPI)
- return;
-
- spin_lock(&dist->lpi_list_lock);
- if (!kref_put(&irq->refcount, vgic_irq_release)) {
- spin_unlock(&dist->lpi_list_lock);
- return;
-	}
-
- list_del(&irq->lpi_list);
- dist->lpi_list_count--;
- spin_unlock(&dist->lpi_list_lock);
-
- kfree(irq);
-}
-
-/**
- * kvm_vgic_target_oracle - compute the target vcpu for an irq
- *
- * @irq: The irq to route. Must be already locked.
- *
- * Based on the current state of the interrupt (enabled, pending,
- * active, vcpu and target_vcpu), compute the next vcpu this should be
- * given to. Return NULL if this shouldn't be injected at all.
- *
- * Requires the IRQ lock to be held.
- */
-static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
-{
- DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
-
- /* If the interrupt is active, it must stay on the current vcpu */
- if (irq->active)
- return irq->vcpu ? : irq->target_vcpu;
-
- /*
- * If the IRQ is not active but enabled and pending, we should direct
- * it to its configured target VCPU.
- * If the distributor is disabled, pending interrupts shouldn't be
- * forwarded.
- */
- if (irq->enabled && irq->pending) {
- if (unlikely(irq->target_vcpu &&
- !irq->target_vcpu->kvm->arch.vgic.enabled))
- return NULL;
-
- return irq->target_vcpu;
- }
-
-	/* If the IRQ is neither active nor both pending and enabled, it
-	 * should not be queued to any VCPU.
-	 */
- return NULL;
-}
-
-/*
- * The order of items in the ap_lists defines how we'll pack things in LRs as
- * well, the first items in the list being the first things populated in the
- * LRs.
- *
- * A hard rule is that active interrupts can never be pushed out of the LRs
- * (and therefore take priority) since we cannot reliably trap on deactivation
- * of IRQs and therefore they have to be present in the LRs.
- *
- * Otherwise things should be sorted by the priority field and the GIC
- * hardware support will take care of preemption of priority groups etc.
- *
- * Return negative if "a" sorts before "b", 0 to preserve order, and positive
- * to sort "b" before "a".
- */
-static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
-{
- struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
- struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
- bool penda, pendb;
- int ret;
-
- spin_lock(&irqa->irq_lock);
- spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
-
- if (irqa->active || irqb->active) {
- ret = (int)irqb->active - (int)irqa->active;
- goto out;
- }
-
- penda = irqa->enabled && irqa->pending;
- pendb = irqb->enabled && irqb->pending;
-
- if (!penda || !pendb) {
- ret = (int)pendb - (int)penda;
- goto out;
- }
-
- /* Both pending and enabled, sort by priority */
- ret = irqa->priority - irqb->priority;
-out:
- spin_unlock(&irqb->irq_lock);
- spin_unlock(&irqa->irq_lock);
- return ret;
-}
-
-/* Must be called with the ap_list_lock held */
-static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-
- DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
-
- list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
-}
-
-/*
- * Only valid injection if changing level for level-triggered IRQs or for a
- * rising edge.
- */
-static bool vgic_validate_injection(struct vgic_irq *irq, bool level)
-{
- switch (irq->config) {
- case VGIC_CONFIG_LEVEL:
- return irq->line_level != level;
- case VGIC_CONFIG_EDGE:
- return level;
- }
-
- return false;
-}
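A couple of concrete cases of the rule above, for illustration:

	/*
	 * Level-triggered: injecting level == line_level is a no-op (the line
	 * is already in that state); only an actual change is accepted.
	 * Edge-triggered: only level == true (a rising edge) is a valid
	 * injection; level == false is silently ignored.
	 */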
-
-/*
- * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
- * Do the queuing if necessary, taking the right locks in the right order.
- * Returns true when the IRQ was queued, false otherwise.
- *
- * Needs to be entered with the IRQ lock already held, but will return
- * with all locks dropped.
- */
-bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq)
-{
- struct kvm_vcpu *vcpu;
-
- DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
-
-retry:
- vcpu = vgic_target_oracle(irq);
- if (irq->vcpu || !vcpu) {
- /*
- * If this IRQ is already on a VCPU's ap_list, then it
- * cannot be moved or modified and there is no more work for
- * us to do.
- *
- * Otherwise, if the irq is not pending and enabled, it does
- * not need to be inserted into an ap_list and there is also
- * no more work for us to do.
- */
- spin_unlock(&irq->irq_lock);
-
- /*
- * We have to kick the VCPU here, because we could be
- * queueing an edge-triggered interrupt for which we
- * get no EOI maintenance interrupt. In that case,
- * while the IRQ is already on the VCPU's AP list, the
- * VCPU could have EOI'ed the original interrupt and
- * won't see this one until it exits for some other
- * reason.
- */
- if (vcpu)
- kvm_vcpu_kick(vcpu);
- return false;
- }
-
- /*
- * We must unlock the irq lock to take the ap_list_lock where
- * we are going to insert this new pending interrupt.
- */
- spin_unlock(&irq->irq_lock);
-
-	/* Someone else may change the IRQ state here; we re-check it below. */
-
- spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
- spin_lock(&irq->irq_lock);
-
- /*
- * Did something change behind our backs?
- *
- * There are two cases:
- * 1) The irq lost its pending state or was disabled behind our
- * backs and/or it was queued to another VCPU's ap_list.
- * 2) Someone changed the affinity on this irq behind our
- * backs and we are now holding the wrong ap_list_lock.
- *
- * In both cases, drop the locks and retry.
- */
-
- if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
- spin_unlock(&irq->irq_lock);
- spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
-
- spin_lock(&irq->irq_lock);
- goto retry;
- }
-
- /*
- * Grab a reference to the irq to reflect the fact that it is
- * now in the ap_list.
- */
- vgic_get_irq_kref(irq);
- list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
- irq->vcpu = vcpu;
-
- spin_unlock(&irq->irq_lock);
- spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
-
- kvm_vcpu_kick(vcpu);
-
- return true;
-}
-
-static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
- unsigned int intid, bool level,
- bool mapped_irq)
-{
- struct kvm_vcpu *vcpu;
- struct vgic_irq *irq;
- int ret;
-
- trace_vgic_update_irq_pending(cpuid, intid, level);
-
- ret = vgic_lazy_init(kvm);
- if (ret)
- return ret;
-
- vcpu = kvm_get_vcpu(kvm, cpuid);
- if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
- return -EINVAL;
-
- irq = vgic_get_irq(kvm, vcpu, intid);
- if (!irq)
- return -EINVAL;
-
- if (irq->hw != mapped_irq) {
- vgic_put_irq(kvm, irq);
- return -EINVAL;
- }
-
- spin_lock(&irq->irq_lock);
-
- if (!vgic_validate_injection(irq, level)) {
- /* Nothing to see here, move along... */
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(kvm, irq);
- return 0;
- }
-
- if (irq->config == VGIC_CONFIG_LEVEL) {
- irq->line_level = level;
- irq->pending = level || irq->soft_pending;
- } else {
- irq->pending = true;
- }
-
- vgic_queue_irq_unlock(kvm, irq);
- vgic_put_irq(kvm, irq);
-
- return 0;
-}
-
-/**
- * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
- * @kvm: The VM structure pointer
- * @cpuid: The CPU for PPIs
- * @intid: The INTID to inject a new state to.
- * @level: Edge-triggered: true: to trigger the interrupt
- * false: to ignore the call
- * Level-sensitive true: raise the input signal
- * false: lower the input signal
- *
- * The VGIC is not concerned with devices being active-LOW or active-HIGH for
- * level-sensitive interrupts. You can think of the level parameter as 1
- * being HIGH and 0 being LOW and all devices being active-HIGH.
- */
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level)
-{
- return vgic_update_irq_pending(kvm, cpuid, intid, level, false);
-}
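A minimal usage sketch (the INTID is made up; cpuid only matters for private SGIs/PPIs) showing how a device model would drive a level-sensitive SPI through this entry point:

	kvm_vgic_inject_irq(kvm, 0, 40, true);	/* assert the line */
	/* ... the device condition is cleared ... */
	kvm_vgic_inject_irq(kvm, 0, 40, false);	/* deassert the line */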
-
-int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level)
-{
- return vgic_update_irq_pending(kvm, cpuid, intid, level, true);
-}
-
-int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
-{
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
-
- BUG_ON(!irq);
-
- spin_lock(&irq->irq_lock);
-
- irq->hw = true;
- irq->hwintid = phys_irq;
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
-
- return 0;
-}
-
-int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
-{
- struct vgic_irq *irq;
-
- if (!vgic_initialized(vcpu->kvm))
- return -EAGAIN;
-
- irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
- BUG_ON(!irq);
-
- spin_lock(&irq->irq_lock);
-
- irq->hw = false;
- irq->hwintid = 0;
-
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
-
- return 0;
-}
-
-/**
- * vgic_prune_ap_list - Remove non-relevant interrupts from the list
- *
- * @vcpu: The VCPU pointer
- *
- * Go over the list of "interesting" interrupts, and prune those that we
- * won't have to consider in the near future.
- */
-static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_irq *irq, *tmp;
-
-retry:
- spin_lock(&vgic_cpu->ap_list_lock);
-
- list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
- struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
-
- spin_lock(&irq->irq_lock);
-
- BUG_ON(vcpu != irq->vcpu);
-
- target_vcpu = vgic_target_oracle(irq);
-
- if (!target_vcpu) {
- /*
- * We don't need to process this interrupt any
- * further, move it off the list.
- */
- list_del(&irq->ap_list);
- irq->vcpu = NULL;
- spin_unlock(&irq->irq_lock);
-
- /*
- * This vgic_put_irq call matches the
- * vgic_get_irq_kref in vgic_queue_irq_unlock,
- * where we added the LPI to the ap_list. As
-			 * we remove the irq from the list, we also
-			 * drop the refcount.
- */
- vgic_put_irq(vcpu->kvm, irq);
- continue;
- }
-
- if (target_vcpu == vcpu) {
- /* We're on the right CPU */
- spin_unlock(&irq->irq_lock);
- continue;
- }
-
- /* This interrupt looks like it has to be migrated. */
-
- spin_unlock(&irq->irq_lock);
- spin_unlock(&vgic_cpu->ap_list_lock);
-
- /*
- * Ensure locking order by always locking the smallest
- * ID first.
- */
- if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
- vcpuA = vcpu;
- vcpuB = target_vcpu;
- } else {
- vcpuA = target_vcpu;
- vcpuB = vcpu;
- }
-
- spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
- spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
- SINGLE_DEPTH_NESTING);
- spin_lock(&irq->irq_lock);
-
- /*
- * If the affinity has been preserved, move the
- * interrupt around. Otherwise, it means things have
- * changed while the interrupt was unlocked, and we
- * need to replay this.
- *
- * In all cases, we cannot trust the list not to have
- * changed, so we restart from the beginning.
- */
- if (target_vcpu == vgic_target_oracle(irq)) {
- struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
-
- list_del(&irq->ap_list);
- irq->vcpu = target_vcpu;
- list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
- }
-
- spin_unlock(&irq->irq_lock);
- spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
- spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
- goto retry;
- }
-
- spin_unlock(&vgic_cpu->ap_list_lock);
-}
-
-static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_process_maintenance(vcpu);
- else
- vgic_v3_process_maintenance(vcpu);
-}
-
-static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_fold_lr_state(vcpu);
- else
- vgic_v3_fold_lr_state(vcpu);
-}
-
-/* Requires the irq_lock to be held. */
-static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
- struct vgic_irq *irq, int lr)
-{
- DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
-
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_populate_lr(vcpu, irq, lr);
- else
- vgic_v3_populate_lr(vcpu, irq, lr);
-}
-
-static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_clear_lr(vcpu, lr);
- else
- vgic_v3_clear_lr(vcpu, lr);
-}
-
-static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_set_underflow(vcpu);
- else
- vgic_v3_set_underflow(vcpu);
-}
-
-/* Requires the ap_list_lock to be held. */
-static int compute_ap_list_depth(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_irq *irq;
- int count = 0;
-
- DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
-
- list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
- spin_lock(&irq->irq_lock);
- /* GICv2 SGIs can count for more than one... */
- if (vgic_irq_is_sgi(irq->intid) && irq->source)
- count += hweight8(irq->source);
- else
- count++;
- spin_unlock(&irq->irq_lock);
- }
- return count;
-}
-
-/* Requires the VCPU's ap_list_lock to be held. */
-static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_irq *irq;
- int count = 0;
-
- DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
-
- if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) {
- vgic_set_underflow(vcpu);
- vgic_sort_ap_list(vcpu);
- }
-
- list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
- spin_lock(&irq->irq_lock);
-
- if (unlikely(vgic_target_oracle(irq) != vcpu))
- goto next;
-
- /*
- * If we get an SGI with multiple sources, try to get
- * them in all at once.
- */
- do {
- vgic_populate_lr(vcpu, irq, count++);
- } while (irq->source && count < kvm_vgic_global_state.nr_lr);
-
-next:
- spin_unlock(&irq->irq_lock);
-
- if (count == kvm_vgic_global_state.nr_lr)
- break;
- }
-
- vcpu->arch.vgic_cpu.used_lrs = count;
-
- /* Nuke remaining LRs */
- for ( ; count < kvm_vgic_global_state.nr_lr; count++)
- vgic_clear_lr(vcpu, count);
-}
-
-/* Sync back the hardware VGIC state into our emulation after a guest's run. */
-void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
-{
- if (unlikely(!vgic_initialized(vcpu->kvm)))
- return;
-
- vgic_process_maintenance_interrupt(vcpu);
- vgic_fold_lr_state(vcpu);
- vgic_prune_ap_list(vcpu);
-}
-
-/* Flush our emulation state into the GIC hardware before entering the guest. */
-void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
-{
- if (unlikely(!vgic_initialized(vcpu->kvm)))
- return;
-
- spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
- vgic_flush_lr_state(vcpu);
- spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
-}
-
-int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_irq *irq;
- bool pending = false;
-
- if (!vcpu->kvm->arch.vgic.enabled)
- return false;
-
- spin_lock(&vgic_cpu->ap_list_lock);
-
- list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
- spin_lock(&irq->irq_lock);
- pending = irq->pending && irq->enabled;
- spin_unlock(&irq->irq_lock);
-
- if (pending)
- break;
- }
-
- spin_unlock(&vgic_cpu->ap_list_lock);
-
- return pending;
-}
-
-void vgic_kick_vcpus(struct kvm *kvm)
-{
- struct kvm_vcpu *vcpu;
- int c;
-
- /*
- * We've injected an interrupt, time to find out who deserves
- * a good kick...
- */
- kvm_for_each_vcpu(c, vcpu, kvm) {
- if (kvm_vgic_vcpu_pending_irq(vcpu))
- kvm_vcpu_kick(vcpu);
- }
-}
-
-bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
-{
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
- bool map_is_active;
-
- spin_lock(&irq->irq_lock);
- map_is_active = irq->hw && irq->active;
- spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
-
- return map_is_active;
-}
-
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
deleted file mode 100644
index 9d9e014..0000000
--- a/virt/kvm/arm/vgic/vgic.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2015, 2016 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __KVM_ARM_VGIC_NEW_H__
-#define __KVM_ARM_VGIC_NEW_H__
-
-#include <linux/irqchip/arm-gic-common.h>
-
-#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
-#define IMPLEMENTER_ARM 0x43b
-
-#define VGIC_ADDR_UNDEF (-1)
-#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
-
-#define INTERRUPT_ID_BITS_SPIS 10
-#define INTERRUPT_ID_BITS_ITS 16
-#define VGIC_PRI_BITS 5
-
-#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
-
-struct vgic_vmcr {
- u32 ctlr;
- u32 abpr;
- u32 bpr;
- u32 pmr;
-};
-
-struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
- u32 intid);
-void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
-bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
-void vgic_kick_vcpus(struct kvm *kvm);
-
-int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
- phys_addr_t addr, phys_addr_t alignment);
-
-void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
-void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
-void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
-void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
-void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
-int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
-int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
- int offset, u32 *val);
-int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
- int offset, u32 *val);
-void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
-void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
-void vgic_v2_enable(struct kvm_vcpu *vcpu);
-int vgic_v2_probe(const struct gic_kvm_info *info);
-int vgic_v2_map_resources(struct kvm *kvm);
-int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
- enum vgic_type);
-
-static inline void vgic_get_irq_kref(struct vgic_irq *irq)
-{
- if (irq->intid < VGIC_MIN_LPI)
- return;
-
- kref_get(&irq->refcount);
-}
-
-void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu);
-void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
-void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
-void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
-void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
-void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
-void vgic_v3_enable(struct kvm_vcpu *vcpu);
-int vgic_v3_probe(const struct gic_kvm_info *info);
-int vgic_v3_map_resources(struct kvm *kvm);
-int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
-
-#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS
-int vgic_register_its_iodevs(struct kvm *kvm);
-bool vgic_has_its(struct kvm *kvm);
-int kvm_vgic_register_its_device(void);
-void vgic_enable_lpis(struct kvm_vcpu *vcpu);
-int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
-#else
-static inline int vgic_register_its_iodevs(struct kvm *kvm)
-{
- return -ENODEV;
-}
-
-static inline bool vgic_has_its(struct kvm *kvm)
-{
- return false;
-}
-
-static inline int kvm_vgic_register_its_device(void)
-{
- return -ENODEV;
-}
-
-static inline void vgic_enable_lpis(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
-{
- return -ENODEV;
-}
-#endif
-
-int kvm_register_vgic_device(unsigned long type);
-int vgic_lazy_init(struct kvm *kvm);
-int vgic_init(struct kvm *kvm);
-
-#endif
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
deleted file mode 100644
index efeceb0..0000000
--- a/virt/kvm/async_pf.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * kvm asynchronous fault support
- *
- * Copyright 2010 Red Hat, Inc.
- *
- * Author:
- * Gleb Natapov <gleb@redhat.com>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/mmu_context.h>
-
-#include "async_pf.h"
-#include <trace/events/kvm.h>
-
-static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
- struct kvm_async_pf *work)
-{
-#ifdef CONFIG_KVM_ASYNC_PF_SYNC
- kvm_arch_async_page_present(vcpu, work);
-#endif
-}
-static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
- struct kvm_async_pf *work)
-{
-#ifndef CONFIG_KVM_ASYNC_PF_SYNC
- kvm_arch_async_page_present(vcpu, work);
-#endif
-}
-
-static struct kmem_cache *async_pf_cache;
-
-int kvm_async_pf_init(void)
-{
- async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);
-
- if (!async_pf_cache)
- return -ENOMEM;
-
- return 0;
-}
-
-void kvm_async_pf_deinit(void)
-{
- kmem_cache_destroy(async_pf_cache);
- async_pf_cache = NULL;
-}
-
-void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
-{
- INIT_LIST_HEAD(&vcpu->async_pf.done);
- INIT_LIST_HEAD(&vcpu->async_pf.queue);
- spin_lock_init(&vcpu->async_pf.lock);
-}
-
-static void async_pf_execute(struct work_struct *work)
-{
- struct kvm_async_pf *apf =
- container_of(work, struct kvm_async_pf, work);
- struct mm_struct *mm = apf->mm;
- struct kvm_vcpu *vcpu = apf->vcpu;
- unsigned long addr = apf->addr;
- gva_t gva = apf->gva;
-
- might_sleep();
-
- /*
-	 * This work is run asynchronously to the task which owns
- * mm and might be done in another context, so we must
- * use FOLL_REMOTE.
- */
- __get_user_pages_unlocked(NULL, mm, addr, 1, NULL,
- FOLL_WRITE | FOLL_REMOTE);
-
- kvm_async_page_present_sync(vcpu, apf);
-
- spin_lock(&vcpu->async_pf.lock);
- list_add_tail(&apf->link, &vcpu->async_pf.done);
- apf->vcpu = NULL;
- spin_unlock(&vcpu->async_pf.lock);
-
- /*
- * apf may be freed by kvm_check_async_pf_completion() after
- * this point
- */
-
- trace_kvm_async_pf_completed(addr, gva);
-
- /*
- * This memory barrier pairs with prepare_to_wait's set_current_state()
- */
- smp_mb();
- if (swait_active(&vcpu->wq))
- swake_up(&vcpu->wq);
-
- mmput(mm);
- kvm_put_kvm(vcpu->kvm);
-}
-
-void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
-{
- spin_lock(&vcpu->async_pf.lock);
-
- /* cancel outstanding work queue item */
- while (!list_empty(&vcpu->async_pf.queue)) {
- struct kvm_async_pf *work =
- list_first_entry(&vcpu->async_pf.queue,
- typeof(*work), queue);
- list_del(&work->queue);
-
- /*
- * We know it's present in vcpu->async_pf.done, do
- * nothing here.
- */
- if (!work->vcpu)
- continue;
-
- spin_unlock(&vcpu->async_pf.lock);
-#ifdef CONFIG_KVM_ASYNC_PF_SYNC
- flush_work(&work->work);
-#else
- if (cancel_work_sync(&work->work)) {
- mmput(work->mm);
- kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
- kmem_cache_free(async_pf_cache, work);
- }
-#endif
- spin_lock(&vcpu->async_pf.lock);
- }
-
- while (!list_empty(&vcpu->async_pf.done)) {
- struct kvm_async_pf *work =
- list_first_entry(&vcpu->async_pf.done,
- typeof(*work), link);
- list_del(&work->link);
- kmem_cache_free(async_pf_cache, work);
- }
- spin_unlock(&vcpu->async_pf.lock);
-
- vcpu->async_pf.queued = 0;
-}
-
-void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
-{
- struct kvm_async_pf *work;
-
- while (!list_empty_careful(&vcpu->async_pf.done) &&
- kvm_arch_can_inject_async_page_present(vcpu)) {
- spin_lock(&vcpu->async_pf.lock);
- work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
- link);
- list_del(&work->link);
- spin_unlock(&vcpu->async_pf.lock);
-
- kvm_arch_async_page_ready(vcpu, work);
- kvm_async_page_present_async(vcpu, work);
-
- list_del(&work->queue);
- vcpu->async_pf.queued--;
- kmem_cache_free(async_pf_cache, work);
- }
-}
-
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
- struct kvm_arch_async_pf *arch)
-{
- struct kvm_async_pf *work;
-
- if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
- return 0;
-
- /* setup delayed work */
-
- /*
-	 * Do a nowait allocation: if we would have to sleep here anyway,
-	 * we may as well sleep faulting the page in synchronously.
- */
- work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
- if (!work)
- return 0;
-
- work->wakeup_all = false;
- work->vcpu = vcpu;
- work->gva = gva;
- work->addr = hva;
- work->arch = *arch;
- work->mm = current->mm;
- atomic_inc(&work->mm->mm_users);
- kvm_get_kvm(work->vcpu->kvm);
-
-	/* this can't really happen, otherwise gfn_to_pfn_async
-	 * would succeed */
- if (unlikely(kvm_is_error_hva(work->addr)))
- goto retry_sync;
-
- INIT_WORK(&work->work, async_pf_execute);
- if (!schedule_work(&work->work))
- goto retry_sync;
-
- list_add_tail(&work->queue, &vcpu->async_pf.queue);
- vcpu->async_pf.queued++;
- kvm_arch_async_page_not_present(vcpu, work);
- return 1;
-retry_sync:
- kvm_put_kvm(work->vcpu->kvm);
- mmput(work->mm);
- kmem_cache_free(async_pf_cache, work);
- return 0;
-}
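A short note on the return convention, pieced together from the paths above:

	/*
	 * Returns 1 when the fault will be completed asynchronously (the work
	 * was queued and the guest was told via
	 * kvm_arch_async_page_not_present()).  Returns 0 when the caller must
	 * fall back to a synchronous fault: the per-VCPU queue is full, the
	 * allocation failed, or the work could not be scheduled.
	 */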
-
-int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
-{
- struct kvm_async_pf *work;
-
- if (!list_empty_careful(&vcpu->async_pf.done))
- return 0;
-
- work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- work->wakeup_all = true;
- INIT_LIST_HEAD(&work->queue); /* for list_del to work */
-
- spin_lock(&vcpu->async_pf.lock);
- list_add_tail(&work->link, &vcpu->async_pf.done);
- spin_unlock(&vcpu->async_pf.lock);
-
- vcpu->async_pf.queued++;
- return 0;
-}
diff --git a/virt/kvm/async_pf.h b/virt/kvm/async_pf.h
deleted file mode 100644
index ec4cfa2..0000000
--- a/virt/kvm/async_pf.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * kvm asynchronous fault support
- *
- * Copyright 2010 Red Hat, Inc.
- *
- * Author:
- * Gleb Natapov <gleb@redhat.com>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef __KVM_ASYNC_PF_H__
-#define __KVM_ASYNC_PF_H__
-
-#ifdef CONFIG_KVM_ASYNC_PF
-int kvm_async_pf_init(void);
-void kvm_async_pf_deinit(void);
-void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
-#else
-#define kvm_async_pf_init() (0)
-#define kvm_async_pf_deinit() do {} while (0)
-#define kvm_async_pf_vcpu_init(C) do {} while (0)
-#endif
-
-#endif
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
deleted file mode 100644
index 571c1ce..0000000
--- a/virt/kvm/coalesced_mmio.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * KVM coalesced MMIO
- *
- * Copyright (c) 2008 Bull S.A.S.
- * Copyright 2009 Red Hat, Inc. and/or its affiliates.
- *
- * Author: Laurent Vivier <Laurent.Vivier@bull.net>
- *
- */
-
-#include <kvm/iodev.h>
-
-#include <linux/kvm_host.h>
-#include <linux/slab.h>
-#include <linux/kvm.h>
-
-#include "coalesced_mmio.h"
-
-static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
-{
- return container_of(dev, struct kvm_coalesced_mmio_dev, dev);
-}
-
-static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
- gpa_t addr, int len)
-{
-	/* Is it in a batchable area, i.e. is
-	 * (addr,len) fully included in
-	 * (zone->addr, zone->size)?
-	 */
- if (len < 0)
- return 0;
- if (addr + len < addr)
- return 0;
- if (addr < dev->zone.addr)
- return 0;
- if (addr + len > dev->zone.addr + dev->zone.size)
- return 0;
- return 1;
-}
-
-static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
-{
- struct kvm_coalesced_mmio_ring *ring;
- unsigned avail;
-
- /* Are we able to batch it ? */
-
-	/* last is the first free entry;
-	 * check that we don't catch up with the first used entry
-	 * (there is always one unused entry in the buffer).
-	 */
- ring = dev->kvm->coalesced_mmio_ring;
- avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
- if (avail == 0) {
- /* full */
- return 0;
- }
-
- return 1;
-}
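A worked example of the free-space computation (the numbers are picked for illustration):

	/*
	 * first indexes the oldest entry still owned by userspace, last the
	 * next slot the kernel will fill.
	 *   first == 10, last == 4: (10 - 4 - 1) == 5 free slots
	 *   first == 10, last == 9: 0 free slots -> the ring is full and the
	 *   write falls back to a normal MMIO exit (one slot always stays
	 *   unused so that first == last means "empty").
	 */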
-
-static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
- struct kvm_io_device *this, gpa_t addr,
- int len, const void *val)
-{
- struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
- struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
-
- if (!coalesced_mmio_in_range(dev, addr, len))
- return -EOPNOTSUPP;
-
- spin_lock(&dev->kvm->ring_lock);
-
- if (!coalesced_mmio_has_room(dev)) {
- spin_unlock(&dev->kvm->ring_lock);
- return -EOPNOTSUPP;
- }
-
- /* copy data in first free entry of the ring */
-
- ring->coalesced_mmio[ring->last].phys_addr = addr;
- ring->coalesced_mmio[ring->last].len = len;
- memcpy(ring->coalesced_mmio[ring->last].data, val, len);
- smp_wmb();
- ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
- spin_unlock(&dev->kvm->ring_lock);
- return 0;
-}
-
-static void coalesced_mmio_destructor(struct kvm_io_device *this)
-{
- struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
-
- list_del(&dev->list);
-
- kfree(dev);
-}
-
-static const struct kvm_io_device_ops coalesced_mmio_ops = {
- .write = coalesced_mmio_write,
- .destructor = coalesced_mmio_destructor,
-};
-
-int kvm_coalesced_mmio_init(struct kvm *kvm)
-{
- struct page *page;
- int ret;
-
- ret = -ENOMEM;
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page)
- goto out_err;
-
- ret = 0;
- kvm->coalesced_mmio_ring = page_address(page);
-
- /*
- * We're using this spinlock to sync access to the coalesced ring.
- * The list doesn't need its own lock since device registration and
- * unregistration should only happen when kvm->slots_lock is held.
- */
- spin_lock_init(&kvm->ring_lock);
- INIT_LIST_HEAD(&kvm->coalesced_zones);
-
-out_err:
- return ret;
-}
-
-void kvm_coalesced_mmio_free(struct kvm *kvm)
-{
- if (kvm->coalesced_mmio_ring)
- free_page((unsigned long)kvm->coalesced_mmio_ring);
-}
-
-int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
- struct kvm_coalesced_mmio_zone *zone)
-{
- int ret;
- struct kvm_coalesced_mmio_dev *dev;
-
- dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
- if (!dev)
- return -ENOMEM;
-
- kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
- dev->kvm = kvm;
- dev->zone = *zone;
-
- mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
- zone->size, &dev->dev);
- if (ret < 0)
- goto out_free_dev;
- list_add_tail(&dev->list, &kvm->coalesced_zones);
- mutex_unlock(&kvm->slots_lock);
-
- return 0;
-
-out_free_dev:
- mutex_unlock(&kvm->slots_lock);
- kfree(dev);
-
- return ret;
-}
-
-int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
- struct kvm_coalesced_mmio_zone *zone)
-{
- struct kvm_coalesced_mmio_dev *dev, *tmp;
-
- mutex_lock(&kvm->slots_lock);
-
- list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list)
- if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
- kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev);
- kvm_iodevice_destructor(&dev->dev);
- }
-
- mutex_unlock(&kvm->slots_lock);
-
- return 0;
-}
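
For reference, the userspace half of the interface this file implemented looks roughly like the sketch below: register a guest-physical zone once, then drain whatever the kernel batched each time KVM_RUN returns. The address range and the handle_mmio_write() helper are made up for illustration; the ioctl, the structures and the KVM_COALESCED_MMIO_PAGE_OFFSET mapping are the stock KVM ones.

#include <sys/ioctl.h>
#include <linux/kvm.h>

void handle_mmio_write(unsigned long long addr, const void *data, int len); /* hypothetical */

static void register_zone(int vm_fd)
{
    struct kvm_coalesced_mmio_zone zone = {
        .addr = 0xfd000000,   /* assumed framebuffer-like region */
        .size = 0x100000,
    };

    ioctl(vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
}

/* 'run' is the kvm_run area mmap()ed from the vCPU fd; the ring lives
 * KVM_COALESCED_MMIO_PAGE_OFFSET pages into that same mapping. */
static void drain_ring(struct kvm_run *run, long page_size)
{
    struct kvm_coalesced_mmio_ring *ring =
        (void *)((char *)run + KVM_COALESCED_MMIO_PAGE_OFFSET * page_size);
    /* same formula the kernel uses for KVM_COALESCED_MMIO_MAX */
    unsigned max = (page_size - sizeof(*ring)) / sizeof(ring->coalesced_mmio[0]);

    while (ring->first != ring->last) {
        struct kvm_coalesced_mmio *m = &ring->coalesced_mmio[ring->first];

        handle_mmio_write(m->phys_addr, m->data, m->len);
        ring->first = (ring->first + 1) % max;
    }
}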
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
deleted file mode 100644
index 6bca74c..0000000
--- a/virt/kvm/coalesced_mmio.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef __KVM_COALESCED_MMIO_H__
-#define __KVM_COALESCED_MMIO_H__
-
-/*
- * KVM coalesced MMIO
- *
- * Copyright (c) 2008 Bull S.A.S.
- *
- * Author: Laurent Vivier <Laurent.Vivier@bull.net>
- *
- */
-
-#ifdef CONFIG_KVM_MMIO
-
-#include <linux/list.h>
-
-struct kvm_coalesced_mmio_dev {
- struct list_head list;
- struct kvm_io_device dev;
- struct kvm *kvm;
- struct kvm_coalesced_mmio_zone zone;
-};
-
-int kvm_coalesced_mmio_init(struct kvm *kvm);
-void kvm_coalesced_mmio_free(struct kvm *kvm);
-int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
- struct kvm_coalesced_mmio_zone *zone);
-int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
- struct kvm_coalesced_mmio_zone *zone);
-
-#else
-
-static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
-static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { }
-
-#endif
-
-#endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
deleted file mode 100644
index a29786d..0000000
--- a/virt/kvm/eventfd.c
+++ /dev/null
@@ -1,956 +0,0 @@
-/*
- * kvm eventfd support - use eventfd objects to signal various KVM events
- *
- * Copyright 2009 Novell. All Rights Reserved.
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * Author:
- * Gregory Haskins <ghaskins@novell.com>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/kvm_irqfd.h>
-#include <linux/workqueue.h>
-#include <linux/syscalls.h>
-#include <linux/wait.h>
-#include <linux/poll.h>
-#include <linux/file.h>
-#include <linux/list.h>
-#include <linux/eventfd.h>
-#include <linux/kernel.h>
-#include <linux/srcu.h>
-#include <linux/slab.h>
-#include <linux/seqlock.h>
-#include <linux/irqbypass.h>
-#include <trace/events/kvm.h>
-
-#include <kvm/iodev.h>
-
-#ifdef CONFIG_HAVE_KVM_IRQFD
-
-static struct workqueue_struct *irqfd_cleanup_wq;
-
-static void
-irqfd_inject(struct work_struct *work)
-{
- struct kvm_kernel_irqfd *irqfd =
- container_of(work, struct kvm_kernel_irqfd, inject);
- struct kvm *kvm = irqfd->kvm;
-
- if (!irqfd->resampler) {
- kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
- false);
- kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
- false);
- } else
- kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
- irqfd->gsi, 1, false);
-}
-
-/*
- * Since resampler irqfds share an IRQ source ID, we de-assert once
- * then notify all of the resampler irqfds using this GSI. We can't
- * do multiple de-asserts or we risk racing with incoming re-asserts.
- */
-static void
-irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
-{
- struct kvm_kernel_irqfd_resampler *resampler;
- struct kvm *kvm;
- struct kvm_kernel_irqfd *irqfd;
- int idx;
-
- resampler = container_of(kian,
- struct kvm_kernel_irqfd_resampler, notifier);
- kvm = resampler->kvm;
-
- kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
- resampler->notifier.gsi, 0, false);
-
- idx = srcu_read_lock(&kvm->irq_srcu);
-
- list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
- eventfd_signal(irqfd->resamplefd, 1);
-
- srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-static void
-irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
-{
- struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
- struct kvm *kvm = resampler->kvm;
-
- mutex_lock(&kvm->irqfds.resampler_lock);
-
- list_del_rcu(&irqfd->resampler_link);
- synchronize_srcu(&kvm->irq_srcu);
-
- if (list_empty(&resampler->list)) {
- list_del(&resampler->link);
- kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
- kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
- resampler->notifier.gsi, 0, false);
- kfree(resampler);
- }
-
- mutex_unlock(&kvm->irqfds.resampler_lock);
-}
-
-/*
- * Race-free decouple logic (ordering is critical)
- */
-static void
-irqfd_shutdown(struct work_struct *work)
-{
- struct kvm_kernel_irqfd *irqfd =
- container_of(work, struct kvm_kernel_irqfd, shutdown);
- u64 cnt;
-
- /*
- * Synchronize with the wait-queue and unhook ourselves to prevent
- * further events.
- */
- eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
-
- /*
- * We know no new events will be scheduled at this point, so block
- * until all previously outstanding events have completed
- */
- flush_work(&irqfd->inject);
-
- if (irqfd->resampler) {
- irqfd_resampler_shutdown(irqfd);
- eventfd_ctx_put(irqfd->resamplefd);
- }
-
- /*
- * It is now safe to release the object's resources
- */
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
- irq_bypass_unregister_consumer(&irqfd->consumer);
-#endif
- eventfd_ctx_put(irqfd->eventfd);
- kfree(irqfd);
-}
-
-
-/* assumes kvm->irqfds.lock is held */
-static bool
-irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
-{
- return list_empty(&irqfd->list) ? false : true;
-}
-
-/*
- * Mark the irqfd as inactive and schedule it for removal
- *
- * assumes kvm->irqfds.lock is held
- */
-static void
-irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
-{
- BUG_ON(!irqfd_is_active(irqfd));
-
- list_del_init(&irqfd->list);
-
- queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
-}
-
-int __attribute__((weak)) kvm_arch_set_irq_inatomic(
- struct kvm_kernel_irq_routing_entry *irq,
- struct kvm *kvm, int irq_source_id,
- int level,
- bool line_status)
-{
- return -EWOULDBLOCK;
-}
-
-/*
- * Called with wqh->lock held and interrupts disabled
- */
-static int
-irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
-{
- struct kvm_kernel_irqfd *irqfd =
- container_of(wait, struct kvm_kernel_irqfd, wait);
- unsigned long flags = (unsigned long)key;
- struct kvm_kernel_irq_routing_entry irq;
- struct kvm *kvm = irqfd->kvm;
- unsigned seq;
- int idx;
-
- if (flags & POLLIN) {
- idx = srcu_read_lock(&kvm->irq_srcu);
- do {
- seq = read_seqcount_begin(&irqfd->irq_entry_sc);
- irq = irqfd->irq_entry;
- } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
- /* An event has been signaled, inject an interrupt */
- if (kvm_arch_set_irq_inatomic(&irq, kvm,
- KVM_USERSPACE_IRQ_SOURCE_ID, 1,
- false) == -EWOULDBLOCK)
- schedule_work(&irqfd->inject);
- srcu_read_unlock(&kvm->irq_srcu, idx);
- }
-
- if (flags & POLLHUP) {
- /* The eventfd is closing, detach from KVM */
- unsigned long flags;
-
- spin_lock_irqsave(&kvm->irqfds.lock, flags);
-
- /*
- * We must check if someone deactivated the irqfd before
- * we could acquire the irqfds.lock since the item is
- * deactivated from the KVM side before it is unhooked from
- * the wait-queue. If it is already deactivated, we can
- * simply return knowing the other side will cleanup for us.
- * We cannot race against the irqfd going away since the
- * other side is required to acquire wqh->lock, which we hold
- */
- if (irqfd_is_active(irqfd))
- irqfd_deactivate(irqfd);
-
- spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
- }
-
- return 0;
-}
-
-static void
-irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
- poll_table *pt)
-{
- struct kvm_kernel_irqfd *irqfd =
- container_of(pt, struct kvm_kernel_irqfd, pt);
- add_wait_queue(wqh, &irqfd->wait);
-}
-
-/* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
-{
- struct kvm_kernel_irq_routing_entry *e;
- struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
- int n_entries;
-
- n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
-
- write_seqcount_begin(&irqfd->irq_entry_sc);
-
- e = entries;
- if (n_entries == 1)
- irqfd->irq_entry = *e;
- else
- irqfd->irq_entry.type = 0;
-
- write_seqcount_end(&irqfd->irq_entry_sc);
-}
-
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
-void __attribute__((weak)) kvm_arch_irq_bypass_stop(
- struct irq_bypass_consumer *cons)
-{
-}
-
-void __attribute__((weak)) kvm_arch_irq_bypass_start(
- struct irq_bypass_consumer *cons)
-{
-}
-
-int __attribute__((weak)) kvm_arch_update_irqfd_routing(
- struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set)
-{
- return 0;
-}
-#endif
-
-static int
-kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
-{
- struct kvm_kernel_irqfd *irqfd, *tmp;
- struct fd f;
- struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
- int ret;
- unsigned int events;
- int idx;
-
- if (!kvm_arch_intc_initialized(kvm))
- return -EAGAIN;
-
- irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
- if (!irqfd)
- return -ENOMEM;
-
- irqfd->kvm = kvm;
- irqfd->gsi = args->gsi;
- INIT_LIST_HEAD(&irqfd->list);
- INIT_WORK(&irqfd->inject, irqfd_inject);
- INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
- seqcount_init(&irqfd->irq_entry_sc);
-
- f = fdget(args->fd);
- if (!f.file) {
- ret = -EBADF;
- goto out;
- }
-
- eventfd = eventfd_ctx_fileget(f.file);
- if (IS_ERR(eventfd)) {
- ret = PTR_ERR(eventfd);
- goto fail;
- }
-
- irqfd->eventfd = eventfd;
-
- if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
- struct kvm_kernel_irqfd_resampler *resampler;
-
- resamplefd = eventfd_ctx_fdget(args->resamplefd);
- if (IS_ERR(resamplefd)) {
- ret = PTR_ERR(resamplefd);
- goto fail;
- }
-
- irqfd->resamplefd = resamplefd;
- INIT_LIST_HEAD(&irqfd->resampler_link);
-
- mutex_lock(&kvm->irqfds.resampler_lock);
-
- list_for_each_entry(resampler,
- &kvm->irqfds.resampler_list, link) {
- if (resampler->notifier.gsi == irqfd->gsi) {
- irqfd->resampler = resampler;
- break;
- }
- }
-
- if (!irqfd->resampler) {
- resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
- if (!resampler) {
- ret = -ENOMEM;
- mutex_unlock(&kvm->irqfds.resampler_lock);
- goto fail;
- }
-
- resampler->kvm = kvm;
- INIT_LIST_HEAD(&resampler->list);
- resampler->notifier.gsi = irqfd->gsi;
- resampler->notifier.irq_acked = irqfd_resampler_ack;
- INIT_LIST_HEAD(&resampler->link);
-
- list_add(&resampler->link, &kvm->irqfds.resampler_list);
- kvm_register_irq_ack_notifier(kvm,
- &resampler->notifier);
- irqfd->resampler = resampler;
- }
-
- list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
- synchronize_srcu(&kvm->irq_srcu);
-
- mutex_unlock(&kvm->irqfds.resampler_lock);
- }
-
- /*
- * Install our own custom wake-up handling so we are notified via
- * a callback whenever someone signals the underlying eventfd
- */
- init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
- init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
-
- spin_lock_irq(&kvm->irqfds.lock);
-
- ret = 0;
- list_for_each_entry(tmp, &kvm->irqfds.items, list) {
- if (irqfd->eventfd != tmp->eventfd)
- continue;
- /* This fd is used for another irq already. */
- ret = -EBUSY;
- spin_unlock_irq(&kvm->irqfds.lock);
- goto fail;
- }
-
- idx = srcu_read_lock(&kvm->irq_srcu);
- irqfd_update(kvm, irqfd);
- srcu_read_unlock(&kvm->irq_srcu, idx);
-
- list_add_tail(&irqfd->list, &kvm->irqfds.items);
-
- spin_unlock_irq(&kvm->irqfds.lock);
-
- /*
- * Check if there was an event already pending on the eventfd
- * before we registered, and trigger it as if we didn't miss it.
- */
- events = f.file->f_op->poll(f.file, &irqfd->pt);
-
- if (events & POLLIN)
- schedule_work(&irqfd->inject);
-
- /*
- * do not drop the file until the irqfd is fully initialized, otherwise
- * we might race against the POLLHUP
- */
- fdput(f);
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
- if (kvm_arch_has_irq_bypass()) {
- irqfd->consumer.token = (void *)irqfd->eventfd;
- irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
- irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
- irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
- irqfd->consumer.start = kvm_arch_irq_bypass_start;
- ret = irq_bypass_register_consumer(&irqfd->consumer);
- if (ret)
- pr_info("irq bypass consumer (token %p) registration fails: %d\n",
- irqfd->consumer.token, ret);
- }
-#endif
-
- return 0;
-
-fail:
- if (irqfd->resampler)
- irqfd_resampler_shutdown(irqfd);
-
- if (resamplefd && !IS_ERR(resamplefd))
- eventfd_ctx_put(resamplefd);
-
- if (eventfd && !IS_ERR(eventfd))
- eventfd_ctx_put(eventfd);
-
- fdput(f);
-
-out:
- kfree(irqfd);
- return ret;
-}
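
The resampler machinery above is what backs level-triggered irqfds. From userspace the whole thing is driven by one ioctl; a minimal sketch (error handling omitted, GSI number left to the caller):

#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Wire a level-triggered interrupt: signalling 'fd' asserts the GSI, and KVM
 * signals 'resamplefd' when the guest EOIs so the device model can decide
 * whether to re-assert. */
int attach_resample_irqfd(int vm_fd, unsigned gsi)
{
    struct kvm_irqfd req;

    memset(&req, 0, sizeof(req));
    req.fd = eventfd(0, 0);
    req.resamplefd = eventfd(0, 0);
    req.gsi = gsi;
    req.flags = KVM_IRQFD_FLAG_RESAMPLE;

    return ioctl(vm_fd, KVM_IRQFD, &req);
}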
-
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
- struct kvm_irq_ack_notifier *kian;
- int gsi, idx;
-
- idx = srcu_read_lock(&kvm->irq_srcu);
- gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
- if (gsi != -1)
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
- if (kian->gsi == gsi) {
- srcu_read_unlock(&kvm->irq_srcu, idx);
- return true;
- }
-
- srcu_read_unlock(&kvm->irq_srcu, idx);
-
- return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
-
-void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
-{
- struct kvm_irq_ack_notifier *kian;
-
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
- if (kian->gsi == gsi)
- kian->irq_acked(kian);
-}
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
- int gsi, idx;
-
- trace_kvm_ack_irq(irqchip, pin);
-
- idx = srcu_read_lock(&kvm->irq_srcu);
- gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
- if (gsi != -1)
- kvm_notify_acked_gsi(kvm, gsi);
- srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
- struct kvm_irq_ack_notifier *kian)
-{
- mutex_lock(&kvm->irq_lock);
- hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
- mutex_unlock(&kvm->irq_lock);
- kvm_vcpu_request_scan_ioapic(kvm);
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
- struct kvm_irq_ack_notifier *kian)
-{
- mutex_lock(&kvm->irq_lock);
- hlist_del_init_rcu(&kian->link);
- mutex_unlock(&kvm->irq_lock);
- synchronize_srcu(&kvm->irq_srcu);
- kvm_vcpu_request_scan_ioapic(kvm);
-}
-#endif
-
-void
-kvm_eventfd_init(struct kvm *kvm)
-{
-#ifdef CONFIG_HAVE_KVM_IRQFD
- spin_lock_init(&kvm->irqfds.lock);
- INIT_LIST_HEAD(&kvm->irqfds.items);
- INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
- mutex_init(&kvm->irqfds.resampler_lock);
-#endif
- INIT_LIST_HEAD(&kvm->ioeventfds);
-}
-
-#ifdef CONFIG_HAVE_KVM_IRQFD
-/*
- * shutdown any irqfd's that match fd+gsi
- */
-static int
-kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
-{
- struct kvm_kernel_irqfd *irqfd, *tmp;
- struct eventfd_ctx *eventfd;
-
- eventfd = eventfd_ctx_fdget(args->fd);
- if (IS_ERR(eventfd))
- return PTR_ERR(eventfd);
-
- spin_lock_irq(&kvm->irqfds.lock);
-
- list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
- if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
- /*
- * This clearing of irq_entry.type is needed for when
- * another thread calls kvm_irq_routing_update before
- * we flush workqueue below (we synchronize with
- * kvm_irq_routing_update using irqfds.lock).
- */
- write_seqcount_begin(&irqfd->irq_entry_sc);
- irqfd->irq_entry.type = 0;
- write_seqcount_end(&irqfd->irq_entry_sc);
- irqfd_deactivate(irqfd);
- }
- }
-
- spin_unlock_irq(&kvm->irqfds.lock);
- eventfd_ctx_put(eventfd);
-
- /*
- * Block until we know all outstanding shutdown jobs have completed
- * so that we guarantee there will not be any more interrupts on this
- * gsi once this deassign function returns.
- */
- flush_workqueue(irqfd_cleanup_wq);
-
- return 0;
-}
-
-int
-kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
-{
- if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
- return -EINVAL;
-
- if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
- return kvm_irqfd_deassign(kvm, args);
-
- return kvm_irqfd_assign(kvm, args);
-}
-
-/*
- * This function is called as the kvm VM fd is being released. Shut down all
- * irqfds that still remain open.
- */
-void
-kvm_irqfd_release(struct kvm *kvm)
-{
- struct kvm_kernel_irqfd *irqfd, *tmp;
-
- spin_lock_irq(&kvm->irqfds.lock);
-
- list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
- irqfd_deactivate(irqfd);
-
- spin_unlock_irq(&kvm->irqfds.lock);
-
- /*
- * Block until we know all outstanding shutdown jobs have completed
- * since we do not take a kvm* reference.
- */
- flush_workqueue(irqfd_cleanup_wq);
-
-}
-
-/*
- * Take note of a change in irq routing.
- * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
- */
-void kvm_irq_routing_update(struct kvm *kvm)
-{
- struct kvm_kernel_irqfd *irqfd;
-
- spin_lock_irq(&kvm->irqfds.lock);
-
- list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
- irqfd_update(kvm, irqfd);
-
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
- if (irqfd->producer) {
- int ret = kvm_arch_update_irqfd_routing(
- irqfd->kvm, irqfd->producer->irq,
- irqfd->gsi, 1);
- WARN_ON(ret);
- }
-#endif
- }
-
- spin_unlock_irq(&kvm->irqfds.lock);
-}
-
-/*
- * create a host-wide workqueue for issuing deferred shutdown requests
- * aggregated from all vm* instances. We need our own isolated
- * queue to ease flushing work items when a VM exits.
- */
-int kvm_irqfd_init(void)
-{
- irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
- if (!irqfd_cleanup_wq)
- return -ENOMEM;
-
- return 0;
-}
-
-void kvm_irqfd_exit(void)
-{
- destroy_workqueue(irqfd_cleanup_wq);
-}
-#endif
-
-/*
- * --------------------------------------------------------------------
- * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
- *
- * userspace can register a PIO/MMIO address with an eventfd for receiving
- * notification when the memory has been touched.
- * --------------------------------------------------------------------
- */
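
The classic consumer of this is a virtio device model: it asks KVM to turn a guest store to a queue-notify doorbell into an eventfd signal so no exit to userspace is needed. A sketch of the userspace side (doorbell address, width and queue index are illustrative):

#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns an eventfd that becomes readable whenever the guest writes the
 * 16-bit value 'vq_index' to the MMIO doorbell at 'addr'. */
int attach_doorbell(int vm_fd, unsigned long long addr, unsigned short vq_index)
{
    struct kvm_ioeventfd req;
    int efd = eventfd(0, 0);

    memset(&req, 0, sizeof(req));
    req.addr = addr;
    req.len = 2;
    req.fd = efd;
    req.datamatch = vq_index;
    req.flags = KVM_IOEVENTFD_FLAG_DATAMATCH;

    if (ioctl(vm_fd, KVM_IOEVENTFD, &req) < 0)
        return -1;
    return efd;
}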
-
-struct _ioeventfd {
- struct list_head list;
- u64 addr;
- int length;
- struct eventfd_ctx *eventfd;
- u64 datamatch;
- struct kvm_io_device dev;
- u8 bus_idx;
- bool wildcard;
-};
-
-static inline struct _ioeventfd *
-to_ioeventfd(struct kvm_io_device *dev)
-{
- return container_of(dev, struct _ioeventfd, dev);
-}
-
-static void
-ioeventfd_release(struct _ioeventfd *p)
-{
- eventfd_ctx_put(p->eventfd);
- list_del(&p->list);
- kfree(p);
-}
-
-static bool
-ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
-{
- u64 _val;
-
- if (addr != p->addr)
- /* address must be precise for a hit */
- return false;
-
- if (!p->length)
- /* length = 0 means only look at the address, so always a hit */
- return true;
-
- if (len != p->length)
- /* address-range must be precise for a hit */
- return false;
-
- if (p->wildcard)
- /* all else equal, wildcard is always a hit */
- return true;
-
- /* otherwise, we have to actually compare the data */
-
- BUG_ON(!IS_ALIGNED((unsigned long)val, len));
-
- switch (len) {
- case 1:
- _val = *(u8 *)val;
- break;
- case 2:
- _val = *(u16 *)val;
- break;
- case 4:
- _val = *(u32 *)val;
- break;
- case 8:
- _val = *(u64 *)val;
- break;
- default:
- return false;
- }
-
- return _val == p->datamatch ? true : false;
-}
-
-/* MMIO/PIO writes trigger an event if the addr/val match */
-static int
-ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
- int len, const void *val)
-{
- struct _ioeventfd *p = to_ioeventfd(this);
-
- if (!ioeventfd_in_range(p, addr, len, val))
- return -EOPNOTSUPP;
-
- eventfd_signal(p->eventfd, 1);
- return 0;
-}
-
-/*
- * This function is called as KVM is completely shutting down. We do not
- * need to worry about locking; just nuke anything we have as quickly as possible.
- */
-static void
-ioeventfd_destructor(struct kvm_io_device *this)
-{
- struct _ioeventfd *p = to_ioeventfd(this);
-
- ioeventfd_release(p);
-}
-
-static const struct kvm_io_device_ops ioeventfd_ops = {
- .write = ioeventfd_write,
- .destructor = ioeventfd_destructor,
-};
-
-/* assumes kvm->slots_lock held */
-static bool
-ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
-{
- struct _ioeventfd *_p;
-
- list_for_each_entry(_p, &kvm->ioeventfds, list)
- if (_p->bus_idx == p->bus_idx &&
- _p->addr == p->addr &&
- (!_p->length || !p->length ||
- (_p->length == p->length &&
- (_p->wildcard || p->wildcard ||
- _p->datamatch == p->datamatch))))
- return true;
-
- return false;
-}
-
-static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
-{
- if (flags & KVM_IOEVENTFD_FLAG_PIO)
- return KVM_PIO_BUS;
- if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
- return KVM_VIRTIO_CCW_NOTIFY_BUS;
- return KVM_MMIO_BUS;
-}
-
-static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
- enum kvm_bus bus_idx,
- struct kvm_ioeventfd *args)
-{
-
- struct eventfd_ctx *eventfd;
- struct _ioeventfd *p;
- int ret;
-
- eventfd = eventfd_ctx_fdget(args->fd);
- if (IS_ERR(eventfd))
- return PTR_ERR(eventfd);
-
- p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (!p) {
- ret = -ENOMEM;
- goto fail;
- }
-
- INIT_LIST_HEAD(&p->list);
- p->addr = args->addr;
- p->bus_idx = bus_idx;
- p->length = args->len;
- p->eventfd = eventfd;
-
- /* The datamatch feature is optional, otherwise this is a wildcard */
- if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
- p->datamatch = args->datamatch;
- else
- p->wildcard = true;
-
- mutex_lock(&kvm->slots_lock);
-
- /* Verify that there isn't a match already */
- if (ioeventfd_check_collision(kvm, p)) {
- ret = -EEXIST;
- goto unlock_fail;
- }
-
- kvm_iodevice_init(&p->dev, &ioeventfd_ops);
-
- ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
- &p->dev);
- if (ret < 0)
- goto unlock_fail;
-
- kvm->buses[bus_idx]->ioeventfd_count++;
- list_add_tail(&p->list, &kvm->ioeventfds);
-
- mutex_unlock(&kvm->slots_lock);
-
- return 0;
-
-unlock_fail:
- mutex_unlock(&kvm->slots_lock);
-
-fail:
- kfree(p);
- eventfd_ctx_put(eventfd);
-
- return ret;
-}
-
-static int
-kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_ioeventfd *args)
-{
- struct _ioeventfd *p, *tmp;
- struct eventfd_ctx *eventfd;
- int ret = -ENOENT;
-
- eventfd = eventfd_ctx_fdget(args->fd);
- if (IS_ERR(eventfd))
- return PTR_ERR(eventfd);
-
- mutex_lock(&kvm->slots_lock);
-
- list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
- bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
-
- if (p->bus_idx != bus_idx ||
- p->eventfd != eventfd ||
- p->addr != args->addr ||
- p->length != args->len ||
- p->wildcard != wildcard)
- continue;
-
- if (!p->wildcard && p->datamatch != args->datamatch)
- continue;
-
- kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
- kvm->buses[bus_idx]->ioeventfd_count--;
- ioeventfd_release(p);
- ret = 0;
- break;
- }
-
- mutex_unlock(&kvm->slots_lock);
-
- eventfd_ctx_put(eventfd);
-
- return ret;
-}
-
-static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
-{
- enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
- int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
-
- if (!args->len && bus_idx == KVM_MMIO_BUS)
- kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
-
- return ret;
-}
-
-static int
-kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
-{
- enum kvm_bus bus_idx;
- int ret;
-
- bus_idx = ioeventfd_bus_from_flags(args->flags);
- /* must be natural-word sized, or 0 to ignore length */
- switch (args->len) {
- case 0:
- case 1:
- case 2:
- case 4:
- case 8:
- break;
- default:
- return -EINVAL;
- }
-
- /* check for range overflow */
- if (args->addr + args->len < args->addr)
- return -EINVAL;
-
- /* check for extra flags that we don't understand */
- if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
- return -EINVAL;
-
- /* ioeventfd with no length can't be combined with DATAMATCH */
- if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
- return -EINVAL;
-
- ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
- if (ret)
- goto fail;
-
- /* When length is ignored, MMIO is also put on a separate bus, for
- * faster lookups.
- */
- if (!args->len && bus_idx == KVM_MMIO_BUS) {
- ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
- if (ret < 0)
- goto fast_fail;
- }
-
- return 0;
-
-fast_fail:
- kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
-fail:
- return ret;
-}
-
-int
-kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
-{
- if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
- return kvm_deassign_ioeventfd(kvm, args);
-
- return kvm_assign_ioeventfd(kvm, args);
-}
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 3bcc999..3885f42 100644..100755
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -3,6 +3,7 @@
* Copyright (c) 2007, Intel Corporation.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
* Copyright (c) 2013, Alexander Graf <agraf@suse.de>
+ * Copyright 2019 Google LLC
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -25,11 +26,7 @@
*/
#include <linux/kvm_host.h>
-#include <linux/slab.h>
-#include <linux/srcu.h>
-#include <linux/export.h>
-#include <trace/events/kvm.h>
-#include "irq.h"
+#include "arch\x86\kvm\irq.h"
int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
@@ -38,13 +35,15 @@ int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e;
int n = 0;
- irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
- lockdep_is_held(&kvm->irq_lock));
+ irq_rt = kvm->irq_routing;
+
if (irq_rt && gsi < irq_rt->nr_rt_entries) {
+#define LIST_ENTRY_TYPE_INFO struct kvm_kernel_irq_routing_entry
hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
entries[n] = *e;
++n;
}
+#undef LIST_ENTRY_TYPE_INFO
}
return n;
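
The LIST_ENTRY_TYPE_INFO define/undef pairs added in this file presumably feed the port's re-implementation of hlist_for_each_entry(), which cannot rely on GCC's typeof() under MSVC and therefore needs the element type spelled out next to each use. A plausible shape for such a macro, assumed rather than taken from this patch, would be:

#include <stddef.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

#define hlist_entry_safe(ptr, type, member) \
    ((ptr) ? container_of(ptr, type, member) : NULL)

/* Same walk as the kernel macro, with typeof(*(pos)) replaced by the
 * externally defined LIST_ENTRY_TYPE_INFO. */
#define hlist_for_each_entry(pos, head, member) \
    for (pos = hlist_entry_safe((head)->first, LIST_ENTRY_TYPE_INFO, member); \
         pos; \
         pos = hlist_entry_safe((pos)->member.next, LIST_ENTRY_TYPE_INFO, member))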
@@ -62,7 +61,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
{
struct kvm_kernel_irq_routing_entry route;
- if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
+ if (!irqchip_in_kernel(kvm) || (msi->flags & ~GVM_MSI_VALID_DEVID))
return -EINVAL;
route.msi.address_lo = msi->address_lo;
@@ -71,7 +70,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
route.msi.flags = msi->flags;
route.msi.devid = msi->devid;
- return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
+ return kvm_set_msi(&route, kvm, GVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
}
/*
@@ -83,11 +82,9 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
- struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
+ struct kvm_kernel_irq_routing_entry irq_set[GVM_NR_IRQCHIPS];
int ret = -1, i, idx;
- trace_kvm_set_irq(irq, level, irq_source_id);
-
/* Not possible to detect if the guest uses the PIC or the
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
@@ -120,10 +117,13 @@ static void free_irq_routing_table(struct kvm_irq_routing_table *rt)
struct kvm_kernel_irq_routing_entry *e;
struct hlist_node *n;
+#define LIST_ENTRY_TYPE_INFO struct kvm_kernel_irq_routing_entry
hlist_for_each_entry_safe(e, n, &rt->map[i], link) {
+ n = e->link.next;
hlist_del(&e->link);
kfree(e);
}
+#undef LIST_ENTRY_TYPE_INFO
}
kfree(rt);
@@ -133,7 +133,7 @@ void kvm_free_irq_routing(struct kvm *kvm)
{
/* Called only during vm destruction. Nobody can use the pointer
at this stage */
- struct kvm_irq_routing_table *rt = rcu_access_pointer(kvm->irq_routing);
+ struct kvm_irq_routing_table *rt = kvm->irq_routing;
free_irq_routing_table(rt);
}
@@ -149,18 +149,20 @@ static int setup_routing_entry(struct kvm *kvm,
* Do not allow GSI to be mapped to the same irqchip more than once.
* Allow only one to one mapping between GSI and non-irqchip routing.
*/
+#define LIST_ENTRY_TYPE_INFO struct kvm_kernel_irq_routing_entry
hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
- if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
- ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
+ if (ei->type != GVM_IRQ_ROUTING_IRQCHIP ||
+ ue->type != GVM_IRQ_ROUTING_IRQCHIP ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return r;
+#undef LIST_ENTRY_TYPE_INFO
e->gsi = ue->gsi;
e->type = ue->type;
r = kvm_set_routing_entry(kvm, e, ue);
if (r)
goto out;
- if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
+ if (e->type == GVM_IRQ_ROUTING_IRQCHIP)
rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
hlist_add_head(&e->link, &rt->map[e->gsi]);
@@ -169,9 +171,10 @@ out:
return r;
}
-void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
+void kvm_arch_irq_routing_update_default(struct kvm *kvm)
{
}
+#pragma comment(linker, "/alternatename:kvm_arch_irq_routing_update=kvm_arch_irq_routing_update_default")
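
This pragma is the usual MSVC stand-in for __attribute__((weak)): /alternatename tells the linker to resolve the left-hand symbol to the right-hand one only when no other definition is linked in. Stripped of the KVM context, the pattern looks like this (names are illustrative; the undecorated symbols match an x64 build, as in the pragma above):

void frobnicate(void);   /* hypothetical hook that an arch file may or may not define */

void frobnicate_default(void)
{
    /* fallback body, used only when nothing else defines frobnicate */
}

#pragma comment(linker, "/alternatename:frobnicate=frobnicate_default")

void caller(void)
{
    frobnicate();   /* binds to frobnicate_default unless another object provides frobnicate */
}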
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *ue,
@@ -184,7 +187,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
int r;
for (i = 0; i < nr; ++i) {
- if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
+ if (ue[i].gsi >= GVM_MAX_IRQ_ROUTES)
return -EINVAL;
nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
}
@@ -198,8 +201,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
return -ENOMEM;
new->nr_rt_entries = nr_rt_entries;
- for (i = 0; i < KVM_NR_IRQCHIPS; i++)
- for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++)
+ for (i = 0; i < GVM_NR_IRQCHIPS; i++)
+ for (j = 0; j < GVM_IRQCHIP_NUM_PINS; j++)
new->chip[i][j] = -1;
for (i = 0; i < nr; ++i) {
@@ -210,8 +213,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
r = -EINVAL;
switch (ue->type) {
- case KVM_IRQ_ROUTING_MSI:
- if (ue->flags & ~KVM_MSI_VALID_DEVID)
+ case GVM_IRQ_ROUTING_MSI:
+ if (ue->flags & ~GVM_MSI_VALID_DEVID)
goto free_entry;
break;
default:
@@ -227,9 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
mutex_lock(&kvm->irq_lock);
old = kvm->irq_routing;
- rcu_assign_pointer(kvm->irq_routing, new);
- kvm_irq_routing_update(kvm);
- kvm_arch_irq_routing_update(kvm);
+ kvm->irq_routing = new;
mutex_unlock(&kvm->irq_lock);
kvm_arch_post_irq_routing_update(kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5c36034..e521da2 100644..100755
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -6,6 +6,7 @@
*
* Copyright (C) 2006 Qumranet, Inc.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2019 Google LLC
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -19,68 +20,13 @@
#include <kvm/iodev.h>
#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/percpu.h>
-#include <linux/mm.h>
-#include <linux/miscdevice.h>
-#include <linux/vmalloc.h>
-#include <linux/reboot.h>
-#include <linux/debugfs.h>
-#include <linux/highmem.h>
-#include <linux/file.h>
-#include <linux/syscore_ops.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/cpumask.h>
-#include <linux/smp.h>
-#include <linux/anon_inodes.h>
-#include <linux/profile.h>
-#include <linux/kvm_para.h>
-#include <linux/pagemap.h>
-#include <linux/mman.h>
-#include <linux/swap.h>
-#include <linux/bitops.h>
-#include <linux/spinlock.h>
-#include <linux/compat.h>
-#include <linux/srcu.h>
-#include <linux/hugetlb.h>
-#include <linux/slab.h>
-#include <linux/sort.h>
-#include <linux/bsearch.h>
-
-#include <asm/processor.h>
-#include <asm/io.h>
-#include <asm/ioctl.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-
-#include "coalesced_mmio.h"
-#include "async_pf.h"
-#include "vfio.h"
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/kvm.h>
+#include <uapi/linux/kvm.h>
+#include <ntkrutils.h>
+#include <gvm-main.h>
/* Worst case buffer size needed for holding an integer. */
#define ITOA_MAX_LEN 12
-MODULE_AUTHOR("Qumranet");
-MODULE_LICENSE("GPL");
-
-/* Architectures should define their poll value according to the halt latency */
-static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
-module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
-
-/* Default doubles per-vcpu halt_poll_ns. */
-static unsigned int halt_poll_ns_grow = 2;
-module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
-
-/* Default resets per-vcpu halt_poll_ns . */
-static unsigned int halt_poll_ns_shrink;
-module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
-
/*
* Ordering of locks:
*
@@ -90,47 +36,129 @@ module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
DEFINE_SPINLOCK(kvm_lock);
static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
+static LONG64 global_vm_id = -1;
static cpumask_var_t cpus_hardware_enabled;
static int kvm_usage_count;
static atomic_t hardware_enable_failed;
struct kmem_cache *kvm_vcpu_cache;
-EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
-
-static __read_mostly struct preempt_ops kvm_preempt_ops;
-
-struct dentry *kvm_debugfs_dir;
-EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
-static int kvm_debugfs_num_entries;
-static const struct file_operations *stat_fops_per_vm[];
-
-static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
- unsigned long arg);
-#ifdef CONFIG_KVM_COMPAT
-static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
- unsigned long arg);
-#endif
static int hardware_enable_all(void);
static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-static void kvm_release_pfn_dirty(kvm_pfn_t pfn);
+void kvm_release_pfn_dirty(kvm_pfn_t pfn);
static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
-__visible bool kvm_rebooting;
-EXPORT_SYMBOL_GPL(kvm_rebooting);
-
-static bool largepages_enabled = true;
+/*
+* bsearch - binary search an array of elements
+* @key: pointer to item being searched for
+* @base: pointer to first element to search
+* @num: number of elements
+* @size: size of each element
+* @cmp: pointer to comparison function
+*
+* This function does a binary search on the given array. The
+* contents of the array should already be in ascending sorted order
+* under the provided comparison function.
+*
+* Note that the key need not have the same type as the elements in
+* the array, e.g. key could be a string and the comparison function
+* could compare the string with the struct's name field. However, if
+* the key and elements in the array are of the same type, you can use
+* the same comparison function for both sort() and bsearch().
+*/
+void *bsearch(const void *key, const void *base, size_t num, size_t size,
+ int(*cmp)(const void *key, const void *elt))
+{
+ size_t start = 0, end = num;
+ int result;
+ const char *__base = base;
+
+ while (start < end) {
+ size_t mid = start + (end - start) / 2;
+
+ result = cmp(key, __base + mid * size);
+ if (result < 0)
+ end = mid;
+ else if (result > 0)
+ start = mid + 1;
+ else
+ return (void *)(__base + mid * size);
+ }
+
+ return NULL;
+}
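
A quick illustration of the calling convention (not part of the patch): the comparison callback always receives the search key first and an array element second.

static int cmp_int(const void *key, const void *elt)
{
    int k = *(const int *)key, e = *(const int *)elt;

    return (k > e) - (k < e);
}

static int lookup_example(void)
{
    int table[] = { 2, 5, 9, 17, 23 };   /* must already be sorted ascending */
    int key = 17;
    int *hit = bsearch(&key, table,
                       sizeof(table) / sizeof(table[0]), sizeof(table[0]),
                       cmp_int);

    return hit ? (int)(hit - table) : -1;   /* 3 here; -1 would mean "not found" */
}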
+
+static void generic_swap(void *a, void *b, int size)
+{
+ char t;
+ char *__a = a, *__b = b;
+
+ do {
+ t = *__a;
+ *__a++ = *__b;
+ *__b++ = t;
+ } while (--size > 0);
+}
-bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
-{
- if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+/**
+* sort - sort an array of elements
+* @base: pointer to data to sort
+* @num: number of elements
+* @size: size of each element
+* @cmp_func: pointer to comparison function
+* @swap_func: pointer to swap function or NULL
+*
+* This function does a heapsort on the given array. You may provide a
+* swap_func function optimized to your element type.
+*
+* Sorting time is O(n log n) both on average and worst-case. While
+* qsort is about 20% faster on average, it suffers from exploitable
+* O(n*n) worst-case behavior and extra memory requirements that make
+* it less suitable for kernel use.
+*/
+
+static void sort(void *base, size_t num, size_t size,
+ int(*cmp_func)(const void *, const void *),
+ void(*swap_func)(void *, void *, int size))
+{
+ /* pre-scale counters for performance */
+ int i = (num / 2 - 1) * size, n = num * size, c, r;
+ char *__base = base;
+
+ if (!swap_func) {
+ swap_func = generic_swap;
+ }
+
+ /* heapify */
+ for (; i >= 0; i -= size) {
+ for (r = i; r * 2 + size < n; r = c) {
+ c = r * 2 + size;
+ if (c < n - size &&
+ cmp_func(__base + c, __base + c + size) < 0)
+ c += size;
+ if (cmp_func(__base + r, __base + c) >= 0)
+ break;
+ swap_func(__base + r, __base + c, size);
+ }
+ }
- return true;
+ /* sort */
+ for (i = n - size; i > 0; i -= size) {
+ swap_func(__base, __base + i, size);
+ for (r = 0; r * 2 + size < i; r = c) {
+ c = r * 2 + size;
+ if (c < i - size &&
+ cmp_func(__base + c, __base + c + size) < 0)
+ c += size;
+ if (cmp_func(__base + r, __base + c) >= 0)
+ break;
+ swap_func(__base + r, __base + c, size);
+ }
+ }
}
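
Unlike bsearch(), sort() takes a two-element comparator; the same integer array could be put into the ascending order bsearch() expects like this (illustrative fragment):

static int cmp_two_ints(const void *a, const void *b)
{
    int x = *(const int *)a, y = *(const int *)b;

    return (x > y) - (x < y);
}

static void sort_example(int *table, size_t n)
{
    /* NULL swap_func: fall back to the byte-wise generic_swap() above */
    sort(table, n, sizeof(table[0]), cmp_two_ints, NULL);
}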
/*
@@ -140,27 +168,20 @@ int vcpu_load(struct kvm_vcpu *vcpu)
{
int cpu;
- if (mutex_lock_killable(&vcpu->mutex))
- return -EINTR;
+ mutex_lock(&vcpu->mutex);
cpu = get_cpu();
- preempt_notifier_register(&vcpu->preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
- put_cpu();
- return 0;
+ return cpu;
}
-EXPORT_SYMBOL_GPL(vcpu_load);
void vcpu_put(struct kvm_vcpu *vcpu)
{
- preempt_disable();
kvm_arch_vcpu_put(vcpu);
- preempt_notifier_unregister(&vcpu->preempt_notifier);
- preempt_enable();
+ put_cpu();
mutex_unlock(&vcpu->mutex);
}
-EXPORT_SYMBOL_GPL(vcpu_put);
-static void ack_flush(void *_completed)
+void ack_flush(void *_completed)
{
}
@@ -173,7 +194,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
- me = get_cpu();
+ me = smp_processor_id();
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_make_request(req, vcpu);
cpu = vcpu->cpu;
@@ -191,19 +212,18 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
smp_call_function_many(cpus, ack_flush, NULL, 1);
else
called = false;
- put_cpu();
free_cpumask_var(cpus);
return called;
}
-#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
/*
- * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
+ * Read tlbs_dirty before setting GVM_REQ_TLB_FLUSH in
* kvm_make_all_cpus_request.
*/
- long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
+ long dirty_count;
+ READ_ONCE(kvm->tlbs_dirty, dirty_count);
/*
* We want to publish modifications to the page tables before reading
@@ -216,66 +236,65 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
* kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
* barrier here.
*/
- if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+ if (kvm_make_all_cpus_request(kvm, GVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
-EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
-#endif
void kvm_reload_remote_mmus(struct kvm *kvm)
{
- kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ kvm_make_all_cpus_request(kvm, GVM_REQ_MMU_RELOAD);
}
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
- struct page *page;
int r;
mutex_init(&vcpu->mutex);
vcpu->cpu = -1;
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
- vcpu->pid = NULL;
- init_swait_queue_head(&vcpu->wq);
- kvm_async_pf_vcpu_init(vcpu);
+ vcpu->thread = NULL;
vcpu->pre_pcpu = -1;
INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page) {
+ /*
+ * Linux KVM allocates two separate pages for vcpu->run and the MMIO emulation
+ * page vcpu->arch.piodata. These two pages are mapped to userland as one
+ * contiguous virtual address range. The Linux API allows that, but there is no
+ * obvious Windows equivalent, so keep the physical pages contiguous as well.
+ */
+ vcpu->run = ExAllocatePoolWithTag(NonPagedPool, 2 * PAGE_SIZE, GVM_POOL_TAG);
+ if (!vcpu->run) {
r = -ENOMEM;
goto fail;
}
- vcpu->run = page_address(page);
- kvm_vcpu_set_in_spin_loop(vcpu, false);
- kvm_vcpu_set_dy_eligible(vcpu, false);
vcpu->preempted = false;
+ KeInitializeEvent(&vcpu->kick_event, SynchronizationEvent, FALSE);
+
r = kvm_arch_vcpu_init(vcpu);
if (r < 0)
goto fail_free_run;
return 0;
fail_free_run:
- free_page((unsigned long)vcpu->run);
+ ExFreePoolWithTag(vcpu->run, GVM_POOL_TAG);
fail:
return r;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_init);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
{
- put_pid(vcpu->pid);
kvm_arch_vcpu_uninit(vcpu);
- free_page((unsigned long)vcpu->run);
+ if (vcpu->run_userva)
+ __vm_munmap(vcpu->run_userva, 2 * PAGE_SIZE, false);
+ ExFreePoolWithTag(vcpu->run, GVM_POOL_TAG);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+#if defined(CONFIG_MMU_NOTIFIER) && defined(GVM_ARCH_WANT_MMU_NOTIFIER)
static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
{
return container_of(mn, struct kvm, mmu_notifier);
@@ -283,7 +302,7 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long address)
+ size_t address)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush, idx;
@@ -324,7 +343,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long address,
+ size_t address,
pte_t pte)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
@@ -340,8 +359,8 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+ size_t start,
+ size_t end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0, idx;
@@ -366,8 +385,8 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+ size_t start,
+ size_t end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
@@ -392,8 +411,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+ size_t start,
+ size_t end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int young, idx;
@@ -413,8 +432,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+ size_t start,
+ size_t end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int young, idx;
@@ -443,7 +462,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long address)
+ size_t address)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int young, idx;
@@ -485,16 +504,16 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
}
-#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
+#else /* !(CONFIG_MMU_NOTIFIER && GVM_ARCH_WANT_MMU_NOTIFIER) */
static int kvm_init_mmu_notifier(struct kvm *kvm)
{
return 0;
}
-#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
+#endif /* CONFIG_MMU_NOTIFIER && GVM_ARCH_WANT_MMU_NOTIFIER */
-static struct kvm_memslots *kvm_alloc_memslots(void)
+static struct kvm_memslots *kvm_alloc_memslots(struct kvm *kvm)
{
int i;
struct kvm_memslots *slots;
@@ -508,8 +527,10 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
* code of handling generation number wrap-around.
*/
slots->generation = -150;
- for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+ for (i = 0; i < GVM_MEM_SLOTS_NUM; i++) {
slots->id_to_index[i] = slots->memslots[i].id = i;
+ slots->memslots[i].kvm = kvm;
+ }
return slots;
}
@@ -529,9 +550,27 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
+ struct pmem_lock *pl;
+ int i;
+
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kvm_destroy_dirty_bitmap(free);
+ if (!dont || free->pmem_lock != dont->pmem_lock)
+ if (free->pmem_lock) {
+ for (i = 0; i < free->npages; i++) {
+ pl = &free->pmem_lock[i];
+ if (!pl->lock_mdl)
+ continue;
+ spin_lock(&pl->lock);
+ MmUnlockPages(pl->lock_mdl);
+ IoFreeMdl(pl->lock_mdl);
+ pl->lock_mdl = NULL;
+ spin_unlock(&pl->lock);
+ }
+ kfree(free->pmem_lock);
+ }
+
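
The unlock sequence above implies a pinning site elsewhere in the port, outside this hunk; under that assumption, the conventional WDM pairing for pl->lock_mdl would look roughly like the sketch below (field and parameter names beyond lock_mdl are illustrative):

/* Pin one guest page backing a memslot so the hypervisor keeps a stable
 * physical address for it; 'uva' is the user-mode address of the page. */
static NTSTATUS pin_guest_page(struct pmem_lock *pl, void *uva)
{
    pl->lock_mdl = IoAllocateMdl(uva, PAGE_SIZE, FALSE, FALSE, NULL);
    if (!pl->lock_mdl)
        return STATUS_INSUFFICIENT_RESOURCES;

    __try {
        MmProbeAndLockPages(pl->lock_mdl, UserMode, IoWriteAccess);
    } __except (EXCEPTION_EXECUTE_HANDLER) {
        IoFreeMdl(pl->lock_mdl);
        pl->lock_mdl = NULL;
        return STATUS_ACCESS_VIOLATION;
    }

    return STATUS_SUCCESS;
}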
kvm_arch_free_memslot(kvm, free, dont);
free->npages = 0;
@@ -550,61 +589,7 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
kvfree(slots);
}
-static void kvm_destroy_vm_debugfs(struct kvm *kvm)
-{
- int i;
-
- if (!kvm->debugfs_dentry)
- return;
-
- debugfs_remove_recursive(kvm->debugfs_dentry);
-
- if (kvm->debugfs_stat_data) {
- for (i = 0; i < kvm_debugfs_num_entries; i++)
- kfree(kvm->debugfs_stat_data[i]);
- kfree(kvm->debugfs_stat_data);
- }
-}
-
-static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
-{
- char dir_name[ITOA_MAX_LEN * 2];
- struct kvm_stat_data *stat_data;
- struct kvm_stats_debugfs_item *p;
-
- if (!debugfs_initialized())
- return 0;
-
- snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
- kvm->debugfs_dentry = debugfs_create_dir(dir_name,
- kvm_debugfs_dir);
- if (!kvm->debugfs_dentry)
- return -ENOMEM;
-
- kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
- sizeof(*kvm->debugfs_stat_data),
- GFP_KERNEL);
- if (!kvm->debugfs_stat_data)
- return -ENOMEM;
-
- for (p = debugfs_entries; p->name; p++) {
- stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL);
- if (!stat_data)
- return -ENOMEM;
-
- stat_data->kvm = kvm;
- stat_data->offset = p->offset;
- kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
- if (!debugfs_create_file(p->name, 0444,
- kvm->debugfs_dentry,
- stat_data,
- stat_fops_per_vm[p->kind]))
- return -ENOMEM;
- }
- return 0;
-}
-
-static struct kvm *kvm_create_vm(unsigned long type)
+static struct kvm *kvm_create_vm(size_t type)
{
int r, i;
struct kvm *kvm = kvm_arch_alloc_vm();
@@ -613,14 +598,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
return ERR_PTR(-ENOMEM);
spin_lock_init(&kvm->mmu_lock);
- atomic_inc(&current->mm->mm_count);
- kvm->mm = current->mm;
- kvm_eventfd_init(kvm);
+ kvm->process = IoGetCurrentProcess();
+ kvm->vm_id = InterlockedIncrement64(&global_vm_id);
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
atomic_set(&kvm->users_count, 1);
- INIT_LIST_HEAD(&kvm->devices);
r = kvm_arch_init_vm(kvm, type);
if (r)
@@ -630,15 +613,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err_no_disable;
-#ifdef CONFIG_HAVE_KVM_IRQFD
- INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
-#endif
-
- BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
-
r = -ENOMEM;
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- kvm->memslots[i] = kvm_alloc_memslots();
+ for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++) {
+ kvm->memslots[i] = kvm_alloc_memslots(kvm);
if (!kvm->memslots[i])
goto out_err_no_srcu;
}
@@ -647,7 +624,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
goto out_err_no_srcu;
if (init_srcu_struct(&kvm->irq_srcu))
goto out_err_no_irq_srcu;
- for (i = 0; i < KVM_NR_BUSES; i++) {
+ for (i = 0; i < GVM_NR_BUSES; i++) {
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
if (!kvm->buses[i])
@@ -662,8 +639,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
- preempt_notifier_inc();
-
return kvm;
out_err:
@@ -673,12 +648,11 @@ out_err_no_irq_srcu:
out_err_no_srcu:
hardware_disable_all();
out_err_no_disable:
- for (i = 0; i < KVM_NR_BUSES; i++)
+ for (i = 0; i < GVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm, kvm->memslots[i]);
kvm_arch_free_vm(kvm);
- mmdrop(current->mm);
return ERR_PTR(r);
}
@@ -686,7 +660,7 @@ out_err_no_disable:
* Avoid using vmalloc for a small buffer.
* Should not be used when the size is statically known.
*/
-void *kvm_kvzalloc(unsigned long size)
+void *kvm_kvzalloc(size_t size)
{
if (size > PAGE_SIZE)
return vzalloc(size);
@@ -694,71 +668,43 @@ void *kvm_kvzalloc(unsigned long size)
return kzalloc(size, GFP_KERNEL);
}
-static void kvm_destroy_devices(struct kvm *kvm)
-{
- struct kvm_device *dev, *tmp;
-
- /*
- * We do not need to take the kvm->lock here, because nobody else
- * has a reference to the struct kvm at this point and therefore
- * cannot access the devices list anyhow.
- */
- list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) {
- list_del(&dev->vm_node);
- dev->ops->destroy(dev);
- }
-}
-
static void kvm_destroy_vm(struct kvm *kvm)
{
int i;
- struct mm_struct *mm = kvm->mm;
- kvm_destroy_vm_debugfs(kvm);
- kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
- for (i = 0; i < KVM_NR_BUSES; i++)
+ for (i = 0; i < GVM_NR_BUSES; i++)
kvm_io_bus_destroy(kvm->buses[i]);
- kvm_coalesced_mmio_free(kvm);
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
- mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
-#else
kvm_arch_flush_shadow_all(kvm);
-#endif
kvm_arch_destroy_vm(kvm);
- kvm_destroy_devices(kvm);
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm, kvm->memslots[i]);
+ kfree(kvm->rp_bitmap);
cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
- preempt_notifier_dec();
hardware_disable_all();
- mmdrop(mm);
}
void kvm_get_kvm(struct kvm *kvm)
{
atomic_inc(&kvm->users_count);
}
-EXPORT_SYMBOL_GPL(kvm_get_kvm);
void kvm_put_kvm(struct kvm *kvm)
{
if (atomic_dec_and_test(&kvm->users_count))
kvm_destroy_vm(kvm);
}
-EXPORT_SYMBOL_GPL(kvm_put_kvm);
-static int kvm_vm_release(struct inode *inode, struct file *filp)
+NTSTATUS kvm_vm_release(PDEVICE_OBJECT pDevObj, PIRP pIrp)
{
- struct kvm *kvm = filp->private_data;
-
- kvm_irqfd_release(kvm);
+ struct gvm_device_extension *devext = pDevObj->DeviceExtension;
+ struct kvm *kvm = devext->PrivData;
kvm_put_kvm(kvm);
return 0;
@@ -770,7 +716,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
*/
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
- unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
+ size_t dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
if (!memslot->dirty_bitmap)
@@ -802,7 +748,7 @@ static void update_memslots(struct kvm_memslots *slots,
slots->used_slots++;
}
- while (i < KVM_MEM_SLOTS_NUM - 1 &&
+ while (i < GVM_MEM_SLOTS_NUM - 1 &&
new->base_gfn <= mslots[i + 1].base_gfn) {
if (!mslots[i + 1].npages)
break;
@@ -836,10 +782,10 @@ static void update_memslots(struct kvm_memslots *slots,
static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
{
- u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
+ u32 valid_flags = GVM_MEM_LOG_DIRTY_PAGES;
-#ifdef __KVM_HAVE_READONLY_MEM
- valid_flags |= KVM_MEM_READONLY;
+#ifdef __GVM_HAVE_READONLY_MEM
+ valid_flags |= GVM_MEM_READONLY;
#endif
if (mem->flags & ~valid_flags)
@@ -888,7 +834,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
{
int r;
gfn_t base_gfn;
- unsigned long npages;
+ size_t npages;
struct kvm_memory_slot *slot;
struct kvm_memory_slot old, new;
struct kvm_memslots *slots = NULL, *old_memslots;
@@ -908,14 +854,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
goto out;
- /* We can read the guest memory with __xxx_user() later on. */
- if ((id < KVM_USER_MEM_SLOTS) &&
- ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
- !access_ok(VERIFY_WRITE,
- (void __user *)(unsigned long)mem->userspace_addr,
- mem->memory_size)))
- goto out;
- if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
+ if (as_id >= GVM_ADDRESS_SPACE_NUM || id >= GVM_MEM_SLOTS_NUM)
goto out;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
@@ -924,7 +863,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
- if (npages > KVM_MEM_MAX_NR_PAGES)
+ if (npages > GVM_MEM_MAX_NR_PAGES)
goto out;
new = old = *slot;
@@ -936,17 +875,17 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (npages) {
if (!old.npages)
- change = KVM_MR_CREATE;
+ change = GVM_MR_CREATE;
else { /* Modify an existing slot. */
if ((mem->userspace_addr != old.userspace_addr) ||
(npages != old.npages) ||
- ((new.flags ^ old.flags) & KVM_MEM_READONLY))
+ ((new.flags ^ old.flags) & GVM_MEM_READONLY))
goto out;
if (base_gfn != old.base_gfn)
- change = KVM_MR_MOVE;
+ change = GVM_MR_MOVE;
else if (new.flags != old.flags)
- change = KVM_MR_FLAGS_ONLY;
+ change = GVM_MR_FLAGS_ONLY;
else { /* Nothing to change. */
r = 0;
goto out;
@@ -956,16 +895,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (!old.npages)
goto out;
- change = KVM_MR_DELETE;
+ change = GVM_MR_DELETE;
new.base_gfn = 0;
new.flags = 0;
}
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+ if ((change == GVM_MR_CREATE) || (change == GVM_MR_MOVE)) {
/* Check for overlaps */
r = -EEXIST;
kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
- if ((slot->id >= KVM_USER_MEM_SLOTS) ||
+ if ((slot->id >= GVM_USER_MEM_SLOTS) ||
(slot->id == id))
continue;
if (!((base_gfn + npages <= slot->base_gfn) ||
@@ -975,36 +914,43 @@ int __kvm_set_memory_region(struct kvm *kvm,
}
/* Free page dirty bitmap if unneeded */
- if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
+ if (!(new.flags & GVM_MEM_LOG_DIRTY_PAGES))
new.dirty_bitmap = NULL;
r = -ENOMEM;
- if (change == KVM_MR_CREATE) {
+ if (change == GVM_MR_CREATE) {
new.userspace_addr = mem->userspace_addr;
if (kvm_arch_create_memslot(kvm, &new, npages))
goto out_free;
+
}
/* Allocate page dirty bitmap if needed */
- if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
+ if ((new.flags & GVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
if (kvm_create_dirty_bitmap(&new) < 0)
goto out_free;
}
+ /* Allocate physical page pinning data structure */
+ if (!new.pmem_lock) {
+ new.pmem_lock =
+ kzalloc(sizeof(struct pmem_lock) * new.npages, GFP_KERNEL);
+ if (!new.pmem_lock)
+ goto out_free;
+ }
+
slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
if (!slots)
goto out_free;
memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
- if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
+ if ((change == GVM_MR_DELETE) || (change == GVM_MR_MOVE)) {
slot = id_to_memslot(slots, id);
- slot->flags |= KVM_MEMSLOT_INVALID;
+ slot->flags |= GVM_MEMSLOT_INVALID;
old_memslots = install_new_memslots(kvm, as_id, slots);
- /* slot was deleted or moved, clear iommu mapping */
- kvm_iommu_unmap_pages(kvm, &old);
/* From this point no new shadow pages pointing to a deleted,
* or moved, memslot will be created.
*
@@ -1027,8 +973,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_slots;
/* actual memory is freed via old in kvm_free_memslot below */
- if (change == KVM_MR_DELETE) {
+ if (change == GVM_MR_DELETE) {
new.dirty_bitmap = NULL;
+ new.pmem_lock = NULL;
memset(&new.arch, 0, sizeof(new.arch));
}
@@ -1040,20 +987,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
kvm_free_memslot(kvm, &old, &new);
kvfree(old_memslots);
- /*
- * IOMMU mapping: New slots need to be mapped. Old slots need to be
- * un-mapped and re-mapped if their base changes. Since base change
- * unmapping is handled above with slot deletion, mapping alone is
- * needed here. Anything else the iommu might care about for existing
- * slots (size changes, userspace addr changes and read-only flag
- * changes) is disallowed above, so any other attribute changes getting
- * here can be skipped.
- */
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
- r = kvm_iommu_map_pages(kvm, &new);
- return r;
- }
-
return 0;
out_slots:
@@ -1063,7 +996,6 @@ out_free:
out:
return r;
}
-EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
int kvm_set_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem)
@@ -1075,12 +1007,11 @@ int kvm_set_memory_region(struct kvm *kvm,
mutex_unlock(&kvm->slots_lock);
return r;
}
-EXPORT_SYMBOL_GPL(kvm_set_memory_region);
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
- if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
+ if ((u16)mem->slot >= GVM_USER_MEM_SLOTS)
return -EINVAL;
return kvm_set_memory_region(kvm, mem);
@@ -1092,13 +1023,13 @@ int kvm_get_dirty_log(struct kvm *kvm,
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int r, i, as_id, id;
- unsigned long n;
- unsigned long any = 0;
+ size_t n;
+ size_t any = 0;
r = -EINVAL;
as_id = log->slot >> 16;
id = (u16)log->slot;
- if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+ if (as_id >= GVM_ADDRESS_SPACE_NUM || id >= GVM_USER_MEM_SLOTS)
goto out;
slots = __kvm_memslots(kvm, as_id);
@@ -1113,7 +1044,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
any = memslot->dirty_bitmap[i];
r = -EFAULT;
- if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
+ if (__copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
goto out;
if (any)
@@ -1123,9 +1054,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
out:
return r;
}
-EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
/**
* kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
* are dirty write protect them for next write.
@@ -1154,14 +1083,14 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int r, i, as_id, id;
- unsigned long n;
- unsigned long *dirty_bitmap;
- unsigned long *dirty_bitmap_buffer;
+ size_t n;
+ size_t *dirty_bitmap;
+ size_t *dirty_bitmap_buffer;
r = -EINVAL;
as_id = log->slot >> 16;
id = (u16)log->slot;
- if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+ if (as_id >= GVM_ADDRESS_SPACE_NUM || id >= GVM_USER_MEM_SLOTS)
goto out;
slots = __kvm_memslots(kvm, as_id);
@@ -1174,13 +1103,13 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
n = kvm_dirty_bitmap_bytes(memslot);
- dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+ dirty_bitmap_buffer = dirty_bitmap + n / sizeof(size_t);
memset(dirty_bitmap_buffer, 0, n);
spin_lock(&kvm->mmu_lock);
*is_dirty = false;
- for (i = 0; i < n / sizeof(long); i++) {
- unsigned long mask;
+ for (i = 0; i < n / sizeof(size_t); i++) {
+ size_t mask;
gfn_t offset;
if (!dirty_bitmap[i])
@@ -1201,32 +1130,18 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
spin_unlock(&kvm->mmu_lock);
r = -EFAULT;
- if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+ if (__copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
goto out;
r = 0;
out:
return r;
}
-EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
-#endif
-
-bool kvm_largepages_enabled(void)
-{
- return largepages_enabled;
-}
-
-void kvm_disable_largepages(void)
-{
- largepages_enabled = false;
-}
-EXPORT_SYMBOL_GPL(kvm_disable_largepages);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
return __gfn_to_memslot(kvm_memslots(kvm), gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_memslot);
struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
{
@@ -1237,51 +1152,31 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
- if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
- memslot->flags & KVM_MEMSLOT_INVALID)
+ if (!memslot || memslot->id >= GVM_USER_MEM_SLOTS ||
+ memslot->flags & GVM_MEMSLOT_INVALID)
return false;
return true;
}
-EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+size_t kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
{
- struct vm_area_struct *vma;
- unsigned long addr, size;
-
- size = PAGE_SIZE;
-
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr))
- return PAGE_SIZE;
-
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, addr);
- if (!vma)
- goto out;
-
- size = vma_kernel_pagesize(vma);
-
-out:
- up_read(&current->mm->mmap_sem);
-
- return size;
+ return PAGE_SIZE;
}
static bool memslot_is_readonly(struct kvm_memory_slot *slot)
{
- return slot->flags & KVM_MEM_READONLY;
+ return slot->flags & GVM_MEM_READONLY;
}
-static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+static size_t __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages, bool write)
{
- if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
- return KVM_HVA_ERR_BAD;
+ if (!slot || slot->flags & GVM_MEMSLOT_INVALID)
+ return GVM_HVA_ERR_BAD;
if (memslot_is_readonly(slot) && write)
- return KVM_HVA_ERR_RO_BAD;
+ return GVM_HVA_ERR_RO_BAD;
if (nr_pages)
*nr_pages = slot->npages - (gfn - slot->base_gfn);
@@ -1289,39 +1184,36 @@ static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
return __gfn_to_hva_memslot(slot, gfn);
}
-static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+static size_t gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages)
{
return __gfn_to_hva_many(slot, gfn, nr_pages, true);
}
-unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
+size_t gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
return gfn_to_hva_many(slot, gfn, NULL);
}
-EXPORT_SYMBOL_GPL(gfn_to_hva_memslot);
-unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+size_t gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
}
-EXPORT_SYMBOL_GPL(gfn_to_hva);
-unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn)
+size_t kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn)
{
return gfn_to_hva_many(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, NULL);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva);
/*
* If writable is set to false, the hva returned by this function is only
* allowed to be read.
*/
-unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+size_t gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
gfn_t gfn, bool *writable)
{
- unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+ size_t hva = __gfn_to_hva_many(slot, gfn, NULL, false);
if (!kvm_is_error_hva(hva) && writable)
*writable = !memslot_is_readonly(slot);
@@ -1329,177 +1221,39 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
return hva;
}
-unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+size_t gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
-unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable)
+size_t kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable)
{
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
-static int get_user_page_nowait(unsigned long start, int write,
- struct page **page)
-{
- int flags = FOLL_NOWAIT | FOLL_HWPOISON;
-
- if (write)
- flags |= FOLL_WRITE;
-
- return get_user_pages(start, 1, flags, page, NULL);
-}
-
-static inline int check_user_page_hwpoison(unsigned long addr)
-{
- int rc, flags = FOLL_HWPOISON | FOLL_WRITE;
-
- rc = get_user_pages(addr, 1, flags, NULL, NULL);
- return rc == -EHWPOISON;
-}
-
/*
* The atomic path to get the writable pfn which will be stored in @pfn,
* true indicates success, otherwise false is returned.
*/
-static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async,
+static bool __hva_to_pfn(size_t addr,
bool write_fault, bool *writable, kvm_pfn_t *pfn)
{
- struct page *page[1];
- int npages;
-
- if (!(async || atomic))
- return false;
-
- /*
- * Fast pin a writable pfn only if it is a write fault request
- * or the caller allows to map a writable pfn for a read fault
- * request.
- */
- if (!(write_fault || writable))
- return false;
-
- npages = __get_user_pages_fast(addr, 1, 1, page);
- if (npages == 1) {
- *pfn = page_to_pfn(page[0]);
-
- if (writable)
- *writable = true;
- return true;
- }
-
- return false;
-}
-
-/*
- * The slow path to get the pfn of the specified host virtual address,
- * 1 indicates success, -errno is returned if error is detected.
- */
-static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
- bool *writable, kvm_pfn_t *pfn)
-{
- struct page *page[1];
- int npages = 0;
-
- might_sleep();
-
if (writable)
*writable = write_fault;
- if (async) {
- down_read(&current->mm->mmap_sem);
- npages = get_user_page_nowait(addr, write_fault, page);
- up_read(&current->mm->mmap_sem);
- } else {
- unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON;
-
- if (write_fault)
- flags |= FOLL_WRITE;
-
- npages = __get_user_pages_unlocked(current, current->mm, addr, 1,
- page, flags);
- }
- if (npages != 1)
- return npages;
-
/* map read fault as writable if possible */
- if (unlikely(!write_fault) && writable) {
- struct page *wpage[1];
-
- npages = __get_user_pages_fast(addr, 1, 1, wpage);
- if (npages == 1) {
- *writable = true;
- put_page(page[0]);
- page[0] = wpage[0];
- }
+ if (!write_fault && writable)
+ *writable = true;
- npages = 1;
- }
- *pfn = page_to_pfn(page[0]);
- return npages;
-}
-
-static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
-{
- if (unlikely(!(vma->vm_flags & VM_READ)))
- return false;
-
- if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
- return false;
+ *pfn = __pa((void *)addr) >> PAGE_SHIFT;
return true;
}
-static int hva_to_pfn_remapped(struct vm_area_struct *vma,
- unsigned long addr, bool *async,
- bool write_fault, kvm_pfn_t *p_pfn)
-{
- unsigned long pfn;
- int r;
-
- r = follow_pfn(vma, addr, &pfn);
- if (r) {
- /*
- * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
- * not call the fault handler, so do it here.
- */
- bool unlocked = false;
- r = fixup_user_fault(current, current->mm, addr,
- (write_fault ? FAULT_FLAG_WRITE : 0),
- &unlocked);
- if (unlocked)
- return -EAGAIN;
- if (r)
- return r;
-
- r = follow_pfn(vma, addr, &pfn);
- if (r)
- return r;
-
- }
-
-
- /*
- * Get a reference here because callers of *hva_to_pfn* and
- * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
- * returned pfn. This is only needed if the VMA has VM_MIXEDMAP
- * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
- * simply do nothing for reserved pfns.
- *
- * Whoever called remap_pfn_range is also going to call e.g.
- * unmap_mapping_range before the underlying pages are freed,
- * causing a call to our MMU notifier.
- */
- kvm_get_pfn(pfn);
-
- *p_pfn = pfn;
- return 0;
-}
-
/*
* Pin guest page in memory and return its pfn.
* @addr: host virtual address which maps memory to the guest
@@ -1514,70 +1268,67 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
* 2): @write_fault = false && @writable, @writable will tell the caller
* whether the mapping is writable.
*/
-static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+static kvm_pfn_t hva_to_pfn(size_t addr,
bool write_fault, bool *writable)
{
- struct vm_area_struct *vma;
kvm_pfn_t pfn = 0;
- int npages, r;
-
- /* we can do it either atomically or asynchronously, not both */
- BUG_ON(atomic && async);
- if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
+ if (__hva_to_pfn(addr, write_fault, writable, &pfn))
return pfn;
- if (atomic)
- return KVM_PFN_ERR_FAULT;
+ return GVM_PFN_ERR_FAULT;
+}
- npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
- if (npages == 1)
- return pfn;
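+/*
+ * Pin one guest page in host RAM: build an MDL for the backing user
+ * address and probe-and-lock it for write access. This is the Windows
+ * counterpart of pinning via get_user_pages() in the removed code above.
+ */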
+static int gvm_pin_user_memory(size_t addr, struct pmem_lock *pmem_lock)
+{
+ pmem_lock->lock_mdl = IoAllocateMdl((PVOID)addr, PAGE_SIZE,
+ FALSE, FALSE, NULL);
+ if (!pmem_lock->lock_mdl)
+ return -1;
+ MmProbeAndLockPages(pmem_lock->lock_mdl, UserMode,
+ IoWriteAccess);
+ return 0;
+}
- down_read(&current->mm->mmap_sem);
- if (npages == -EHWPOISON ||
- (!async && check_user_page_hwpoison(addr))) {
- pfn = KVM_PFN_ERR_HWPOISON;
- goto exit;
- }
+static int kvm_is_ram_prot(struct kvm* kvm, gfn_t gfn);
+static int kvm_should_ram_prot_exit(struct kvm *kvm, gfn_t gfn)
+{
+ struct kvm_vcpu* vcpu;
-retry:
- vma = find_vma_intersection(current->mm, addr, addr + 1);
-
- if (vma == NULL)
- pfn = KVM_PFN_ERR_FAULT;
- else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
- r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
- if (r == -EAGAIN)
- goto retry;
- if (r < 0)
- pfn = KVM_PFN_ERR_FAULT;
- } else {
- if (async && vma_is_valid(vma, write_fault))
- *async = true;
- pfn = KVM_PFN_ERR_FAULT;
- }
-exit:
- up_read(&current->mm->mmap_sem);
- return pfn;
+ if (!kvm_is_ram_prot(kvm, gfn))
+ return 0;
+
+ /*
+ * We assume get user pages always run
+ * in the vcpu thread requesting that
+ * page.
+ */
+ vcpu = kvm_get_vcpu_by_thread(kvm, PsGetCurrentThread());
+ vcpu->run->exit_reason = GVM_EXIT_RAM_PROT;
+ vcpu->run->rp.gfn = gfn;
+ return 1;
}
kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
bool atomic, bool *async, bool write_fault,
bool *writable)
{
- unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
+ size_t addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
+ struct pmem_lock *pmem_lock = NULL;
+
+ /* We removed async page fault support for gvm. */
+ BUG_ON(async);
- if (addr == KVM_HVA_ERR_RO_BAD) {
+ if (addr == GVM_HVA_ERR_RO_BAD) {
if (writable)
*writable = false;
- return KVM_PFN_ERR_RO_FAULT;
+ return GVM_PFN_ERR_RO_FAULT;
}
if (kvm_is_error_hva(addr)) {
if (writable)
*writable = false;
- return KVM_PFN_NOSLOT;
+ return GVM_PFN_NOSLOT;
}
/* Do not map writable pfn in the readonly memslot. */
@@ -1586,10 +1337,22 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
writable = NULL;
}
- return hva_to_pfn(addr, atomic, async, write_fault,
- writable);
+ if (kvm_should_ram_prot_exit(slot->kvm, gfn))
+ return 0;
+
+ pmem_lock = &slot->pmem_lock[gfn - slot->base_gfn];
+ spin_lock(&pmem_lock->lock);
+ if (!pmem_lock->lock_mdl) {
+ gvm_pin_user_memory(addr, pmem_lock);
+ if (!pmem_lock->lock_mdl) {
+ spin_unlock(&pmem_lock->lock);
+ return GVM_PFN_ERR_FAULT;
+ }
+ }
+ spin_unlock(&pmem_lock->lock);
+
+ return hva_to_pfn(addr, write_fault, writable);
}
-EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable)
@@ -1597,49 +1360,44 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
write_fault, writable);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
{
return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
{
return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn)
{
return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn_atomic);
kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn);
kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
return gfn_to_pfn_memslot(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn);
-int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
- struct page **pages, int nr_pages)
+int gfn_to_pfn_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
+ pfn_t *pfn, int nr_pages)
{
- unsigned long addr;
+ size_t addr;
gfn_t entry;
+ size_t i;
+ struct pmem_lock *pmem_lock;
addr = gfn_to_hva_many(slot, gfn, &entry);
if (kvm_is_error_hva(addr))
@@ -1648,32 +1406,36 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
if (entry < nr_pages)
return 0;
- return __get_user_pages_fast(addr, nr_pages, 1, pages);
-}
-EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
+ for (i = 0; i < nr_pages; i++) {
+ if (kvm_should_ram_prot_exit(slot->kvm, gfn + i))
+ return 0;
-static struct page *kvm_pfn_to_page(kvm_pfn_t pfn)
-{
- if (is_error_noslot_pfn(pfn))
- return KVM_ERR_PTR_BAD_PAGE;
-
- if (kvm_is_reserved_pfn(pfn)) {
- WARN_ON(1);
- return KVM_ERR_PTR_BAD_PAGE;
+ pmem_lock = &slot->pmem_lock[gfn + i - slot->base_gfn];
+ spin_lock(&pmem_lock->lock);
+ if (!pmem_lock->lock_mdl) {
+ gvm_pin_user_memory(addr + i * PAGE_SIZE, pmem_lock);
+ if (!pmem_lock->lock_mdl) {
+ spin_unlock(&pmem_lock->lock);
+ break;
+ }
+ }
+ spin_unlock(&pmem_lock->lock);
}
- return pfn_to_page(pfn);
+ nr_pages = i;
+
+ while(i--)
+ pfn[i] = __pa((void*)(addr + i * PAGE_SIZE)) >> PAGE_SHIFT;
+ return nr_pages;
}
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+static struct page *kvm_pfn_to_page(kvm_pfn_t pfn)
{
- kvm_pfn_t pfn;
-
- pfn = gfn_to_pfn(kvm, gfn);
+ if (is_error_noslot_pfn(pfn))
+ return GVM_ERR_PTR_BAD_PAGE;
- return kvm_pfn_to_page(pfn);
+ return pfn_to_page(pfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_page);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
{
@@ -1683,63 +1445,8 @@ struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
return kvm_pfn_to_page(pfn);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_page);
-
-void kvm_release_page_clean(struct page *page)
-{
- WARN_ON(is_error_page(page));
-
- kvm_release_pfn_clean(page_to_pfn(page));
-}
-EXPORT_SYMBOL_GPL(kvm_release_page_clean);
-
-void kvm_release_pfn_clean(kvm_pfn_t pfn)
-{
- if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
- put_page(pfn_to_page(pfn));
-}
-EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
-
-void kvm_release_page_dirty(struct page *page)
-{
- WARN_ON(is_error_page(page));
-
- kvm_release_pfn_dirty(page_to_pfn(page));
-}
-EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
-
-static void kvm_release_pfn_dirty(kvm_pfn_t pfn)
-{
- kvm_set_pfn_dirty(pfn);
- kvm_release_pfn_clean(pfn);
-}
-
-void kvm_set_pfn_dirty(kvm_pfn_t pfn)
-{
- if (!kvm_is_reserved_pfn(pfn)) {
- struct page *page = pfn_to_page(pfn);
-
- if (!PageReserved(page))
- SetPageDirty(page);
- }
-}
-EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
-void kvm_set_pfn_accessed(kvm_pfn_t pfn)
-{
- if (!kvm_is_reserved_pfn(pfn))
- mark_page_accessed(pfn_to_page(pfn));
-}
-EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
-
-void kvm_get_pfn(kvm_pfn_t pfn)
-{
- if (!kvm_is_reserved_pfn(pfn))
- get_page(pfn_to_page(pfn));
-}
-EXPORT_SYMBOL_GPL(kvm_get_pfn);
-
-static int next_segment(unsigned long len, int offset)
+static int next_segment(size_t len, int offset)
{
if (len > PAGE_SIZE - offset)
return PAGE_SIZE - offset;
@@ -1750,13 +1457,13 @@ static int next_segment(unsigned long len, int offset)
static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
void *data, int offset, int len)
{
- int r;
- unsigned long addr;
+ int r = 0;
+ size_t addr;
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
- r = __copy_from_user(data, (void __user *)addr + offset, len);
+ r = __copy_from_user(data, (char __user *)addr + offset, len);
if (r)
return -EFAULT;
return 0;
@@ -1769,7 +1476,6 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
return __kvm_read_guest_page(slot, gfn, data, offset, len);
}
-EXPORT_SYMBOL_GPL(kvm_read_guest_page);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
int offset, int len)
@@ -1778,9 +1484,8 @@ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
return __kvm_read_guest_page(slot, gfn, data, offset, len);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
-int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
+int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, size_t len)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
int seg;
@@ -1793,14 +1498,13 @@ int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
return ret;
offset = 0;
len -= seg;
- data += seg;
+ //data += seg;
++gfn;
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_read_guest);
-int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len)
+int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, size_t len)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
int seg;
@@ -1813,43 +1517,29 @@ int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned l
return ret;
offset = 0;
len -= seg;
- data += seg;
+ //data += seg;
++gfn;
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest);
static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
- void *data, int offset, unsigned long len)
+ void *data, int offset, size_t len)
{
- int r;
- unsigned long addr;
+ int r = 0;
+ size_t addr;
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
- pagefault_disable();
- r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
- pagefault_enable();
+ r = __copy_from_user(data, (char __user *)addr + offset, len);
if (r)
return -EFAULT;
return 0;
}
-int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
- unsigned long len)
-{
- gfn_t gfn = gpa >> PAGE_SHIFT;
- struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
- int offset = offset_in_page(gpa);
-
- return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
-}
-EXPORT_SYMBOL_GPL(kvm_read_guest_atomic);
-
int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
- void *data, unsigned long len)
+ void *data, size_t len)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
@@ -1857,18 +1547,17 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
const void *data, int offset, int len)
{
- int r;
- unsigned long addr;
+ int r = 0;
+ size_t addr;
addr = gfn_to_hva_memslot(memslot, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
- r = __copy_to_user((void __user *)addr + offset, data, len);
+ r = __copy_to_user((void __user *)(addr + offset), data, len);
if (r)
return -EFAULT;
mark_page_dirty_in_slot(memslot, gfn);
@@ -1882,7 +1571,6 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
return __kvm_write_guest_page(slot, gfn, data, offset, len);
}
-EXPORT_SYMBOL_GPL(kvm_write_guest_page);
int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
const void *data, int offset, int len)
@@ -1891,10 +1579,9 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
return __kvm_write_guest_page(slot, gfn, data, offset, len);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
- unsigned long len)
+ size_t len)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
int seg;
@@ -1907,15 +1594,14 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
return ret;
offset = 0;
len -= seg;
- data += seg;
+ //data += seg;
++gfn;
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_write_guest);
int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
- unsigned long len)
+ size_t len)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
int seg;
@@ -1928,15 +1614,14 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
return ret;
offset = 0;
len -= seg;
- data += seg;
+ //data += seg;
++gfn;
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest);
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- gpa_t gpa, unsigned long len)
+ gpa_t gpa, size_t len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
int offset = offset_in_page(gpa);
@@ -1970,10 +1655,9 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+ void *data, size_t len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
@@ -1996,10 +1680,9 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+ void *data, size_t len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
@@ -2021,17 +1704,13 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
- const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
-
- return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
+ return kvm_write_guest_page(kvm, gfn, pZeroPage, offset, len);
}
-EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
-int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
+int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, size_t len)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
int seg;
@@ -2048,15 +1727,14 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_clear_guest);
static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
gfn_t gfn)
{
if (memslot && memslot->dirty_bitmap) {
- unsigned long rel_gfn = gfn - memslot->base_gfn;
+ size_t rel_gfn = gfn - memslot->base_gfn;
- set_bit_le(rel_gfn, memslot->dirty_bitmap);
+ set_bit(rel_gfn, memslot->dirty_bitmap);
}
}
@@ -2067,7 +1745,6 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
memslot = gfn_to_memslot(kvm, gfn);
mark_page_dirty_in_slot(memslot, gfn);
}
-EXPORT_SYMBOL_GPL(mark_page_dirty);
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
{
@@ -2076,138 +1753,52 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
mark_page_dirty_in_slot(memslot, gfn);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
-
-static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
-{
- unsigned int old, val, grow;
-
- old = val = vcpu->halt_poll_ns;
- grow = READ_ONCE(halt_poll_ns_grow);
- /* 10us base */
- if (val == 0 && grow)
- val = 10000;
- else
- val *= grow;
-
- if (val > halt_poll_ns)
- val = halt_poll_ns;
-
- vcpu->halt_poll_ns = val;
- trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
-}
-
-static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
-{
- unsigned int old, val, shrink;
-
- old = val = vcpu->halt_poll_ns;
- shrink = READ_ONCE(halt_poll_ns_shrink);
- if (shrink == 0)
- val = 0;
- else
- val /= shrink;
-
- vcpu->halt_poll_ns = val;
- trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
-}
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
if (kvm_arch_vcpu_runnable(vcpu)) {
- kvm_make_request(KVM_REQ_UNHALT, vcpu);
+ kvm_make_request(GVM_REQ_UNHALT, vcpu);
return -EINTR;
}
if (kvm_cpu_has_pending_timer(vcpu))
return -EINTR;
- if (signal_pending(current))
+ if (vcpu->run->user_event_pending)
return -EINTR;
return 0;
}
+static void hardware_disable_nolock(void *junk);
+static void hardware_enable_nolock(void *junk);
+
/*
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
*/
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
- ktime_t start, cur;
- DECLARE_SWAITQUEUE(wait);
- bool waited = false;
- u64 block_ns;
-
- start = cur = ktime_get();
- if (vcpu->halt_poll_ns) {
- ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
-
- ++vcpu->stat.halt_attempted_poll;
- do {
- /*
- * This sets KVM_REQ_UNHALT if an interrupt
- * arrives.
- */
- if (kvm_vcpu_check_block(vcpu) < 0) {
- ++vcpu->stat.halt_successful_poll;
- if (!vcpu_valid_wakeup(vcpu))
- ++vcpu->stat.halt_poll_invalid;
- goto out;
- }
- cur = ktime_get();
- } while (single_task_running() && ktime_before(cur, stop));
- }
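+ /*
+ * A negative KeWaitForSingleObject timeout means relative time in
+ * 100 ns units, so -1000000 is a 100 ms wait per loop iteration.
+ */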
+ LARGE_INTEGER expire;
+ expire.QuadPart = (u64)-1000000;
kvm_arch_vcpu_blocking(vcpu);
- for (;;) {
- prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
-
- if (kvm_vcpu_check_block(vcpu) < 0)
+ vcpu->blocked = 1;
+ for (;;)
+ {
+ if (kvm_vcpu_check_block(vcpu))
break;
-
- waited = true;
- schedule();
+ KeWaitForSingleObject(&vcpu->kick_event, Executive, KernelMode, FALSE, &expire);
}
-
- finish_swait(&vcpu->wq, &wait);
- cur = ktime_get();
-
+ vcpu->blocked = 0;
+ KeClearEvent(&vcpu->kick_event);
kvm_arch_vcpu_unblocking(vcpu);
-out:
- block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
-
- if (!vcpu_valid_wakeup(vcpu))
- shrink_halt_poll_ns(vcpu);
- else if (halt_poll_ns) {
- if (block_ns <= vcpu->halt_poll_ns)
- ;
- /* we had a long block, shrink polling */
- else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
- shrink_halt_poll_ns(vcpu);
- /* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < halt_poll_ns &&
- block_ns < halt_poll_ns)
- grow_halt_poll_ns(vcpu);
- } else
- vcpu->halt_poll_ns = 0;
-
- trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
kvm_arch_vcpu_block_finish(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_block);
-#ifndef CONFIG_S390
void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
- struct swait_queue_head *wqp;
-
- wqp = kvm_arch_vcpu_wq(vcpu);
- if (swait_active(wqp)) {
- swake_up(wqp);
- ++vcpu->stat.halt_wakeup;
- }
-
+ if(vcpu->blocked)
+ KeSetEvent(&vcpu->kick_event, IO_NO_INCREMENT, FALSE);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
/*
* Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
@@ -2218,224 +1809,36 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
int cpu = vcpu->cpu;
kvm_vcpu_wake_up(vcpu);
- me = get_cpu();
- if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+ me = smp_processor_id();
+ if (cpu != -1 && cpu != me && cpu_online(cpu))
if (kvm_arch_vcpu_should_kick(vcpu))
smp_send_reschedule(cpu);
- put_cpu();
-}
-EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
-#endif /* !CONFIG_S390 */
-
-int kvm_vcpu_yield_to(struct kvm_vcpu *target)
-{
- struct pid *pid;
- struct task_struct *task = NULL;
- int ret = 0;
-
- rcu_read_lock();
- pid = rcu_dereference(target->pid);
- if (pid)
- task = get_pid_task(pid, PIDTYPE_PID);
- rcu_read_unlock();
- if (!task)
- return ret;
- ret = yield_to(task, 1);
- put_task_struct(task);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
-
-/*
- * Helper that checks whether a VCPU is eligible for directed yield.
- * Most eligible candidate to yield is decided by following heuristics:
- *
- * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
- * (preempted lock holder), indicated by @in_spin_loop.
- * Set at the beiginning and cleared at the end of interception/PLE handler.
- *
- * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
- * chance last time (mostly it has become eligible now since we have probably
- * yielded to lockholder in last iteration. This is done by toggling
- * @dy_eligible each time a VCPU checked for eligibility.)
- *
- * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
- * to preempted lock-holder could result in wrong VCPU selection and CPU
- * burning. Giving priority for a potential lock-holder increases lock
- * progress.
- *
- * Since algorithm is based on heuristics, accessing another VCPU data without
- * locking does not harm. It may result in trying to yield to same VCPU, fail
- * and continue with next VCPU and so on.
- */
-static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
- bool eligible;
-
- eligible = !vcpu->spin_loop.in_spin_loop ||
- vcpu->spin_loop.dy_eligible;
-
- if (vcpu->spin_loop.in_spin_loop)
- kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
-
- return eligible;
-#else
- return true;
-#endif
-}
-
-void kvm_vcpu_on_spin(struct kvm_vcpu *me)
-{
- struct kvm *kvm = me->kvm;
- struct kvm_vcpu *vcpu;
- int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
- int yielded = 0;
- int try = 3;
- int pass;
- int i;
-
- kvm_vcpu_set_in_spin_loop(me, true);
- /*
- * We boost the priority of a VCPU that is runnable but not
- * currently running, because it got preempted by something
- * else and called schedule in __vcpu_run. Hopefully that
- * VCPU is holding the lock that we need and will release it.
- * We approximate round-robin by starting at the last boosted VCPU.
- */
- for (pass = 0; pass < 2 && !yielded && try; pass++) {
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!pass && i <= last_boosted_vcpu) {
- i = last_boosted_vcpu;
- continue;
- } else if (pass && i > last_boosted_vcpu)
- break;
- if (!ACCESS_ONCE(vcpu->preempted))
- continue;
- if (vcpu == me)
- continue;
- if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
- continue;
- if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
- continue;
-
- yielded = kvm_vcpu_yield_to(vcpu);
- if (yielded > 0) {
- kvm->last_boosted_vcpu = i;
- break;
- } else if (yielded < 0) {
- try--;
- if (!try)
- break;
- }
- }
- }
- kvm_vcpu_set_in_spin_loop(me, false);
-
- /* Ensure vcpu is not eligible during next spinloop */
- kvm_vcpu_set_dy_eligible(me, false);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
-static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+NTSTATUS kvm_vcpu_release(PDEVICE_OBJECT pDevObj, PIRP pIrp)
{
- struct kvm_vcpu *vcpu = vma->vm_file->private_data;
- struct page *page;
-
- if (vmf->pgoff == 0)
- page = virt_to_page(vcpu->run);
-#ifdef CONFIG_X86
- else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
- page = virt_to_page(vcpu->arch.pio_data);
-#endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
- page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
-#endif
- else
- return kvm_arch_vcpu_fault(vcpu, vmf);
- get_page(page);
- vmf->page = page;
- return 0;
-}
-
-static const struct vm_operations_struct kvm_vcpu_vm_ops = {
- .fault = kvm_vcpu_fault,
-};
+ struct gvm_device_extension *devext = pDevObj->DeviceExtension;
+ struct kvm_vcpu *vcpu = devext->PrivData;
-static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
-{
- vma->vm_ops = &kvm_vcpu_vm_ops;
- return 0;
-}
-
-static int kvm_vcpu_release(struct inode *inode, struct file *filp)
-{
- struct kvm_vcpu *vcpu = filp->private_data;
-
- debugfs_remove_recursive(vcpu->debugfs_dentry);
kvm_put_kvm(vcpu->kvm);
return 0;
}
-static struct file_operations kvm_vcpu_fops = {
- .release = kvm_vcpu_release,
- .unlocked_ioctl = kvm_vcpu_ioctl,
-#ifdef CONFIG_KVM_COMPAT
- .compat_ioctl = kvm_vcpu_compat_ioctl,
-#endif
- .mmap = kvm_vcpu_mmap,
- .llseek = noop_llseek,
-};
-
-/*
- * Allocates an inode for the vcpu.
- */
-static int create_vcpu_fd(struct kvm_vcpu *vcpu)
-{
- return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
-}
-
-static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
- char dir_name[ITOA_MAX_LEN * 2];
- int ret;
-
- if (!kvm_arch_has_vcpu_debugfs())
- return 0;
-
- if (!debugfs_initialized())
- return 0;
-
- snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
- vcpu->debugfs_dentry = debugfs_create_dir(dir_name,
- vcpu->kvm->debugfs_dentry);
- if (!vcpu->debugfs_dentry)
- return -ENOMEM;
-
- ret = kvm_arch_create_vcpu_debugfs(vcpu);
- if (ret < 0) {
- debugfs_remove_recursive(vcpu->debugfs_dentry);
- return ret;
- }
-
- return 0;
-}
-
-/*
- * Creates some virtual cpus. Good luck creating more than one.
- */
-static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
+static int kvm_vm_ioctl_create_vcpu(PDEVICE_OBJECT pDevObj, PIRP pIrp, void *arg)
{
int r;
struct kvm_vcpu *vcpu;
+ struct gvm_device_extension *devext = pDevObj->DeviceExtension;
+ struct kvm *kvm = devext->PrivData;
+ HANDLE handle;
+ int id = *(int *)arg;
+ KAFFINITY Affinity;
- if (id >= KVM_MAX_VCPU_ID)
+ mutex_lock(&kvm->lock);
+ if (id >= GVM_MAX_VCPU_ID)
return -EINVAL;
- mutex_lock(&kvm->lock);
- if (kvm->created_vcpus == KVM_MAX_VCPUS) {
+ if (kvm->created_vcpus == GVM_MAX_VCPUS) {
mutex_unlock(&kvm->lock);
return -EINVAL;
}
@@ -2449,16 +1852,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
goto vcpu_decrement;
}
- preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
-
r = kvm_arch_vcpu_setup(vcpu);
if (r)
goto vcpu_destroy;
- r = kvm_create_vcpu_debugfs(vcpu);
- if (r)
- goto vcpu_destroy;
-
mutex_lock(&kvm->lock);
if (kvm_get_vcpu_by_id(kvm, id)) {
r = -EEXIST;
@@ -2469,8 +1866,14 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
/* Now it's all set up, let userspace reach it */
kvm_get_kvm(kvm);
- r = create_vcpu_fd(vcpu);
- if (r < 0) {
+ r = gvmCreateVMDevice(&handle, kvm->vm_id, id, vcpu);
+ if (!NT_SUCCESS(r)) {
+ kvm_put_kvm(kvm);
+ goto unlock_vcpu_destroy;
+ }
+ r = gvmUpdateReturnBuffer(pIrp, 0, &handle, sizeof(handle));
+ if (r) {
+ gvmDeleteVMDevice(NULL, 0, id);
kvm_put_kvm(kvm);
goto unlock_vcpu_destroy;
}
@@ -2486,11 +1889,16 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
mutex_unlock(&kvm->lock);
kvm_arch_vcpu_postcreate(vcpu);
+
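+ /*
+ * Pin this vCPU's thread to one logical processor. The shift below
+ * appears to spread successive vCPUs over distinct CPUs (odd-numbered
+ * processors first, then even-numbered ones), presumably to keep
+ * vCPUs off sibling hyperthreads.
+ */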
+ Affinity = (KAFFINITY)1 << (
+ cpu_online_count - 1
+ - 2 * vcpu->vcpu_id / cpu_online_count % 2
+ - vcpu->vcpu_id * 2 % cpu_online_count);
+ KeSetSystemAffinityThread(Affinity);
return r;
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
- debugfs_remove_recursive(vcpu->debugfs_dentry);
vcpu_destroy:
kvm_arch_vcpu_destroy(vcpu);
vcpu_decrement:
@@ -2500,64 +1908,171 @@ vcpu_decrement:
return r;
}
-static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
+static int kvm_vm_ioctl_kick_vcpu(PDEVICE_OBJECT pDevObj, PIRP pIrp, void *arg)
{
- if (sigset) {
- sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
- vcpu->sigset_active = 1;
- vcpu->sigset = *sigset;
- } else
- vcpu->sigset_active = 0;
+ struct kvm_vcpu *vcpu;
+ struct gvm_device_extension *devext = pDevObj->DeviceExtension;
+ struct kvm *kvm = devext->PrivData;
+ int id = *(int *)arg;
+
+ if (id >= GVM_MAX_VCPU_ID)
+ return -EINVAL;
+
+ vcpu = kvm_get_vcpu_by_id(kvm, id);
+ if (!vcpu)
+ return -EINVAL;
+
+ kvm_vcpu_kick(vcpu);
+
return 0;
}
-static long kvm_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+static bool kvm_is_valid_prot_flags(u32 flags)
{
- struct kvm_vcpu *vcpu = filp->private_data;
- void __user *argp = (void __user *)arg;
- int r;
- struct kvm_fpu *fpu = NULL;
- struct kvm_sregs *kvm_sregs = NULL;
+ return (flags == RP_NOACCESS || flags == RP_RDWREX);
+}
- if (vcpu->kvm->mm != current->mm)
- return -EIO;
+static int kvm_adjust_rp_bitmap(struct kvm *kvm, u64 size)
+{
+ int old_size, new_size;
+ size_t *old_bitmap, *new_bitmap;
- if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
- return -EINVAL;
+ if (kvm->rp_bitmap_size >= size)
+ return 0;
-#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
- /*
- * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
- * so vcpu_load() would break it.
- */
- if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
- return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
-#endif
+ new_size = ALIGN(size, (u64)BITS_PER_LONG) / 8;
+ new_bitmap = kvm_kvzalloc(new_size);
+ if (!new_bitmap)
+ return -ENOMEM;
+
+ old_size = kvm->rp_bitmap_size;
+ old_bitmap = kvm->rp_bitmap;
+ memcpy(new_bitmap, old_bitmap, old_size);
+
+ kvm->rp_bitmap = new_bitmap;
+ kvm->rp_bitmap_size = new_size;
+
+ return 0;
+}
+
+/*
+ * Set a contiguous block of bits at once instead of looping over set_bit().
+ */
+static inline void set_bits_in_long(size_t *byte, int start, int nbits, bool set)
+{
+ size_t mask;
- r = vcpu_load(vcpu);
+ BUG_ON(byte == NULL);
+ BUG_ON(start < 0 || start > BITS_PER_LONG);
+ BUG_ON(nbits < 0 || start + nbits > BITS_PER_LONG);
+
+ mask = ((1 << nbits) - 1) << start;
+ if (set)
+ *byte |= mask;
+ else
+ *byte &= ~mask;
+}
+
+static void set_bit_block(size_t *bitmap, u64 start, u64 nbits, bool set)
+{
+ u64 first_long_index = start / BITS_PER_LONG;
+ u64 last_long_index = (start + nbits - 1) / BITS_PER_LONG;
+ u64 i;
+ int first_bit_index = (int)(start % BITS_PER_LONG);
+ int last_bit_index = (int)((start + nbits - 1) % BITS_PER_LONG);
+
+ if (first_long_index == last_long_index) {
+ set_bits_in_long(&bitmap[first_long_index], first_bit_index, (int)nbits,
+ set);
+ return;
+ }
+
+ set_bits_in_long(&bitmap[first_long_index], first_bit_index,
+ BITS_PER_LONG - first_bit_index, set);
+ for (i = first_long_index + 1; i < last_long_index; i++) {
+ bitmap[i] = set ? (size_t)-1 : 0;
+ }
+ set_bits_in_long(&bitmap[last_long_index], 0, last_bit_index + 1, set);
+}
+
+static int kvm_is_ram_prot(struct kvm *kvm, gfn_t gfn)
+{
+ if (!kvm->rp_bitmap)
+ return 0;
+
+ return test_bit(gfn, kvm->rp_bitmap);
+}
+
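+/*
+ * Mark or clear the guest-physical range described by @rp in the per-VM
+ * ram-protection bitmap. Faults on a protected gfn later exit to user
+ * space with GVM_EXIT_RAM_PROT (see kvm_should_ram_prot_exit above).
+ */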
+static int kvm_vm_ioctl_ram_prot(struct kvm *kvm, struct gvm_ram_protect *rp)
+{
+ int r = -EFAULT;
+ gfn_t first_gfn = rp->pa >> PAGE_SHIFT;
+ gfn_t last_gfn = (rp->pa + rp->size - 1) >> PAGE_SHIFT;
+
+ if (!rp->reserved)
+ return -EINVAL;
+
+ if (!kvm_is_valid_prot_flags(rp->flags))
+ return -EINVAL;
+
+ r = kvm_adjust_rp_bitmap(kvm, last_gfn + 1);
if (r)
return r;
+
+ set_bit_block(kvm->rp_bitmap, first_gfn, last_gfn + 1 - first_gfn,
+ rp->flags == RP_NOACCESS);
+
+ /* only need flush shadow when page access right is lowered */
+ if (rp->flags == RP_NOACCESS)
+ kvm_arch_flush_shadow_all(kvm);
+
+ return 0;
+}
+
+NTSTATUS kvm_vcpu_ioctl(PDEVICE_OBJECT pDevObj, PIRP pIrp,
+ unsigned int ioctl)
+{
+ struct gvm_device_extension *devext = pDevObj->DeviceExtension;
+ struct kvm_vcpu *vcpu = devext->PrivData;
+ void __user *argp = (void __user *)pIrp->AssociatedIrp.SystemBuffer;
+ int r;
+ struct kvm_fpu *fpu = NULL;
+ struct kvm_sregs *kvm_sregs = NULL;
+
+ if (vcpu->kvm->process != IoGetCurrentProcess())
+ return -EIO;
+
switch (ioctl) {
- case KVM_RUN:
+ case GVM_RUN:
r = -EINVAL;
- if (arg)
- goto out;
- if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
- /* The thread running this VCPU changed. */
- struct pid *oldpid = vcpu->pid;
- struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
-
- rcu_assign_pointer(vcpu->pid, newpid);
- if (oldpid)
- synchronize_rcu();
- put_pid(oldpid);
+ if (vcpu->thread != PsGetCurrentThread()) {
+ vcpu->thread = PsGetCurrentThread();
+ KeInitializeApc(&vcpu->apc, vcpu->thread,
+ OriginalApcEnvironment,
+ gvmWaitSuspend,
+ NULL,
+ NULL,
+ KernelMode,
+ NULL);
}
r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
- trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
break;
- case KVM_GET_REGS: {
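+ /*
+ * Map the two-page vcpu->run area into the calling process and return
+ * the resulting user VA through the IRP buffer; this stands in for the
+ * mmap()-based kvm_run mapping that the removed Linux code provided.
+ */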
+ case GVM_VCPU_MMAP:
+ r = -EINVAL;
+ size_t mmap_size = 2 * PAGE_SIZE;
+ size_t userva = __vm_mmap(NULL, 0, mmap_size, PROT_READ |PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, 0, (size_t)vcpu->run);
+ if (!userva)
+ break;
+ r = gvmUpdateReturnBuffer(pIrp, 0, &userva, sizeof(userva));
+ if (r) {
+ __vm_munmap(userva, 2 * PAGE_SIZE, false);
+ break;
+ }
+ vcpu->run_userva = userva;
+ break;
+ case GVM_GET_REGS: {
struct kvm_regs *kvm_regs;
r = -ENOMEM;
@@ -2567,15 +2082,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
if (r)
goto out_free1;
- r = -EFAULT;
- if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
+ r = gvmUpdateReturnBuffer(pIrp, 0, kvm_regs, sizeof(struct kvm_regs));
+ if (r)
goto out_free1;
r = 0;
out_free1:
kfree(kvm_regs);
break;
}
- case KVM_SET_REGS: {
+ case GVM_SET_REGS: {
struct kvm_regs *kvm_regs;
r = -ENOMEM;
@@ -2588,7 +2103,7 @@ out_free1:
kfree(kvm_regs);
break;
}
- case KVM_GET_SREGS: {
+ case GVM_GET_SREGS: {
kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
r = -ENOMEM;
if (!kvm_sregs)
@@ -2596,13 +2111,13 @@ out_free1:
r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
if (r)
goto out;
- r = -EFAULT;
- if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
+ r = gvmUpdateReturnBuffer(pIrp, 0, kvm_sregs, sizeof(struct kvm_sregs));
+ if (r)
goto out;
r = 0;
break;
}
- case KVM_SET_SREGS: {
+ case GVM_SET_SREGS: {
kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs));
if (IS_ERR(kvm_sregs)) {
r = PTR_ERR(kvm_sregs);
@@ -2612,19 +2127,16 @@ out_free1:
r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
break;
}
- case KVM_GET_MP_STATE: {
+ case GVM_GET_MP_STATE: {
struct kvm_mp_state mp_state;
r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
if (r)
goto out;
- r = -EFAULT;
- if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
- goto out;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, &mp_state, sizeof(mp_state));
break;
}
- case KVM_SET_MP_STATE: {
+ case GVM_SET_MP_STATE: {
struct kvm_mp_state mp_state;
r = -EFAULT;
@@ -2633,7 +2145,7 @@ out_free1:
r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
break;
}
- case KVM_TRANSLATE: {
+ case GVM_TRANSLATE: {
struct kvm_translation tr;
r = -EFAULT;
@@ -2642,13 +2154,10 @@ out_free1:
r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
if (r)
goto out;
- r = -EFAULT;
- if (copy_to_user(argp, &tr, sizeof(tr)))
- goto out;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, &tr, sizeof(tr));
break;
}
- case KVM_SET_GUEST_DEBUG: {
+ case GVM_SET_GUEST_DEBUG: {
struct kvm_guest_debug dbg;
r = -EFAULT;
@@ -2657,30 +2166,7 @@ out_free1:
r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
break;
}
- case KVM_SET_SIGNAL_MASK: {
- struct kvm_signal_mask __user *sigmask_arg = argp;
- struct kvm_signal_mask kvm_sigmask;
- sigset_t sigset, *p;
-
- p = NULL;
- if (argp) {
- r = -EFAULT;
- if (copy_from_user(&kvm_sigmask, argp,
- sizeof(kvm_sigmask)))
- goto out;
- r = -EINVAL;
- if (kvm_sigmask.len != sizeof(sigset))
- goto out;
- r = -EFAULT;
- if (copy_from_user(&sigset, sigmask_arg->sigset,
- sizeof(sigset)))
- goto out;
- p = &sigset;
- }
- r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
- break;
- }
- case KVM_GET_FPU: {
+ case GVM_GET_FPU: {
fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
r = -ENOMEM;
if (!fpu)
@@ -2688,13 +2174,10 @@ out_free1:
r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
if (r)
goto out;
- r = -EFAULT;
- if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
- goto out;
- r = 0;
+ r = gvmUpdateReturnBuffer(pIrp, 0, fpu, sizeof(struct kvm_fpu));
break;
}
- case KVM_SET_FPU: {
+ case GVM_SET_FPU: {
fpu = memdup_user(argp, sizeof(*fpu));
if (IS_ERR(fpu)) {
r = PTR_ERR(fpu);
@@ -2705,260 +2188,57 @@ out_free1:
break;
}
default:
- r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
+ r = kvm_arch_vcpu_ioctl(devext, pIrp, ioctl);
}
out:
- vcpu_put(vcpu);
kfree(fpu);
kfree(kvm_sregs);
return r;
}
-#ifdef CONFIG_KVM_COMPAT
-static long kvm_vcpu_compat_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
-{
- struct kvm_vcpu *vcpu = filp->private_data;
- void __user *argp = compat_ptr(arg);
- int r;
-
- if (vcpu->kvm->mm != current->mm)
- return -EIO;
-
- switch (ioctl) {
- case KVM_SET_SIGNAL_MASK: {
- struct kvm_signal_mask __user *sigmask_arg = argp;
- struct kvm_signal_mask kvm_sigmask;
- compat_sigset_t csigset;
- sigset_t sigset;
-
- if (argp) {
- r = -EFAULT;
- if (copy_from_user(&kvm_sigmask, argp,
- sizeof(kvm_sigmask)))
- goto out;
- r = -EINVAL;
- if (kvm_sigmask.len != sizeof(csigset))
- goto out;
- r = -EFAULT;
- if (copy_from_user(&csigset, sigmask_arg->sigset,
- sizeof(csigset)))
- goto out;
- sigset_from_compat(&sigset, &csigset);
- r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
- } else
- r = kvm_vcpu_ioctl_set_sigmask(vcpu, NULL);
- break;
- }
- default:
- r = kvm_vcpu_ioctl(filp, ioctl, arg);
- }
-
-out:
- return r;
-}
-#endif
-
-static int kvm_device_ioctl_attr(struct kvm_device *dev,
- int (*accessor)(struct kvm_device *dev,
- struct kvm_device_attr *attr),
- unsigned long arg)
-{
- struct kvm_device_attr attr;
-
- if (!accessor)
- return -EPERM;
-
- if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
- return -EFAULT;
-
- return accessor(dev, &attr);
-}
-
-static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
- unsigned long arg)
-{
- struct kvm_device *dev = filp->private_data;
-
- switch (ioctl) {
- case KVM_SET_DEVICE_ATTR:
- return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
- case KVM_GET_DEVICE_ATTR:
- return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);
- case KVM_HAS_DEVICE_ATTR:
- return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);
- default:
- if (dev->ops->ioctl)
- return dev->ops->ioctl(dev, ioctl, arg);
-
- return -ENOTTY;
- }
-}
-
-static int kvm_device_release(struct inode *inode, struct file *filp)
-{
- struct kvm_device *dev = filp->private_data;
- struct kvm *kvm = dev->kvm;
-
- kvm_put_kvm(kvm);
- return 0;
-}
-
-static const struct file_operations kvm_device_fops = {
- .unlocked_ioctl = kvm_device_ioctl,
-#ifdef CONFIG_KVM_COMPAT
- .compat_ioctl = kvm_device_ioctl,
-#endif
- .release = kvm_device_release,
-};
-
-struct kvm_device *kvm_device_from_filp(struct file *filp)
-{
- if (filp->f_op != &kvm_device_fops)
- return NULL;
-
- return filp->private_data;
-}
-
-static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
-#ifdef CONFIG_KVM_MPIC
- [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
- [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
-#endif
-
-#ifdef CONFIG_KVM_XICS
- [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
-#endif
-};
-
-int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
-{
- if (type >= ARRAY_SIZE(kvm_device_ops_table))
- return -ENOSPC;
-
- if (kvm_device_ops_table[type] != NULL)
- return -EEXIST;
-
- kvm_device_ops_table[type] = ops;
- return 0;
-}
-
-void kvm_unregister_device_ops(u32 type)
-{
- if (kvm_device_ops_table[type] != NULL)
- kvm_device_ops_table[type] = NULL;
-}
-
-static int kvm_ioctl_create_device(struct kvm *kvm,
- struct kvm_create_device *cd)
-{
- struct kvm_device_ops *ops = NULL;
- struct kvm_device *dev;
- bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
- int ret;
-
- if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
- return -ENODEV;
-
- ops = kvm_device_ops_table[cd->type];
- if (ops == NULL)
- return -ENODEV;
-
- if (test)
- return 0;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return -ENOMEM;
-
- dev->ops = ops;
- dev->kvm = kvm;
-
- mutex_lock(&kvm->lock);
- ret = ops->create(dev, cd->type);
- if (ret < 0) {
- mutex_unlock(&kvm->lock);
- kfree(dev);
- return ret;
- }
- list_add(&dev->vm_node, &kvm->devices);
- mutex_unlock(&kvm->lock);
-
- if (ops->init)
- ops->init(dev);
-
- ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
- if (ret < 0) {
- ops->destroy(dev);
- mutex_lock(&kvm->lock);
- list_del(&dev->vm_node);
- mutex_unlock(&kvm->lock);
- return ret;
- }
-
- kvm_get_kvm(kvm);
- cd->fd = ret;
- return 0;
-}
-
static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
{
switch (arg) {
- case KVM_CAP_USER_MEMORY:
- case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
- case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
- case KVM_CAP_INTERNAL_ERROR_DATA:
-#ifdef CONFIG_HAVE_KVM_MSI
- case KVM_CAP_SIGNAL_MSI:
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQFD
- case KVM_CAP_IRQFD:
- case KVM_CAP_IRQFD_RESAMPLE:
-#endif
- case KVM_CAP_IOEVENTFD_ANY_LENGTH:
- case KVM_CAP_CHECK_EXTENSION_VM:
- return 1;
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
- case KVM_CAP_IRQ_ROUTING:
- return KVM_MAX_IRQ_ROUTES;
+#ifdef CONFIG_HAVE_GVM_MSI
+ case GVM_CAP_SIGNAL_MSI:
#endif
-#if KVM_ADDRESS_SPACE_NUM > 1
- case KVM_CAP_MULTI_ADDRESS_SPACE:
- return KVM_ADDRESS_SPACE_NUM;
+ case GVM_CAP_IRQ_ROUTING:
+ return GVM_MAX_IRQ_ROUTES;
+#if GVM_ADDRESS_SPACE_NUM > 1
+ case GVM_CAP_MULTI_ADDRESS_SPACE:
+ return GVM_ADDRESS_SPACE_NUM;
#endif
- case KVM_CAP_MAX_VCPU_ID:
- return KVM_MAX_VCPU_ID;
+ case GVM_CAP_MAX_VCPU_ID:
+ return GVM_MAX_VCPU_ID;
default:
break;
}
return kvm_vm_ioctl_check_extension(kvm, arg);
}
-static long kvm_vm_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+NTSTATUS kvm_vm_ioctl(PDEVICE_OBJECT pDevObj, PIRP pIrp,
+ unsigned int ioctl)
{
- struct kvm *kvm = filp->private_data;
- void __user *argp = (void __user *)arg;
+ struct gvm_device_extension *devext = pDevObj->DeviceExtension;
+ struct kvm *kvm = devext->PrivData;
+ void __user *argp = (void __user *)pIrp->AssociatedIrp.SystemBuffer;
int r;
- if (kvm->mm != current->mm)
+ if (kvm->process != IoGetCurrentProcess())
return -EIO;
switch (ioctl) {
- case KVM_CREATE_VCPU:
- r = kvm_vm_ioctl_create_vcpu(kvm, arg);
+ case GVM_CREATE_VCPU:
+ r = kvm_vm_ioctl_create_vcpu(pDevObj, pIrp, argp);
break;
- case KVM_SET_USER_MEMORY_REGION: {
+ case GVM_SET_USER_MEMORY_REGION: {
struct kvm_userspace_memory_region kvm_userspace_mem;
r = -EFAULT;
- if (copy_from_user(&kvm_userspace_mem, argp,
- sizeof(kvm_userspace_mem)))
- goto out;
-
+ RtlCopyBytes(&kvm_userspace_mem, pIrp->AssociatedIrp.SystemBuffer, sizeof(kvm_userspace_mem));
r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
break;
}
- case KVM_GET_DIRTY_LOG: {
+ case GVM_GET_DIRTY_LOG: {
struct kvm_dirty_log log;
r = -EFAULT;
@@ -2967,46 +2247,18 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
break;
}
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- case KVM_REGISTER_COALESCED_MMIO: {
- struct kvm_coalesced_mmio_zone zone;
-
- r = -EFAULT;
- if (copy_from_user(&zone, argp, sizeof(zone)))
- goto out;
- r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
+ case GVM_KICK_VCPU:
+ r = kvm_vm_ioctl_kick_vcpu(pDevObj, pIrp, argp);
break;
- }
- case KVM_UNREGISTER_COALESCED_MMIO: {
- struct kvm_coalesced_mmio_zone zone;
+ case GVM_RAM_PROTECT:
+ struct gvm_ram_protect rp;
r = -EFAULT;
- if (copy_from_user(&zone, argp, sizeof(zone)))
- goto out;
- r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
+ RtlCopyBytes(&rp, pIrp->AssociatedIrp.SystemBuffer, sizeof(rp));
+ r = kvm_vm_ioctl_ram_prot(kvm, &rp);
break;
- }
-#endif
- case KVM_IRQFD: {
- struct kvm_irqfd data;
-
- r = -EFAULT;
- if (copy_from_user(&data, argp, sizeof(data)))
- goto out;
- r = kvm_irqfd(kvm, &data);
- break;
- }
- case KVM_IOEVENTFD: {
- struct kvm_ioeventfd data;
-
- r = -EFAULT;
- if (copy_from_user(&data, argp, sizeof(data)))
- goto out;
- r = kvm_ioeventfd(kvm, &data);
- break;
- }
-#ifdef CONFIG_HAVE_KVM_MSI
- case KVM_SIGNAL_MSI: {
+#ifdef CONFIG_HAVE_GVM_MSI
+ case GVM_SIGNAL_MSI: {
struct kvm_msi msi;
r = -EFAULT;
@@ -3016,32 +2268,28 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
#endif
-#ifdef __KVM_HAVE_IRQ_LINE
- case KVM_IRQ_LINE_STATUS:
- case KVM_IRQ_LINE: {
+ case GVM_IRQ_LINE_STATUS: {
struct kvm_irq_level irq_event;
r = -EFAULT;
if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
goto out;
- r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
- ioctl == KVM_IRQ_LINE_STATUS);
+ r = kvm_vm_ioctl_irq_line(kvm, &irq_event, true);
if (r)
goto out;
- r = -EFAULT;
- if (ioctl == KVM_IRQ_LINE_STATUS) {
- if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
+ if (ioctl == GVM_IRQ_LINE_STATUS) {
+ r = gvmUpdateReturnBuffer(pIrp, 0, &irq_event,
+ sizeof(irq_event));
+ if (r)
goto out;
}
r = 0;
break;
}
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
- case KVM_SET_GSI_ROUTING: {
+ case GVM_SET_GSI_ROUTING: {
struct kvm_irq_routing routing;
struct kvm_irq_routing __user *urouting;
struct kvm_irq_routing_entry *entries = NULL;
@@ -3050,7 +2298,7 @@ static long kvm_vm_ioctl(struct file *filp,
if (copy_from_user(&routing, argp, sizeof(routing)))
goto out;
r = -EINVAL;
- if (routing.nr > KVM_MAX_IRQ_ROUTES)
+ if (routing.nr > GVM_MAX_IRQ_ROUTES)
goto out;
if (routing.flags)
goto out;
@@ -3071,178 +2319,66 @@ out_free_irq_routing:
vfree(entries);
break;
}
-#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
- case KVM_CREATE_DEVICE: {
- struct kvm_create_device cd;
-
- r = -EFAULT;
- if (copy_from_user(&cd, argp, sizeof(cd)))
- goto out;
-
- r = kvm_ioctl_create_device(kvm, &cd);
- if (r)
- goto out;
-
- r = -EFAULT;
- if (copy_to_user(argp, &cd, sizeof(cd)))
- goto out;
-
- r = 0;
- break;
- }
- case KVM_CHECK_EXTENSION:
- r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
+ case GVM_CHECK_EXTENSION:
+ r = kvm_vm_ioctl_check_extension_generic(kvm, *(long *)argp);
+ gvmUpdateReturnBuffer(pIrp, 0, &r, sizeof(r));
+ r = STATUS_SUCCESS;
break;
default:
- r = kvm_arch_vm_ioctl(filp, ioctl, arg);
+ r = kvm_arch_vm_ioctl(devext, pIrp, ioctl);
}
out:
return r;
}
-#ifdef CONFIG_KVM_COMPAT
-struct compat_kvm_dirty_log {
- __u32 slot;
- __u32 padding1;
- union {
- compat_uptr_t dirty_bitmap; /* one bit per page */
- __u64 padding2;
- };
-};
-
-static long kvm_vm_compat_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+static int kvm_dev_ioctl_create_vm(PDEVICE_OBJECT pDevObj, PIRP pIrp, unsigned long arg)
{
- struct kvm *kvm = filp->private_data;
- int r;
-
- if (kvm->mm != current->mm)
- return -EIO;
- switch (ioctl) {
- case KVM_GET_DIRTY_LOG: {
- struct compat_kvm_dirty_log compat_log;
- struct kvm_dirty_log log;
-
- r = -EFAULT;
- if (copy_from_user(&compat_log, (void __user *)arg,
- sizeof(compat_log)))
- goto out;
- log.slot = compat_log.slot;
- log.padding1 = compat_log.padding1;
- log.padding2 = compat_log.padding2;
- log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
-
- r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
- break;
- }
- default:
- r = kvm_vm_ioctl(filp, ioctl, arg);
- }
-
-out:
- return r;
-}
-#endif
-
-static struct file_operations kvm_vm_fops = {
- .release = kvm_vm_release,
- .unlocked_ioctl = kvm_vm_ioctl,
-#ifdef CONFIG_KVM_COMPAT
- .compat_ioctl = kvm_vm_compat_ioctl,
-#endif
- .llseek = noop_llseek,
-};
-
-static int kvm_dev_ioctl_create_vm(unsigned long type)
-{
- int r;
struct kvm *kvm;
- struct file *file;
+ NTSTATUS rc;
+ HANDLE handle;
+ unsigned int type = arg;
kvm = kvm_create_vm(type);
if (IS_ERR(kvm))
return PTR_ERR(kvm);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- r = kvm_coalesced_mmio_init(kvm);
- if (r < 0) {
- kvm_put_kvm(kvm);
- return r;
- }
-#endif
- r = get_unused_fd_flags(O_CLOEXEC);
- if (r < 0) {
- kvm_put_kvm(kvm);
- return r;
- }
- file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
- if (IS_ERR(file)) {
- put_unused_fd(r);
- kvm_put_kvm(kvm);
- return PTR_ERR(file);
- }
- if (kvm_create_vm_debugfs(kvm, r) < 0) {
- put_unused_fd(r);
- fput(file);
- return -ENOMEM;
- }
-
- fd_install(r, file);
- return r;
+ rc = gvmCreateVMDevice(&handle, kvm->vm_id, -1, kvm);
+ if (NT_SUCCESS(rc))
+ gvmUpdateReturnBuffer(pIrp, 0, &handle, sizeof(handle));
+ return rc;
}
-static long kvm_dev_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+NTSTATUS kvm_dev_ioctl(PDEVICE_OBJECT pDevObj, PIRP pIrp,
+ unsigned int ioctl)
{
long r = -EINVAL;
+ struct gvm_device_extension *devext = pDevObj->DeviceExtension;
+ void* pin = pIrp->AssociatedIrp.SystemBuffer;
switch (ioctl) {
- case KVM_GET_API_VERSION:
- if (arg)
- goto out;
- r = KVM_API_VERSION;
+ case GVM_GET_API_VERSION:
+ r = GVM_VERSION;
+ gvmUpdateReturnBuffer(pIrp, 0, &r, sizeof(r));
+ r = STATUS_SUCCESS;
break;
- case KVM_CREATE_VM:
- r = kvm_dev_ioctl_create_vm(arg);
+ case GVM_CREATE_VM:
+ r = kvm_dev_ioctl_create_vm(pDevObj, pIrp, 0);
break;
- case KVM_CHECK_EXTENSION:
- r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
+ case GVM_CHECK_EXTENSION:
+ r = kvm_vm_ioctl_check_extension_generic(NULL, *(long *)pin);
+ gvmUpdateReturnBuffer(pIrp, 0, &r, sizeof(r));
+ r = STATUS_SUCCESS;
break;
- case KVM_GET_VCPU_MMAP_SIZE:
- if (arg)
- goto out;
- r = PAGE_SIZE; /* struct kvm_run */
-#ifdef CONFIG_X86
- r += PAGE_SIZE; /* pio data page */
-#endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- r += PAGE_SIZE; /* coalesced mmio ring page */
-#endif
- break;
- case KVM_TRACE_ENABLE:
- case KVM_TRACE_PAUSE:
- case KVM_TRACE_DISABLE:
- r = -EOPNOTSUPP;
+ case GVM_GET_VCPU_MMAP_SIZE: {
+ long mmap_size = 2 * PAGE_SIZE;
+ r = gvmUpdateReturnBuffer(pIrp, 0, &mmap_size, sizeof(mmap_size));
break;
+ }
default:
- return kvm_arch_dev_ioctl(filp, ioctl, arg);
+ return kvm_arch_dev_ioctl(devext, pIrp, ioctl);
}
-out:
return r;
}
-static struct file_operations kvm_chardev_ops = {
- .unlocked_ioctl = kvm_dev_ioctl,
- .compat_ioctl = kvm_dev_ioctl,
- .llseek = noop_llseek,
-};
-
-static struct miscdevice kvm_dev = {
- KVM_MINOR,
- "kvm",
- &kvm_chardev_ops,
-};
-
static void hardware_enable_nolock(void *junk)
{
int cpu = raw_smp_processor_id();
@@ -3260,6 +2396,8 @@ static void hardware_enable_nolock(void *junk)
atomic_inc(&hardware_enable_failed);
pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
}
+
+ return;
}
static int kvm_starting_cpu(unsigned int cpu)
@@ -3296,7 +2434,7 @@ static void hardware_disable_all_nolock(void)
kvm_usage_count--;
if (!kvm_usage_count)
- on_each_cpu(hardware_disable_nolock, NULL, 1);
+ smp_call_function_many(cpu_online_mask, hardware_disable_nolock, NULL, 1);
}
static void hardware_disable_all(void)
@@ -3315,8 +2453,7 @@ static int hardware_enable_all(void)
kvm_usage_count++;
if (kvm_usage_count == 1) {
atomic_set(&hardware_enable_failed, 0);
- on_each_cpu(hardware_enable_nolock, NULL, 1);
-
+ smp_call_function_many(cpu_online_mask, hardware_enable_nolock, NULL, 1);
if (atomic_read(&hardware_enable_failed)) {
hardware_disable_all_nolock();
r = -EBUSY;
@@ -3328,26 +2465,6 @@ static int hardware_enable_all(void)
return r;
}
-static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
- void *v)
-{
- /*
- * Some (well, at least mine) BIOSes hang on reboot if
- * in vmx root mode.
- *
- * And Intel TXT required VMX off for all cpu when system shutdown.
- */
- pr_info("kvm: exiting hardware virtualization\n");
- kvm_rebooting = true;
- on_each_cpu(hardware_disable_nolock, NULL, 1);
- return NOTIFY_OK;
-}
-
-static struct notifier_block kvm_reboot_notifier = {
- .notifier_call = kvm_reboot,
- .priority = 0,
-};
-
static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
{
int i;
@@ -3463,6 +2580,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ bus = vcpu->kvm->buses[bus_idx];
r = __kvm_io_bus_write(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3480,6 +2598,7 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ bus = vcpu->kvm->buses[bus_idx];
/* First try the device referenced by cookie. */
if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3514,7 +2633,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
return -EOPNOTSUPP;
}
-EXPORT_SYMBOL_GPL(kvm_io_bus_write);
/* kvm_io_bus_read - called under kvm->slots_lock */
int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
@@ -3530,6 +2648,7 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ bus = vcpu->kvm->buses[bus_idx];
r = __kvm_io_bus_read(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3542,8 +2661,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
- /* exclude ioeventfd which is limited by maximum fd */
- if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
+ if (bus->dev_count > NR_IOBUS_DEVS - 1)
return -ENOSPC;
new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) *
@@ -3604,6 +2722,7 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
srcu_idx = srcu_read_lock(&kvm->srcu);
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ bus = kvm->buses[bus_idx];
dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
if (dev_idx < 0)
@@ -3616,241 +2735,35 @@ out_unlock:
return iodev;
}
-EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev);
-
-static int kvm_debugfs_open(struct inode *inode, struct file *file,
- int (*get)(void *, u64 *), int (*set)(void *, u64),
- const char *fmt)
-{
- struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
- inode->i_private;
-
- /* The debugfs files are a reference to the kvm struct which
- * is still valid when kvm_destroy_vm is called.
- * To avoid the race between open and the removal of the debugfs
- * directory we test against the users count.
- */
- if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0))
- return -ENOENT;
-
- if (simple_attr_open(inode, file, get, set, fmt)) {
- kvm_put_kvm(stat_data->kvm);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int kvm_debugfs_release(struct inode *inode, struct file *file)
-{
- struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
- inode->i_private;
-
- simple_attr_release(inode, file);
- kvm_put_kvm(stat_data->kvm);
-
- return 0;
-}
-
-static int vm_stat_get_per_vm(void *data, u64 *val)
-{
- struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
-
- *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset);
-
- return 0;
-}
-
-static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
-{
- __simple_attr_check_format("%llu\n", 0ull);
- return kvm_debugfs_open(inode, file, vm_stat_get_per_vm,
- NULL, "%llu\n");
-}
-
-static const struct file_operations vm_stat_get_per_vm_fops = {
- .owner = THIS_MODULE,
- .open = vm_stat_get_per_vm_open,
- .release = kvm_debugfs_release,
- .read = simple_attr_read,
- .write = simple_attr_write,
- .llseek = generic_file_llseek,
-};
-
-static int vcpu_stat_get_per_vm(void *data, u64 *val)
-{
- int i;
- struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
- struct kvm_vcpu *vcpu;
-
- *val = 0;
- kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
- *val += *(u64 *)((void *)vcpu + stat_data->offset);
-
- return 0;
-}
-
-static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
-{
- __simple_attr_check_format("%llu\n", 0ull);
- return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
- NULL, "%llu\n");
-}
-
-static const struct file_operations vcpu_stat_get_per_vm_fops = {
- .owner = THIS_MODULE,
- .open = vcpu_stat_get_per_vm_open,
- .release = kvm_debugfs_release,
- .read = simple_attr_read,
- .write = simple_attr_write,
- .llseek = generic_file_llseek,
-};
-
-static const struct file_operations *stat_fops_per_vm[] = {
- [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
- [KVM_STAT_VM] = &vm_stat_get_per_vm_fops,
-};
-
-static int vm_stat_get(void *_offset, u64 *val)
-{
- unsigned offset = (long)_offset;
- struct kvm *kvm;
- struct kvm_stat_data stat_tmp = {.offset = offset};
- u64 tmp_val;
-
- *val = 0;
- spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list) {
- stat_tmp.kvm = kvm;
- vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
- *val += tmp_val;
- }
- spin_unlock(&kvm_lock);
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
-
-static int vcpu_stat_get(void *_offset, u64 *val)
-{
- unsigned offset = (long)_offset;
- struct kvm *kvm;
- struct kvm_stat_data stat_tmp = {.offset = offset};
- u64 tmp_val;
-
- *val = 0;
- spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list) {
- stat_tmp.kvm = kvm;
- vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
- *val += tmp_val;
- }
- spin_unlock(&kvm_lock);
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
-
-static const struct file_operations *stat_fops[] = {
- [KVM_STAT_VCPU] = &vcpu_stat_fops,
- [KVM_STAT_VM] = &vm_stat_fops,
-};
-
-static int kvm_init_debug(void)
-{
- int r = -EEXIST;
- struct kvm_stats_debugfs_item *p;
-
- kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
- if (kvm_debugfs_dir == NULL)
- goto out;
-
- kvm_debugfs_num_entries = 0;
- for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
- if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
- (void *)(long)p->offset,
- stat_fops[p->kind]))
- goto out_dir;
- }
-
- return 0;
-
-out_dir:
- debugfs_remove_recursive(kvm_debugfs_dir);
-out:
- return r;
-}
-
-static int kvm_suspend(void)
+/*
+ * The following two functions are kept here so that they
+ * can be used once the driver is hooked into Windows power
+ * state changes.
+ */
+int kvm_suspend(void)
{
if (kvm_usage_count)
- hardware_disable_nolock(NULL);
+ smp_call_function_many(cpu_online_mask,
+ hardware_disable_nolock, NULL, 1);
return 0;
}
-static void kvm_resume(void)
-{
- if (kvm_usage_count) {
- WARN_ON(raw_spin_is_locked(&kvm_count_lock));
- hardware_enable_nolock(NULL);
- }
-}
-
-static struct syscore_ops kvm_syscore_ops = {
- .suspend = kvm_suspend,
- .resume = kvm_resume,
-};
-
-static inline
-struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
-{
- return container_of(pn, struct kvm_vcpu, preempt_notifier);
-}
-
-static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
+void kvm_resume(void)
{
- struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
-
- if (vcpu->preempted)
- vcpu->preempted = false;
-
- kvm_arch_sched_in(vcpu, cpu);
-
- kvm_arch_vcpu_load(vcpu, cpu);
-}
-
-static void kvm_sched_out(struct preempt_notifier *pn,
- struct task_struct *next)
-{
- struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
-
- if (current->state == TASK_RUNNING)
- vcpu->preempted = true;
- kvm_arch_vcpu_put(vcpu);
+ if (kvm_usage_count)
+ smp_call_function_many(cpu_online_mask,
+ hardware_enable_nolock, NULL, 1);
}
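/*
 * A minimal sketch of how kvm_suspend()/kvm_resume() above might later be
 * wired into the driver's power handling. Everything below is an assumption
 * for illustration only: the gvmDispatchPower() routine, its registration as
 * the IRP_MJ_POWER dispatch entry, and reacting to system power-state
 * transitions are not part of this patch.
 */
NTSTATUS gvmDispatchPower(PDEVICE_OBJECT pDevObj, PIRP pIrp)
{
	PIO_STACK_LOCATION irpSp = IoGetCurrentIrpStackLocation(pIrp);

	UNREFERENCED_PARAMETER(pDevObj);

	if (irpSp->MinorFunction == IRP_MN_SET_POWER &&
	    irpSp->Parameters.Power.Type == SystemPowerState) {
		if (irpSp->Parameters.Power.State.SystemState == PowerSystemWorking)
			kvm_resume();	/* waking up: re-enable virtualization */
		else
			kvm_suspend();	/* going to sleep: disable virtualization */
	}

	PoStartNextPowerIrp(pIrp);
	pIrp->IoStatus.Status = STATUS_SUCCESS;
	IoCompleteRequest(pIrp, IO_NO_INCREMENT);
	return STATUS_SUCCESS;
}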
-int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
- struct module *module)
+int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align)
{
int r;
- int cpu;
r = kvm_arch_init(opaque);
if (r)
goto out_fail;
- /*
- * kvm_arch_init makes sure there's at most one caller
- * for architectures that support multiple implementations,
- * like intel and amd on x86.
- * kvm_arch_init must be called before kvm_irqfd_init to avoid creating
- * conflicts in case kvm is already setup for another implementation.
- */
- r = kvm_irqfd_init();
- if (r)
- goto out_irqfd;
-
if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
goto out_free_0;
@@ -3860,98 +2773,27 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
if (r < 0)
goto out_free_0a;
- for_each_online_cpu(cpu) {
- smp_call_function_single(cpu,
- kvm_arch_check_processor_compat,
- &r, 1);
- if (r < 0)
- goto out_free_1;
- }
-
- r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING",
- kvm_starting_cpu, kvm_dying_cpu);
- if (r)
- goto out_free_2;
- register_reboot_notifier(&kvm_reboot_notifier);
-
- /* A kmem cache lets us meet the alignment requirements of fx_save. */
- if (!vcpu_align)
- vcpu_align = __alignof__(struct kvm_vcpu);
- kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
- 0, NULL);
- if (!kvm_vcpu_cache) {
- r = -ENOMEM;
- goto out_free_3;
- }
-
- r = kvm_async_pf_init();
- if (r)
- goto out_free;
-
- kvm_chardev_ops.owner = module;
- kvm_vm_fops.owner = module;
- kvm_vcpu_fops.owner = module;
-
- r = misc_register(&kvm_dev);
- if (r) {
- pr_err("kvm: misc device register failed\n");
- goto out_unreg;
- }
-
- register_syscore_ops(&kvm_syscore_ops);
-
- kvm_preempt_ops.sched_in = kvm_sched_in;
- kvm_preempt_ops.sched_out = kvm_sched_out;
-
- r = kvm_init_debug();
- if (r) {
- pr_err("kvm: create debugfs files failed\n");
- goto out_undebugfs;
- }
-
- r = kvm_vfio_ops_init();
- WARN_ON(r);
+ kvm_arch_check_processor_compat(&r);
+ if (r < 0)
+ goto out_free_1;
return 0;
-out_undebugfs:
- unregister_syscore_ops(&kvm_syscore_ops);
- misc_deregister(&kvm_dev);
-out_unreg:
- kvm_async_pf_deinit();
-out_free:
- kmem_cache_destroy(kvm_vcpu_cache);
-out_free_3:
- unregister_reboot_notifier(&kvm_reboot_notifier);
- cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
-out_free_2:
out_free_1:
kvm_arch_hardware_unsetup();
out_free_0a:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
- kvm_irqfd_exit();
-out_irqfd:
kvm_arch_exit();
out_fail:
return r;
}
-EXPORT_SYMBOL_GPL(kvm_init);
void kvm_exit(void)
{
- debugfs_remove_recursive(kvm_debugfs_dir);
- misc_deregister(&kvm_dev);
- kmem_cache_destroy(kvm_vcpu_cache);
- kvm_async_pf_deinit();
- unregister_syscore_ops(&kvm_syscore_ops);
- unregister_reboot_notifier(&kvm_reboot_notifier);
- cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
- on_each_cpu(hardware_disable_nolock, NULL, 1);
+ smp_call_function_many(cpu_online_mask,
+ hardware_disable_nolock, NULL, 1);
kvm_arch_hardware_unsetup();
kvm_arch_exit();
- kvm_irqfd_exit();
free_cpumask_var(cpus_hardware_enabled);
- kvm_vfio_ops_exit();
}
-EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
deleted file mode 100644
index 1dd087d..0000000
--- a/virt/kvm/vfio.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * VFIO-KVM bridge pseudo device
- *
- * Copyright (C) 2013 Red Hat, Inc. All rights reserved.
- * Author: Alex Williamson <alex.williamson@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/errno.h>
-#include <linux/file.h>
-#include <linux/kvm_host.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/vfio.h>
-#include "vfio.h"
-
-struct kvm_vfio_group {
- struct list_head node;
- struct vfio_group *vfio_group;
-};
-
-struct kvm_vfio {
- struct list_head group_list;
- struct mutex lock;
- bool noncoherent;
-};
-
-static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
-{
- struct vfio_group *vfio_group;
- struct vfio_group *(*fn)(struct file *);
-
- fn = symbol_get(vfio_group_get_external_user);
- if (!fn)
- return ERR_PTR(-EINVAL);
-
- vfio_group = fn(filep);
-
- symbol_put(vfio_group_get_external_user);
-
- return vfio_group;
-}
-
-static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
-{
- void (*fn)(struct vfio_group *);
-
- fn = symbol_get(vfio_group_put_external_user);
- if (!fn)
- return;
-
- fn(vfio_group);
-
- symbol_put(vfio_group_put_external_user);
-}
-
-static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
-{
- long (*fn)(struct vfio_group *, unsigned long);
- long ret;
-
- fn = symbol_get(vfio_external_check_extension);
- if (!fn)
- return false;
-
- ret = fn(vfio_group, VFIO_DMA_CC_IOMMU);
-
- symbol_put(vfio_external_check_extension);
-
- return ret > 0;
-}
-
-/*
- * Groups can use the same or different IOMMU domains. If the same then
- * adding a new group may change the coherency of groups we've previously
- * been told about. We don't want to care about any of that so we retest
- * each group and bail as soon as we find one that's noncoherent. This
- * means we only ever [un]register_noncoherent_dma once for the whole device.
- */
-static void kvm_vfio_update_coherency(struct kvm_device *dev)
-{
- struct kvm_vfio *kv = dev->private;
- bool noncoherent = false;
- struct kvm_vfio_group *kvg;
-
- mutex_lock(&kv->lock);
-
- list_for_each_entry(kvg, &kv->group_list, node) {
- if (!kvm_vfio_group_is_coherent(kvg->vfio_group)) {
- noncoherent = true;
- break;
- }
- }
-
- if (noncoherent != kv->noncoherent) {
- kv->noncoherent = noncoherent;
-
- if (kv->noncoherent)
- kvm_arch_register_noncoherent_dma(dev->kvm);
- else
- kvm_arch_unregister_noncoherent_dma(dev->kvm);
- }
-
- mutex_unlock(&kv->lock);
-}
-
-static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
-{
- struct kvm_vfio *kv = dev->private;
- struct vfio_group *vfio_group;
- struct kvm_vfio_group *kvg;
- int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
- struct fd f;
- int32_t fd;
- int ret;
-
- switch (attr) {
- case KVM_DEV_VFIO_GROUP_ADD:
- if (get_user(fd, argp))
- return -EFAULT;
-
- f = fdget(fd);
- if (!f.file)
- return -EBADF;
-
- vfio_group = kvm_vfio_group_get_external_user(f.file);
- fdput(f);
-
- if (IS_ERR(vfio_group))
- return PTR_ERR(vfio_group);
-
- mutex_lock(&kv->lock);
-
- list_for_each_entry(kvg, &kv->group_list, node) {
- if (kvg->vfio_group == vfio_group) {
- mutex_unlock(&kv->lock);
- kvm_vfio_group_put_external_user(vfio_group);
- return -EEXIST;
- }
- }
-
- kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
- if (!kvg) {
- mutex_unlock(&kv->lock);
- kvm_vfio_group_put_external_user(vfio_group);
- return -ENOMEM;
- }
-
- list_add_tail(&kvg->node, &kv->group_list);
- kvg->vfio_group = vfio_group;
-
- kvm_arch_start_assignment(dev->kvm);
-
- mutex_unlock(&kv->lock);
-
- kvm_vfio_update_coherency(dev);
-
- return 0;
-
- case KVM_DEV_VFIO_GROUP_DEL:
- if (get_user(fd, argp))
- return -EFAULT;
-
- f = fdget(fd);
- if (!f.file)
- return -EBADF;
-
- vfio_group = kvm_vfio_group_get_external_user(f.file);
- fdput(f);
-
- if (IS_ERR(vfio_group))
- return PTR_ERR(vfio_group);
-
- ret = -ENOENT;
-
- mutex_lock(&kv->lock);
-
- list_for_each_entry(kvg, &kv->group_list, node) {
- if (kvg->vfio_group != vfio_group)
- continue;
-
- list_del(&kvg->node);
- kvm_vfio_group_put_external_user(kvg->vfio_group);
- kfree(kvg);
- ret = 0;
- break;
- }
-
- kvm_arch_end_assignment(dev->kvm);
-
- mutex_unlock(&kv->lock);
-
- kvm_vfio_group_put_external_user(vfio_group);
-
- kvm_vfio_update_coherency(dev);
-
- return ret;
- }
-
- return -ENXIO;
-}
-
-static int kvm_vfio_set_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- switch (attr->group) {
- case KVM_DEV_VFIO_GROUP:
- return kvm_vfio_set_group(dev, attr->attr, attr->addr);
- }
-
- return -ENXIO;
-}
-
-static int kvm_vfio_has_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr)
-{
- switch (attr->group) {
- case KVM_DEV_VFIO_GROUP:
- switch (attr->attr) {
- case KVM_DEV_VFIO_GROUP_ADD:
- case KVM_DEV_VFIO_GROUP_DEL:
- return 0;
- }
-
- break;
- }
-
- return -ENXIO;
-}
-
-static void kvm_vfio_destroy(struct kvm_device *dev)
-{
- struct kvm_vfio *kv = dev->private;
- struct kvm_vfio_group *kvg, *tmp;
-
- list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
- kvm_vfio_group_put_external_user(kvg->vfio_group);
- list_del(&kvg->node);
- kfree(kvg);
- kvm_arch_end_assignment(dev->kvm);
- }
-
- kvm_vfio_update_coherency(dev);
-
- kfree(kv);
- kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
-}
-
-static int kvm_vfio_create(struct kvm_device *dev, u32 type);
-
-static struct kvm_device_ops kvm_vfio_ops = {
- .name = "kvm-vfio",
- .create = kvm_vfio_create,
- .destroy = kvm_vfio_destroy,
- .set_attr = kvm_vfio_set_attr,
- .has_attr = kvm_vfio_has_attr,
-};
-
-static int kvm_vfio_create(struct kvm_device *dev, u32 type)
-{
- struct kvm_device *tmp;
- struct kvm_vfio *kv;
-
- /* Only one VFIO "device" per VM */
- list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
- if (tmp->ops == &kvm_vfio_ops)
- return -EBUSY;
-
- kv = kzalloc(sizeof(*kv), GFP_KERNEL);
- if (!kv)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&kv->group_list);
- mutex_init(&kv->lock);
-
- dev->private = kv;
-
- return 0;
-}
-
-int kvm_vfio_ops_init(void)
-{
- return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
-}
-
-void kvm_vfio_ops_exit(void)
-{
- kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
-}
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
deleted file mode 100644
index ab88c7d..0000000
--- a/virt/kvm/vfio.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef __KVM_VFIO_H
-#define __KVM_VFIO_H
-
-#ifdef CONFIG_KVM_VFIO
-int kvm_vfio_ops_init(void);
-void kvm_vfio_ops_exit(void);
-#else
-static inline int kvm_vfio_ops_init(void)
-{
- return 0;
-}
-static inline void kvm_vfio_ops_exit(void)
-{
-}
-#endif
-
-#endif