diff options
Diffstat (limited to 'virt')
29 files changed, 688 insertions, 11644 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig deleted file mode 100644 index b0cc1a3..0000000 --- a/virt/kvm/Kconfig +++ /dev/null @@ -1,52 +0,0 @@ -# KVM common configuration items and defaults - -config HAVE_KVM - bool - -config HAVE_KVM_IRQCHIP - bool - -config HAVE_KVM_IRQFD - bool - -config HAVE_KVM_IRQ_ROUTING - bool - -config HAVE_KVM_EVENTFD - bool - select EVENTFD - -config KVM_MMIO - bool - -config KVM_ASYNC_PF - bool - -# Toggle to switch between direct notification and batch job -config KVM_ASYNC_PF_SYNC - bool - -config HAVE_KVM_MSI - bool - -config HAVE_KVM_CPU_RELAX_INTERCEPT - bool - -config KVM_VFIO - bool - -config HAVE_KVM_ARCH_TLB_FLUSH_ALL - bool - -config HAVE_KVM_INVALID_WAKEUPS - bool - -config KVM_GENERIC_DIRTYLOG_READ_PROTECT - bool - -config KVM_COMPAT - def_bool y - depends on KVM && COMPAT && !S390 - -config HAVE_KVM_IRQ_BYPASS - bool diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c deleted file mode 100644 index 528af4b..0000000 --- a/virt/kvm/arm/aarch32.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * (not much of an) Emulation layer for 32bit guests. - * - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * based on arch/arm/kvm/emulate.c - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <linux/kvm_host.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> - -#ifndef CONFIG_ARM64 -#define COMPAT_PSR_T_BIT PSR_T_BIT -#define COMPAT_PSR_IT_MASK PSR_IT_MASK -#endif - -/* - * stolen from arch/arm/kernel/opcodes.c - * - * condition code lookup table - * index into the table is test code: EQ, NE, ... LT, GT, AL, NV - * - * bit position in short is condition code: NZCV - */ -static const unsigned short cc_map[16] = { - 0xF0F0, /* EQ == Z set */ - 0x0F0F, /* NE */ - 0xCCCC, /* CS == C set */ - 0x3333, /* CC */ - 0xFF00, /* MI == N set */ - 0x00FF, /* PL */ - 0xAAAA, /* VS == V set */ - 0x5555, /* VC */ - 0x0C0C, /* HI == C set && Z clear */ - 0xF3F3, /* LS == C clear || Z set */ - 0xAA55, /* GE == (N==V) */ - 0x55AA, /* LT == (N!=V) */ - 0x0A05, /* GT == (!Z && (N==V)) */ - 0xF5FA, /* LE == (Z || (N!=V)) */ - 0xFFFF, /* AL always */ - 0 /* NV */ -}; - -/* - * Check if a trapped instruction should have been executed or not. - */ -bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) -{ - unsigned long cpsr; - u32 cpsr_cond; - int cond; - - /* Top two bits non-zero? Unconditional. */ - if (kvm_vcpu_get_hsr(vcpu) >> 30) - return true; - - /* Is condition field valid? */ - cond = kvm_vcpu_get_condition(vcpu); - if (cond == 0xE) - return true; - - cpsr = *vcpu_cpsr(vcpu); - - if (cond < 0) { - /* This can happen in Thumb mode: examine IT state. */ - unsigned long it; - - it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); - - /* it == 0 => unconditional. */ - if (it == 0) - return true; - - /* The cond for this insn works out as the top 4 bits. 
*/ - cond = (it >> 4); - } - - cpsr_cond = cpsr >> 28; - - if (!((cc_map[cond] >> cpsr_cond) & 1)) - return false; - - return true; -} - -/** - * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block - * @vcpu: The VCPU pointer - * - * When exceptions occur while instructions are executed in Thumb IF-THEN - * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have - * to do this little bit of work manually. The fields map like this: - * - * IT[7:0] -> CPSR[26:25],CPSR[15:10] - */ -static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) -{ - unsigned long itbits, cond; - unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_arm = !(cpsr & COMPAT_PSR_T_BIT); - - if (is_arm || !(cpsr & COMPAT_PSR_IT_MASK)) - return; - - cond = (cpsr & 0xe000) >> 13; - itbits = (cpsr & 0x1c00) >> (10 - 2); - itbits |= (cpsr & (0x3 << 25)) >> 25; - - /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ - if ((itbits & 0x7) == 0) - itbits = cond = 0; - else - itbits = (itbits << 1) & 0x1f; - - cpsr &= ~COMPAT_PSR_IT_MASK; - cpsr |= cond << 13; - cpsr |= (itbits & 0x1c) << (10 - 2); - cpsr |= (itbits & 0x3) << 25; - *vcpu_cpsr(vcpu) = cpsr; -} - -/** - * kvm_skip_instr - skip a trapped instruction and proceed to the next - * @vcpu: The vcpu pointer - */ -void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) -{ - bool is_thumb; - - is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT); - if (is_thumb && !is_wide_instr) - *vcpu_pc(vcpu) += 2; - else - *vcpu_pc(vcpu) += 4; - kvm_adjust_itstate(vcpu); -} diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c deleted file mode 100644 index 27a1f63..0000000 --- a/virt/kvm/arm/arch_timer.c +++ /dev/null @@ -1,519 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. 
- * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/irq.h> - -#include <clocksource/arm_arch_timer.h> -#include <asm/arch_timer.h> - -#include <kvm/arm_vgic.h> -#include <kvm/arm_arch_timer.h> - -#include "trace.h" - -static struct timecounter *timecounter; -static unsigned int host_vtimer_irq; -static u32 host_vtimer_irq_flags; - -void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) -{ - vcpu->arch.timer_cpu.active_cleared_last = false; -} - -static cycle_t kvm_phys_timer_read(void) -{ - return timecounter->cc->read(timecounter->cc); -} - -static bool timer_is_armed(struct arch_timer_cpu *timer) -{ - return timer->armed; -} - -/* timer_arm: as in "arm the timer", not as in ARM the company */ -static void timer_arm(struct arch_timer_cpu *timer, u64 ns) -{ - timer->armed = true; - hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), - HRTIMER_MODE_ABS); -} - -static void timer_disarm(struct arch_timer_cpu *timer) -{ - if (timer_is_armed(timer)) { - hrtimer_cancel(&timer->timer); - cancel_work_sync(&timer->expired); - timer->armed = false; - } -} - -static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) -{ - struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; - - /* - * We disable 
the timer in the world switch and let it be - * handled by kvm_timer_sync_hwstate(). Getting a timer - * interrupt at this point is a sure sign of some major - * breakage. - */ - pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); - return IRQ_HANDLED; -} - -/* - * Work function for handling the backup timer that we schedule when a vcpu is - * no longer running, but had a timer programmed to fire in the future. - */ -static void kvm_timer_inject_irq_work(struct work_struct *work) -{ - struct kvm_vcpu *vcpu; - - vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); - vcpu->arch.timer_cpu.armed = false; - - WARN_ON(!kvm_timer_should_fire(vcpu)); - - /* - * If the vcpu is blocked we want to wake it up so that it will see - * the timer has expired when entering the guest. - */ - kvm_vcpu_kick(vcpu); -} - -static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) -{ - cycle_t cval, now; - - cval = vcpu->arch.timer_cpu.cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - if (now < cval) { - u64 ns; - - ns = cyclecounter_cyc2ns(timecounter->cc, - cval - now, - timecounter->mask, - &timecounter->frac); - return ns; - } - - return 0; -} - -static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) -{ - struct arch_timer_cpu *timer; - struct kvm_vcpu *vcpu; - u64 ns; - - timer = container_of(hrt, struct arch_timer_cpu, timer); - vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); - - /* - * Check that the timer has really expired from the guest's - * PoV (NTP on the host may have forced it to expire - * early). If we should have slept longer, restart it. 
- */ - ns = kvm_timer_compute_delta(vcpu); - if (unlikely(ns)) { - hrtimer_forward_now(hrt, ns_to_ktime(ns)); - return HRTIMER_RESTART; - } - - schedule_work(&timer->expired); - return HRTIMER_NORESTART; -} - -static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); -} - -bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - cycle_t cval, now; - - if (!kvm_timer_irq_can_fire(vcpu)) - return false; - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - return cval <= now; -} - -static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) -{ - int ret; - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - BUG_ON(!vgic_initialized(vcpu->kvm)); - - timer->active_cleared_last = false; - timer->irq.level = new_level; - trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq, - timer->irq.level); - ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, - timer->irq.irq, - timer->irq.level); - WARN_ON(ret); -} - -/* - * Check if there was a change in the timer state (should we raise or lower - * the line level to the GIC). - */ -static int kvm_timer_update_state(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - /* - * If userspace modified the timer registers via SET_ONE_REG before - * the vgic was initialized, we mustn't set the timer->irq.level value - * because the guest would never see the interrupt. Instead wait - * until we call this function from kvm_timer_flush_hwstate. 
- */ - if (!vgic_initialized(vcpu->kvm) || !timer->enabled) - return -ENODEV; - - if (kvm_timer_should_fire(vcpu) != timer->irq.level) - kvm_timer_update_irq(vcpu, !timer->irq.level); - - return 0; -} - -/* - * Schedule the background timer before calling kvm_vcpu_block, so that this - * thread is removed from its waitqueue and made runnable when there's a timer - * interrupt to handle. - */ -void kvm_timer_schedule(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - BUG_ON(timer_is_armed(timer)); - - /* - * No need to schedule a background timer if the guest timer has - * already expired, because kvm_vcpu_block will return before putting - * the thread to sleep. - */ - if (kvm_timer_should_fire(vcpu)) - return; - - /* - * If the timer is not capable of raising interrupts (disabled or - * masked), then there's no more work for us to do. - */ - if (!kvm_timer_irq_can_fire(vcpu)) - return; - - /* The timer has not yet expired, schedule a background timer */ - timer_arm(timer, kvm_timer_compute_delta(vcpu)); -} - -void kvm_timer_unschedule(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - timer_disarm(timer); -} - -/** - * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer has expired while we were running in the host, - * and inject an interrupt if that was the case. - */ -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - bool phys_active; - int ret; - - if (kvm_timer_update_state(vcpu)) - return; - - /* - * If we enter the guest with the virtual input level to the VGIC - * asserted, then we have already told the VGIC what we need to, and - * we don't need to exit from the guest until the guest deactivates - * the already injected interrupt, so therefore we should set the - * hardware active state to prevent unnecessary exits from the guest. 
- * - * Also, if we enter the guest with the virtual timer interrupt active, - * then it must be active on the physical distributor, because we set - * the HW bit and the guest must be able to deactivate the virtual and - * physical interrupt at the same time. - * - * Conversely, if the virtual input level is deasserted and the virtual - * interrupt is not active, then always clear the hardware active state - * to ensure that hardware interrupts from the timer triggers a guest - * exit. - */ - phys_active = timer->irq.level || - kvm_vgic_map_is_active(vcpu, timer->irq.irq); - - /* - * We want to avoid hitting the (re)distributor as much as - * possible, as this is a potentially expensive MMIO access - * (not to mention locks in the irq layer), and a solution for - * this is to cache the "active" state in memory. - * - * Things to consider: we cannot cache an "active set" state, - * because the HW can change this behind our back (it becomes - * "clear" in the HW). We must then restrict the caching to - * the "clear" state. - * - * The cache is invalidated on: - * - vcpu put, indicating that the HW cannot be trusted to be - * in a sane state on the next vcpu load, - * - any change in the interrupt state - * - * Usage conditions: - * - cached value is "active clear" - * - value to be programmed is "active clear" - */ - if (timer->active_cleared_last && !phys_active) - return; - - ret = irq_set_irqchip_state(host_vtimer_irq, - IRQCHIP_STATE_ACTIVE, - phys_active); - WARN_ON(ret); - - timer->active_cleared_last = !phys_active; -} - -/** - * kvm_timer_sync_hwstate - sync timer state from cpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer has expired while we were running in the guest, - * and inject an interrupt if that was the case. 
- */ -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - BUG_ON(timer_is_armed(timer)); - - /* - * The guest could have modified the timer registers or the timer - * could have expired, update the timer state. - */ - kvm_timer_update_state(vcpu); -} - -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - /* - * The vcpu timer irq number cannot be determined in - * kvm_timer_vcpu_init() because it is called much before - * kvm_vcpu_set_target(). To handle this, we determine - * vcpu timer irq number when the vcpu is reset. - */ - timer->irq.irq = irq->irq; - - /* - * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 - * and to 0 for ARMv7. We provide an implementation that always - * resets the timer to be disabled and unmasked and is compliant with - * the ARMv7 architecture. - */ - timer->cntv_ctl = 0; - kvm_timer_update_state(vcpu); - - return 0; -} - -void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); - hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - timer->timer.function = kvm_timer_expire; -} - -static void kvm_timer_init_interrupt(void *info) -{ - enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); -} - -int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - timer->cntv_ctl = value; - break; - case KVM_REG_ARM_TIMER_CNT: - vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value; - break; - case KVM_REG_ARM_TIMER_CVAL: - timer->cntv_cval = value; - break; - default: - return -1; - } - - kvm_timer_update_state(vcpu); - return 0; -} - -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - struct arch_timer_cpu 
*timer = &vcpu->arch.timer_cpu; - - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - return timer->cntv_ctl; - case KVM_REG_ARM_TIMER_CNT: - return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - case KVM_REG_ARM_TIMER_CVAL: - return timer->cntv_cval; - } - return (u64)-1; -} - -static int kvm_timer_starting_cpu(unsigned int cpu) -{ - kvm_timer_init_interrupt(NULL); - return 0; -} - -static int kvm_timer_dying_cpu(unsigned int cpu) -{ - disable_percpu_irq(host_vtimer_irq); - return 0; -} - -int kvm_timer_hyp_init(void) -{ - struct arch_timer_kvm_info *info; - int err; - - info = arch_timer_get_kvm_info(); - timecounter = &info->timecounter; - - if (info->virtual_irq <= 0) { - kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", - info->virtual_irq); - return -ENODEV; - } - host_vtimer_irq = info->virtual_irq; - - host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); - if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && - host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for IRQ%d, assuming level low\n", - host_vtimer_irq); - host_vtimer_irq_flags = IRQF_TRIGGER_LOW; - } - - err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, - "kvm guest timer", kvm_get_running_vcpus()); - if (err) { - kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", - host_vtimer_irq, err); - return err; - } - - kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); - - cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, - "AP_KVM_ARM_TIMER_STARTING", kvm_timer_starting_cpu, - kvm_timer_dying_cpu); - return err; -} - -void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - timer_disarm(timer); - kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq); -} - -int kvm_timer_enable(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - struct irq_desc *desc; - struct irq_data *data; - int phys_irq; - int ret; - - if (timer->enabled) - return 0; - - /* - * 
Find the physical IRQ number corresponding to the host_vtimer_irq - */ - desc = irq_to_desc(host_vtimer_irq); - if (!desc) { - kvm_err("%s: no interrupt descriptor\n", __func__); - return -EINVAL; - } - - data = irq_desc_get_irq_data(desc); - while (data->parent_data) - data = data->parent_data; - - phys_irq = data->hwirq; - - /* - * Tell the VGIC that the virtual interrupt is tied to a - * physical interrupt. We do that once per VCPU. - */ - ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq); - if (ret) - return ret; - - - /* - * There is a potential race here between VCPUs starting for the first - * time, which may be enabling the timer multiple times. That doesn't - * hurt though, because we're just setting a variable to the same - * variable that it already was. The important thing is that all - * VCPUs have the enabled variable set, before entering the guest, if - * the arch timers are enabled. - */ - if (timecounter) - timer->enabled = 1; - - return 0; -} - -void kvm_timer_init(struct kvm *kvm) -{ - kvm->arch.timer.cntvoff = kvm_phys_timer_read(); -} diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c deleted file mode 100644 index 798866a..0000000 --- a/virt/kvm/arm/hyp/timer-sr.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <clocksource/arm_arch_timer.h> -#include <linux/compiler.h> -#include <linux/kvm_host.h> - -#include <asm/kvm_hyp.h> - -/* vcpu is already in the HYP VA space */ -void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - u64 val; - - if (timer->enabled) { - timer->cntv_ctl = read_sysreg_el0(cntv_ctl); - timer->cntv_cval = read_sysreg_el0(cntv_cval); - } - - /* Disable the virtual timer */ - write_sysreg_el0(0, cntv_ctl); - - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); - - /* Clear cntvoff for the host */ - write_sysreg(0, cntvoff_el2); -} - -void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - u64 val; - - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); - - if (timer->enabled) { - write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); - write_sysreg_el0(timer->cntv_cval, cntv_cval); - isb(); - write_sysreg_el0(timer->cntv_ctl, cntv_ctl); - } -} diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c deleted file mode 100644 index c8aeb7b..0000000 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/compiler.h> -#include <linux/irqchip/arm-gic.h> -#include <linux/kvm_host.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> - -static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, - void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; - u32 eisr0, eisr1; - int i; - bool expect_mi; - - expect_mi = !!(cpu_if->vgic_hcr & GICH_HCR_UIE); - - for (i = 0; i < nr_lr; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - - expect_mi |= (!(cpu_if->vgic_lr[i] & GICH_LR_HW) && - (cpu_if->vgic_lr[i] & GICH_LR_EOI)); - } - - if (expect_mi) { - cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); - - if (cpu_if->vgic_misr & GICH_MISR_EOI) { - eisr0 = readl_relaxed(base + GICH_EISR0); - if (unlikely(nr_lr > 32)) - eisr1 = readl_relaxed(base + GICH_EISR1); - else - eisr1 = 0; - } else { - eisr0 = eisr1 = 0; - } - } else { - cpu_if->vgic_misr = 0; - eisr0 = eisr1 = 0; - } - -#ifdef CONFIG_CPU_BIG_ENDIAN - cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; -#else - cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; -#endif -} - -static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; - u32 elrsr0, elrsr1; - - elrsr0 = readl_relaxed(base + GICH_ELRSR0); - if (unlikely(nr_lr > 32)) - elrsr1 = readl_relaxed(base + GICH_ELRSR1); - else - elrsr1 = 0; - -#ifdef CONFIG_CPU_BIG_ENDIAN - 
cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; -#else - cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; -#endif -} - -static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; - int i; - - for (i = 0; i < nr_lr; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - - if (cpu_if->vgic_elrsr & (1UL << i)) - cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; - else - cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); - - writel_relaxed(0, base + GICH_LR0 + (i * 4)); - } -} - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - - if (!base) - return; - - cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); - - if (vcpu->arch.vgic_cpu.live_lrs) { - cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); - - save_maint_int_state(vcpu, base); - save_elrsr(vcpu, base); - save_lrs(vcpu, base); - - writel_relaxed(0, base + GICH_HCR); - - vcpu->arch.vgic_cpu.live_lrs = 0; - } else { - cpu_if->vgic_eisr = 0; - cpu_if->vgic_elrsr = ~0UL; - cpu_if->vgic_misr = 0; - cpu_if->vgic_apr = 0; - } -} - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; - int i; - u64 live_lrs = 0; - - if (!base) - return; - - - for (i = 0; i < nr_lr; i++) - if (cpu_if->vgic_lr[i] & GICH_LR_STATE) - live_lrs |= 1UL << i; - - if (live_lrs) { - 
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); - writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); - for (i = 0; i < nr_lr; i++) { - if (!(live_lrs & (1UL << i))) - continue; - - writel_relaxed(cpu_if->vgic_lr[i], - base + GICH_LR0 + (i * 4)); - } - } - - writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); - vcpu->arch.vgic_cpu.live_lrs = live_lrs; -} - -#ifdef CONFIG_ARM64 -/* - * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the - * guest. - * - * @vcpu: the offending vcpu - * - * Returns: - * 1: GICV access successfully performed - * 0: Not a GICV access - * -1: Illegal GICV access - */ -int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_dist *vgic = &kvm->arch.vgic; - phys_addr_t fault_ipa; - void __iomem *addr; - int rd; - - /* Build the full address */ - fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); - - /* If not for GICV, move on */ - if (fault_ipa < vgic->vgic_cpu_base || - fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE)) - return 0; - - /* Reject anything but a 32bit access */ - if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) - return -1; - - /* Not aligned? 
Don't bother */ - if (fault_ipa & 3) - return -1; - - rd = kvm_vcpu_dabt_get_rd(vcpu); - addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va); - addr += fault_ipa - vgic->vgic_cpu_base; - - if (kvm_vcpu_dabt_iswrite(vcpu)) { - u32 data = vcpu_data_guest_to_host(vcpu, - vcpu_get_reg(vcpu, rd), - sizeof(u32)); - writel_relaxed(data, addr); - } else { - u32 data = readl_relaxed(addr); - vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, - sizeof(u32))); - } - - return 1; -} -#endif diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c deleted file mode 100644 index 3947095..0000000 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <linux/compiler.h> -#include <linux/irqchip/arm-gic-v3.h> -#include <linux/kvm_host.h> - -#include <asm/kvm_hyp.h> - -#define vtr_to_max_lr_idx(v) ((v) & 0xf) -#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) - -static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) -{ - switch (lr & 0xf) { - case 0: - return read_gicreg(ICH_LR0_EL2); - case 1: - return read_gicreg(ICH_LR1_EL2); - case 2: - return read_gicreg(ICH_LR2_EL2); - case 3: - return read_gicreg(ICH_LR3_EL2); - case 4: - return read_gicreg(ICH_LR4_EL2); - case 5: - return read_gicreg(ICH_LR5_EL2); - case 6: - return read_gicreg(ICH_LR6_EL2); - case 7: - return read_gicreg(ICH_LR7_EL2); - case 8: - return read_gicreg(ICH_LR8_EL2); - case 9: - return read_gicreg(ICH_LR9_EL2); - case 10: - return read_gicreg(ICH_LR10_EL2); - case 11: - return read_gicreg(ICH_LR11_EL2); - case 12: - return read_gicreg(ICH_LR12_EL2); - case 13: - return read_gicreg(ICH_LR13_EL2); - case 14: - return read_gicreg(ICH_LR14_EL2); - case 15: - return read_gicreg(ICH_LR15_EL2); - } - - unreachable(); -} - -static void __hyp_text __gic_v3_set_lr(u64 val, int lr) -{ - switch (lr & 0xf) { - case 0: - write_gicreg(val, ICH_LR0_EL2); - break; - case 1: - write_gicreg(val, ICH_LR1_EL2); - break; - case 2: - write_gicreg(val, ICH_LR2_EL2); - break; - case 3: - write_gicreg(val, ICH_LR3_EL2); - break; - case 4: - write_gicreg(val, ICH_LR4_EL2); - break; - case 5: - write_gicreg(val, ICH_LR5_EL2); - break; - case 6: - write_gicreg(val, ICH_LR6_EL2); - break; - case 7: - write_gicreg(val, ICH_LR7_EL2); - break; - case 8: - write_gicreg(val, ICH_LR8_EL2); - break; - case 9: - write_gicreg(val, ICH_LR9_EL2); - break; - case 10: - write_gicreg(val, ICH_LR10_EL2); - break; - case 11: - write_gicreg(val, ICH_LR11_EL2); - break; - case 12: - write_gicreg(val, ICH_LR12_EL2); - break; - case 13: - write_gicreg(val, ICH_LR13_EL2); - break; - case 14: - write_gicreg(val, ICH_LR14_EL2); - break; - case 15: - write_gicreg(val, 
ICH_LR15_EL2); - break; - } -} - -static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr) -{ - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - int i; - bool expect_mi; - - expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE); - - for (i = 0; i < nr_lr; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - - expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) && - (cpu_if->vgic_lr[i] & ICH_LR_EOI)); - } - - if (expect_mi) { - cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); - - if (cpu_if->vgic_misr & ICH_MISR_EOI) - cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); - else - cpu_if->vgic_eisr = 0; - } else { - cpu_if->vgic_misr = 0; - cpu_if->vgic_eisr = 0; - } -} - -void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - u64 val; - - /* - * Make sure stores to the GIC via the memory mapped interface - * are now visible to the system register interface. - */ - if (!cpu_if->vgic_sre) - dsb(st); - - cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); - - if (vcpu->arch.vgic_cpu.live_lrs) { - int i; - u32 max_lr_idx, nr_pri_bits; - - cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); - - write_gicreg(0, ICH_HCR_EL2); - val = read_gicreg(ICH_VTR_EL2); - max_lr_idx = vtr_to_max_lr_idx(val); - nr_pri_bits = vtr_to_nr_pri_bits(val); - - save_maint_int_state(vcpu, max_lr_idx + 1); - - for (i = 0; i <= max_lr_idx; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - - if (cpu_if->vgic_elrsr & (1 << i)) - cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; - else - cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); - - __gic_v3_set_lr(0, i); - } - - switch (nr_pri_bits) { - case 7: - cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); - cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); - case 6: - cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); - default: - cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); - } - - switch (nr_pri_bits) { - case 7: - cpu_if->vgic_ap1r[3] = 
read_gicreg(ICH_AP1R3_EL2); - cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); - case 6: - cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); - default: - cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); - } - - vcpu->arch.vgic_cpu.live_lrs = 0; - } else { - cpu_if->vgic_misr = 0; - cpu_if->vgic_eisr = 0; - cpu_if->vgic_elrsr = 0xffff; - cpu_if->vgic_ap0r[0] = 0; - cpu_if->vgic_ap0r[1] = 0; - cpu_if->vgic_ap0r[2] = 0; - cpu_if->vgic_ap0r[3] = 0; - cpu_if->vgic_ap1r[0] = 0; - cpu_if->vgic_ap1r[1] = 0; - cpu_if->vgic_ap1r[2] = 0; - cpu_if->vgic_ap1r[3] = 0; - } - - val = read_gicreg(ICC_SRE_EL2); - write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); - - if (!cpu_if->vgic_sre) { - /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ - isb(); - write_gicreg(1, ICC_SRE_EL1); - } -} - -void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - u64 val; - u32 max_lr_idx, nr_pri_bits; - u16 live_lrs = 0; - int i; - - /* - * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a - * Group0 interrupt (as generated in GICv2 mode) to be - * delivered as a FIQ to the guest, with potentially fatal - * consequences. So we must make sure that ICC_SRE_EL1 has - * been actually programmed with the value we want before - * starting to mess with the rest of the GIC. 
- */ - if (!cpu_if->vgic_sre) { - write_gicreg(0, ICC_SRE_EL1); - isb(); - } - - val = read_gicreg(ICH_VTR_EL2); - max_lr_idx = vtr_to_max_lr_idx(val); - nr_pri_bits = vtr_to_nr_pri_bits(val); - - for (i = 0; i <= max_lr_idx; i++) { - if (cpu_if->vgic_lr[i] & ICH_LR_STATE) - live_lrs |= (1 << i); - } - - write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); - - if (live_lrs) { - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); - - switch (nr_pri_bits) { - case 7: - write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); - write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); - case 6: - write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); - default: - write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); - } - - switch (nr_pri_bits) { - case 7: - write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); - write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); - case 6: - write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); - default: - write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); - } - - for (i = 0; i <= max_lr_idx; i++) { - if (!(live_lrs & (1 << i))) - continue; - - __gic_v3_set_lr(cpu_if->vgic_lr[i], i); - } - } - - /* - * Ensures that the above will have reached the - * (re)distributors. This ensure the guest will read the - * correct values from the memory-mapped interface. - */ - if (!cpu_if->vgic_sre) { - isb(); - dsb(sy); - } - vcpu->arch.vgic_cpu.live_lrs = live_lrs; - - /* - * Prevent the guest from touching the GIC system registers if - * SRE isn't enabled for GICv3 emulation. 
- */ - write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, - ICC_SRE_EL2); -} - -void __hyp_text __vgic_v3_init_lrs(void) -{ - int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); - int i; - - for (i = 0; i <= max_lr_idx; i++) - __gic_v3_set_lr(0, i); -} - -u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) -{ - return read_gicreg(ICH_VTR_EL2); -} diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c deleted file mode 100644 index 69ccce3..0000000 --- a/virt/kvm/arm/pmu.c +++ /dev/null @@ -1,543 +0,0 @@ -/* - * Copyright (C) 2015 Linaro Ltd. - * Author: Shannon Zhao <shannon.zhao@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/perf_event.h> -#include <linux/uaccess.h> -#include <asm/kvm_emulate.h> -#include <kvm/arm_pmu.h> -#include <kvm/arm_vgic.h> - -/** - * kvm_pmu_get_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - */ -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) -{ - u64 counter, reg, enabled, running; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - counter = vcpu_sys_reg(vcpu, reg); - - /* The real counter value is equal to the value of counter register plus - * the value perf event counts. 
- */ - if (pmc->perf_event) - counter += perf_event_read_value(pmc->perf_event, &enabled, - &running); - - return counter & pmc->bitmask; -} - -/** - * kvm_pmu_set_counter_value - set PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - * @val: The counter value - */ -void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) -{ - u64 reg; - - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); -} - -/** - * kvm_pmu_stop_counter - stop PMU counter - * @pmc: The PMU counter pointer - * - * If this counter has been configured to monitor some event, release it here. - */ -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) -{ - u64 counter, reg; - - if (pmc->perf_event) { - counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - vcpu_sys_reg(vcpu, reg) = counter; - perf_event_disable(pmc->perf_event); - perf_event_release_kernel(pmc->perf_event); - pmc->perf_event = NULL; - } -} - -/** - * kvm_pmu_vcpu_reset - reset pmu state for cpu - * @vcpu: The vcpu pointer - * - */ -void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) -{ - int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { - kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); - pmu->pmc[i].idx = i; - pmu->pmc[i].bitmask = 0xffffffffUL; - } -} - -/** - * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu - * @vcpu: The vcpu pointer - * - */ -void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { - struct kvm_pmc *pmc = &pmu->pmc[i]; - - if (pmc->perf_event) { - perf_event_disable(pmc->perf_event); - perf_event_release_kernel(pmc->perf_event); - pmc->perf_event = NULL; - } - } -} - -u64 
kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) -{ - u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; - - val &= ARMV8_PMU_PMCR_N_MASK; - if (val == 0) - return BIT(ARMV8_PMU_CYCLE_IDX); - else - return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX); -} - -/** - * kvm_pmu_enable_counter - enable selected PMU counter - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMCNTENSET register - * - * Call perf_event_enable to start counting the perf event - */ -void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) -{ - int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; - - if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) - return; - - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { - if (!(val & BIT(i))) - continue; - - pmc = &pmu->pmc[i]; - if (pmc->perf_event) { - perf_event_enable(pmc->perf_event); - if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) - kvm_debug("fail to enable perf event\n"); - } - } -} - -/** - * kvm_pmu_disable_counter - disable selected PMU counter - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMCNTENCLR register - * - * Call perf_event_disable to stop counting the perf event - */ -void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) -{ - int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; - - if (!val) - return; - - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { - if (!(val & BIT(i))) - continue; - - pmc = &pmu->pmc[i]; - if (pmc->perf_event) - perf_event_disable(pmc->perf_event); - } -} - -static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) -{ - u64 reg = 0; - - if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { - reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); - reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); - reg &= kvm_pmu_valid_counter_mask(vcpu); - } - - return reg; -} - -/** - * kvm_pmu_overflow_set - set PMU overflow interrupt - * @vcpu: The vcpu pointer - * @val: the value 
guest writes to PMOVSSET register - */ -void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) -{ - u64 reg; - - if (val == 0) - return; - - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; - reg = kvm_pmu_overflow_status(vcpu); - if (reg != 0) - kvm_vcpu_kick(vcpu); -} - -static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) -{ - struct kvm_pmu *pmu = &vcpu->arch.pmu; - bool overflow; - - if (!kvm_arm_pmu_v3_ready(vcpu)) - return; - - overflow = !!kvm_pmu_overflow_status(vcpu); - if (pmu->irq_level != overflow) { - pmu->irq_level = overflow; - kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow); - } -} - -/** - * kvm_pmu_flush_hwstate - flush pmu state to cpu - * @vcpu: The vcpu pointer - * - * Check if the PMU has overflowed while we were running in the host, and inject - * an interrupt if that was the case. - */ -void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) -{ - kvm_pmu_update_state(vcpu); -} - -/** - * kvm_pmu_sync_hwstate - sync pmu state from cpu - * @vcpu: The vcpu pointer - * - * Check if the PMU has overflowed while we were running in the guest, and - * inject an interrupt if that was the case. - */ -void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) -{ - kvm_pmu_update_state(vcpu); -} - -static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) -{ - struct kvm_pmu *pmu; - struct kvm_vcpu_arch *vcpu_arch; - - pmc -= pmc->idx; - pmu = container_of(pmc, struct kvm_pmu, pmc[0]); - vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); - return container_of(vcpu_arch, struct kvm_vcpu, arch); -} - -/** - * When perf event overflows, call kvm_pmu_overflow_set to set overflow status. 
- */ -static void kvm_pmu_perf_overflow(struct perf_event *perf_event, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct kvm_pmc *pmc = perf_event->overflow_handler_context; - struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); - int idx = pmc->idx; - - kvm_pmu_overflow_set(vcpu, BIT(idx)); -} - -/** - * kvm_pmu_software_increment - do software increment - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMSWINC register - */ -void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) -{ - int i; - u64 type, enable, reg; - - if (val == 0) - return; - - enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { - if (!(val & BIT(i))) - continue; - type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) - & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) - && (enable & BIT(i))) { - reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; - reg = lower_32_bits(reg); - vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - if (!reg) - kvm_pmu_overflow_set(vcpu, BIT(i)); - } - } -} - -/** - * kvm_pmu_handle_pmcr - handle PMCR register - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMCR register - */ -void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) -{ - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; - u64 mask; - int i; - - mask = kvm_pmu_valid_counter_mask(vcpu); - if (val & ARMV8_PMU_PMCR_E) { - kvm_pmu_enable_counter(vcpu, - vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); - } else { - kvm_pmu_disable_counter(vcpu, mask); - } - - if (val & ARMV8_PMU_PMCR_C) - kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); - - if (val & ARMV8_PMU_PMCR_P) { - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) - kvm_pmu_set_counter_value(vcpu, i, 0); - } - - if (val & ARMV8_PMU_PMCR_LC) { - pmc = &pmu->pmc[ARMV8_PMU_CYCLE_IDX]; - pmc->bitmask = 0xffffffffffffffffUL; - } -} - -static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) -{ - return (vcpu_sys_reg(vcpu, PMCR_EL0) & 
ARMV8_PMU_PMCR_E) && - (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); -} - -/** - * kvm_pmu_set_counter_event_type - set selected counter to monitor some event - * @vcpu: The vcpu pointer - * @data: The data guest writes to PMXEVTYPER_EL0 - * @select_idx: The number of selected counter - * - * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an - * event with given hardware event number. Here we call perf_event API to - * emulate this action and create a kernel perf event for it. - */ -void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, - u64 select_idx) -{ - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - struct perf_event *event; - struct perf_event_attr attr; - u64 eventsel, counter; - - kvm_pmu_stop_counter(vcpu, pmc); - eventsel = data & ARMV8_PMU_EVTYPE_EVENT; - - /* Software increment event does't need to be backed by a perf event */ - if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && - select_idx != ARMV8_PMU_CYCLE_IDX) - return; - - memset(&attr, 0, sizeof(struct perf_event_attr)); - attr.type = PERF_TYPE_RAW; - attr.size = sizeof(attr); - attr.pinned = 1; - attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, select_idx); - attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; - attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; - attr.exclude_hv = 1; /* Don't count EL2 events */ - attr.exclude_host = 1; /* Don't count host events */ - attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ? - ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; - - counter = kvm_pmu_get_counter_value(vcpu, select_idx); - /* The initial sample period (overflow count) of an event. 
*/ - attr.sample_period = (-counter) & pmc->bitmask; - - event = perf_event_create_kernel_counter(&attr, -1, current, - kvm_pmu_perf_overflow, pmc); - if (IS_ERR(event)) { - pr_err_once("kvm: pmu event creation failed %ld\n", - PTR_ERR(event)); - return; - } - - pmc->perf_event = event; -} - -bool kvm_arm_support_pmu_v3(void) -{ - /* - * Check if HW_PERF_EVENTS are supported by checking the number of - * hardware performance counters. This could ensure the presence of - * a physical PMU and CONFIG_PERF_EVENT is selected. - */ - return (perf_num_counters() > 0); -} - -static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) -{ - if (!kvm_arm_support_pmu_v3()) - return -ENODEV; - - /* - * We currently require an in-kernel VGIC to use the PMU emulation, - * because we do not support forwarding PMU overflow interrupts to - * userspace yet. - */ - if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm)) - return -ENODEV; - - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) || - !kvm_arm_pmu_irq_initialized(vcpu)) - return -ENXIO; - - if (kvm_arm_pmu_v3_ready(vcpu)) - return -EBUSY; - - kvm_pmu_vcpu_reset(vcpu); - vcpu->arch.pmu.ready = true; - - return 0; -} - -#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) - -/* - * For one VM the interrupt type must be same for each vcpu. - * As a PPI, the interrupt number is the same for all vcpus, - * while as an SPI it must be a separate number per vcpu. 
- */ -static bool pmu_irq_is_valid(struct kvm *kvm, int irq) -{ - int i; - struct kvm_vcpu *vcpu; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!kvm_arm_pmu_irq_initialized(vcpu)) - continue; - - if (irq_is_ppi(irq)) { - if (vcpu->arch.pmu.irq_num != irq) - return false; - } else { - if (vcpu->arch.pmu.irq_num == irq) - return false; - } - } - - return true; -} - -int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) -{ - switch (attr->attr) { - case KVM_ARM_VCPU_PMU_V3_IRQ: { - int __user *uaddr = (int __user *)(long)attr->addr; - int irq; - - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) - return -ENODEV; - - if (get_user(irq, uaddr)) - return -EFAULT; - - /* The PMU overflow interrupt can be a PPI or a valid SPI. */ - if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq))) - return -EINVAL; - - if (!pmu_irq_is_valid(vcpu->kvm, irq)) - return -EINVAL; - - if (kvm_arm_pmu_irq_initialized(vcpu)) - return -EBUSY; - - kvm_debug("Set kvm ARM PMU irq: %d\n", irq); - vcpu->arch.pmu.irq_num = irq; - return 0; - } - case KVM_ARM_VCPU_PMU_V3_INIT: - return kvm_arm_pmu_v3_init(vcpu); - } - - return -ENXIO; -} - -int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) -{ - switch (attr->attr) { - case KVM_ARM_VCPU_PMU_V3_IRQ: { - int __user *uaddr = (int __user *)(long)attr->addr; - int irq; - - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) - return -ENODEV; - - if (!kvm_arm_pmu_irq_initialized(vcpu)) - return -ENXIO; - - irq = vcpu->arch.pmu.irq_num; - return put_user(irq, uaddr); - } - } - - return -ENXIO; -} - -int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) -{ - switch (attr->attr) { - case KVM_ARM_VCPU_PMU_V3_IRQ: - case KVM_ARM_VCPU_PMU_V3_INIT: - if (kvm_arm_support_pmu_v3() && - test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) - return 0; - } - - return -ENXIO; -} diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h deleted file mode 100644 index 
37d8b98..0000000 --- a/virt/kvm/arm/trace.h +++ /dev/null @@ -1,63 +0,0 @@ -#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_KVM_H - -#include <linux/tracepoint.h> - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kvm - -/* - * Tracepoints for vgic - */ -TRACE_EVENT(vgic_update_irq_pending, - TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), - TP_ARGS(vcpu_id, irq, level), - - TP_STRUCT__entry( - __field( unsigned long, vcpu_id ) - __field( __u32, irq ) - __field( bool, level ) - ), - - TP_fast_assign( - __entry->vcpu_id = vcpu_id; - __entry->irq = irq; - __entry->level = level; - ), - - TP_printk("VCPU: %ld, IRQ %d, level: %d", - __entry->vcpu_id, __entry->irq, __entry->level) -); - -/* - * Tracepoints for arch_timer - */ -TRACE_EVENT(kvm_timer_update_irq, - TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), - TP_ARGS(vcpu_id, irq, level), - - TP_STRUCT__entry( - __field( unsigned long, vcpu_id ) - __field( __u32, irq ) - __field( int, level ) - ), - - TP_fast_assign( - __entry->vcpu_id = vcpu_id; - __entry->irq = irq; - __entry->level = level; - ), - - TP_printk("VCPU: %ld, IRQ %d, level %d", - __entry->vcpu_id, __entry->irq, __entry->level) -); - -#endif /* _TRACE_KVM_H */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE trace - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c deleted file mode 100644 index 8cebfbc..0000000 --- a/virt/kvm/arm/vgic/vgic-init.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (C) 2015, 2016 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/uaccess.h> -#include <linux/interrupt.h> -#include <linux/cpu.h> -#include <linux/kvm_host.h> -#include <kvm/arm_vgic.h> -#include <asm/kvm_mmu.h> -#include "vgic.h" - -/* - * Initialization rules: there are multiple stages to the vgic - * initialization, both for the distributor and the CPU interfaces. - * - * Distributor: - * - * - kvm_vgic_early_init(): initialization of static data that doesn't - * depend on any sizing information or emulation type. No allocation - * is allowed there. - * - * - vgic_init(): allocation and initialization of the generic data - * structures that depend on sizing information (number of CPUs, - * number of interrupts). Also initializes the vcpu specific data - * structures. Can be executed lazily for GICv2. - * - * CPU Interface: - * - * - kvm_vgic_cpu_early_init(): initialization of static data that - * doesn't depend on any sizing information or emulation type. No - * allocation is allowed there. - */ - -/* EARLY INIT */ - -/* - * Those 2 functions should not be needed anymore but they - * still are called from arm.c - */ -void kvm_vgic_early_init(struct kvm *kvm) -{ -} - -void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) -{ -} - -/* CREATION */ - -/** - * kvm_vgic_create: triggered by the instantiation of the VGIC device by - * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only) - * or through the generic KVM_CREATE_DEVICE API ioctl. - * irqchip_in_kernel() tells you if this function succeeded or not. 
- * @kvm: kvm struct pointer - * @type: KVM_DEV_TYPE_ARM_VGIC_V[23] - */ -int kvm_vgic_create(struct kvm *kvm, u32 type) -{ - int i, vcpu_lock_idx = -1, ret; - struct kvm_vcpu *vcpu; - - if (irqchip_in_kernel(kvm)) - return -EEXIST; - - /* - * This function is also called by the KVM_CREATE_IRQCHIP handler, - * which had no chance yet to check the availability of the GICv2 - * emulation. So check this here again. KVM_CREATE_DEVICE does - * the proper checks already. - */ - if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && - !kvm_vgic_global_state.can_emulate_gicv2) - return -ENODEV; - - /* - * Any time a vcpu is run, vcpu_load is called which tries to grab the - * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure - * that no other VCPUs are run while we create the vgic. - */ - ret = -EBUSY; - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!mutex_trylock(&vcpu->mutex)) - goto out_unlock; - vcpu_lock_idx = i; - } - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu->arch.has_run_once) - goto out_unlock; - } - ret = 0; - - if (type == KVM_DEV_TYPE_ARM_VGIC_V2) - kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; - else - kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS; - - if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) { - ret = -E2BIG; - goto out_unlock; - } - - kvm->arch.vgic.in_kernel = true; - kvm->arch.vgic.vgic_model = type; - - /* - * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init) - * it is stored in distributor struct for asm save/restore purpose - */ - kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base; - - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; - -out_unlock: - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&vcpu->mutex); - } - return ret; -} - -/* INIT/DESTROY */ - -/** - * kvm_vgic_dist_init: initialize the dist data structures - * @kvm: kvm struct pointer - * @nr_spis: 
number of spis, frozen by caller - */ -static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); - int i; - - INIT_LIST_HEAD(&dist->lpi_list_head); - spin_lock_init(&dist->lpi_list_lock); - - dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); - if (!dist->spis) - return -ENOMEM; - - /* - * In the following code we do not take the irq struct lock since - * no other action on irq structs can happen while the VGIC is - * not initialized yet: - * If someone wants to inject an interrupt or does a MMIO access, we - * require prior initialization in case of a virtual GICv3 or trigger - * initialization when using a virtual GICv2. - */ - for (i = 0; i < nr_spis; i++) { - struct vgic_irq *irq = &dist->spis[i]; - - irq->intid = i + VGIC_NR_PRIVATE_IRQS; - INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); - irq->vcpu = NULL; - irq->target_vcpu = vcpu0; - kref_init(&irq->refcount); - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) - irq->targets = 0; - else - irq->mpidr = 0; - } - return 0; -} - -/** - * kvm_vgic_vcpu_init: initialize the vcpu data structures and - * enable the VCPU interface - * @vcpu: the VCPU which's VGIC should be initialized - */ -static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int i; - - INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - spin_lock_init(&vgic_cpu->ap_list_lock); - - /* - * Enable and configure all SGIs to be edge-triggered and - * configure all PPIs as level-triggered. 
- */ - for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; - - INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); - irq->intid = i; - irq->vcpu = NULL; - irq->target_vcpu = vcpu; - irq->targets = 1U << vcpu->vcpu_id; - kref_init(&irq->refcount); - if (vgic_irq_is_sgi(i)) { - /* SGIs */ - irq->enabled = 1; - irq->config = VGIC_CONFIG_EDGE; - } else { - /* PPIs */ - irq->config = VGIC_CONFIG_LEVEL; - } - } - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_enable(vcpu); - else - vgic_v3_enable(vcpu); -} - -/* - * vgic_init: allocates and initializes dist and vcpu data structures - * depending on two dimensioning parameters: - * - the number of spis - * - the number of vcpus - * The function is generally called when nr_spis has been explicitly set - * by the guest through the KVM DEVICE API. If not nr_spis is set to 256. - * vgic_initialized() returns true when this function has succeeded. - * Must be called with kvm->lock held! 
- */ -int vgic_init(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int ret = 0, i; - - if (vgic_initialized(kvm)) - return 0; - - /* freeze the number of spis */ - if (!dist->nr_spis) - dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS; - - ret = kvm_vgic_dist_init(kvm, dist->nr_spis); - if (ret) - goto out; - - if (vgic_has_its(kvm)) - dist->msis_require_devid = true; - - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_init(vcpu); - - ret = kvm_vgic_setup_default_irq_routing(kvm); - if (ret) - goto out; - - dist->initialized = true; -out: - return ret; -} - -static void kvm_vgic_dist_destroy(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - mutex_lock(&kvm->lock); - - dist->ready = false; - dist->initialized = false; - - kfree(dist->spis); - dist->nr_spis = 0; - - mutex_unlock(&kvm->lock); -} - -void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - INIT_LIST_HEAD(&vgic_cpu->ap_list_head); -} - -void kvm_vgic_destroy(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - int i; - - kvm_vgic_dist_destroy(kvm); - - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_destroy(vcpu); -} - -/** - * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest - * is a GICv2. A GICv3 must be explicitly initialized by the guest using the - * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group. - * @kvm: kvm struct pointer - */ -int vgic_lazy_init(struct kvm *kvm) -{ - int ret = 0; - - if (unlikely(!vgic_initialized(kvm))) { - /* - * We only provide the automatic initialization of the VGIC - * for the legacy case of a GICv2. Any other type must - * be explicitly initialized once setup with the respective - * KVM device call. 
- */ - if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) - return -EBUSY; - - mutex_lock(&kvm->lock); - ret = vgic_init(kvm); - mutex_unlock(&kvm->lock); - } - - return ret; -} - -/* RESOURCE MAPPING */ - -/** - * Map the MMIO regions depending on the VGIC model exposed to the guest - * called on the first VCPU run. - * Also map the virtual CPU interface into the VM. - * v2/v3 derivatives call vgic_init if not already done. - * vgic_ready() returns true if this function has succeeded. - * @kvm: kvm struct pointer - */ -int kvm_vgic_map_resources(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - int ret = 0; - - mutex_lock(&kvm->lock); - if (!irqchip_in_kernel(kvm)) - goto out; - - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) - ret = vgic_v2_map_resources(kvm); - else - ret = vgic_v3_map_resources(kvm); -out: - mutex_unlock(&kvm->lock); - return ret; -} - -/* GENERIC PROBE */ - -static int vgic_init_cpu_starting(unsigned int cpu) -{ - enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0); - return 0; -} - - -static int vgic_init_cpu_dying(unsigned int cpu) -{ - disable_percpu_irq(kvm_vgic_global_state.maint_irq); - return 0; -} - -static irqreturn_t vgic_maintenance_handler(int irq, void *data) -{ - /* - * We cannot rely on the vgic maintenance interrupt to be - * delivered synchronously. This means we can only use it to - * exit the VM, and we perform the handling of EOIed - * interrupts on the exit path (see vgic_process_maintenance). - */ - return IRQ_HANDLED; -} - -/** - * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable - * according to the host GIC model. Accordingly calls either - * vgic_v2/v3_probe which registers the KVM_DEVICE that can be - * instantiated by a guest later on . 
- */ -int kvm_vgic_hyp_init(void) -{ - const struct gic_kvm_info *gic_kvm_info; - int ret; - - gic_kvm_info = gic_get_kvm_info(); - if (!gic_kvm_info) - return -ENODEV; - - if (!gic_kvm_info->maint_irq) { - kvm_err("No vgic maintenance irq\n"); - return -ENXIO; - } - - switch (gic_kvm_info->type) { - case GIC_V2: - ret = vgic_v2_probe(gic_kvm_info); - break; - case GIC_V3: - ret = vgic_v3_probe(gic_kvm_info); - if (!ret) { - static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif); - kvm_info("GIC system register CPU interface enabled\n"); - } - break; - default: - ret = -ENODEV; - }; - - if (ret) - return ret; - - kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; - ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, - vgic_maintenance_handler, - "vgic", kvm_get_running_vcpus()); - if (ret) { - kvm_err("Cannot register interrupt %d\n", - kvm_vgic_global_state.maint_irq); - return ret; - } - - ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, - "AP_KVM_ARM_VGIC_INIT_STARTING", - vgic_init_cpu_starting, vgic_init_cpu_dying); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - - kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq); - return 0; - -out_free_irq: - free_percpu_irq(kvm_vgic_global_state.maint_irq, - kvm_get_running_vcpus()); - return ret; -} diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c deleted file mode 100644 index d918dcf..0000000 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2015, 2016 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <trace/events/kvm.h> -#include <kvm/arm_vgic.h> -#include "vgic.h" - -/** - * vgic_irqfd_set_irq: inject the IRQ corresponding to the - * irqchip routing entry - * - * This is the entry point for irqfd IRQ injection - */ -static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, - int level, bool line_status) -{ - unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS; - - if (!vgic_valid_spi(kvm, spi_id)) - return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, spi_id, level); -} - -/** - * kvm_set_routing_entry: populate a kvm routing entry - * from a user routing entry - * - * @kvm: the VM this entry is applied to - * @e: kvm kernel routing entry handle - * @ue: user api routing entry handle - * return 0 on success, -EINVAL on errors. - */ -int kvm_set_routing_entry(struct kvm *kvm, - struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) -{ - int r = -EINVAL; - - switch (ue->type) { - case KVM_IRQ_ROUTING_IRQCHIP: - e->set = vgic_irqfd_set_irq; - e->irqchip.irqchip = ue->u.irqchip.irqchip; - e->irqchip.pin = ue->u.irqchip.pin; - if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) || - (e->irqchip.irqchip >= KVM_NR_IRQCHIPS)) - goto out; - break; - case KVM_IRQ_ROUTING_MSI: - e->set = kvm_set_msi; - e->msi.address_lo = ue->u.msi.address_lo; - e->msi.address_hi = ue->u.msi.address_hi; - e->msi.data = ue->u.msi.data; - e->msi.flags = ue->flags; - e->msi.devid = ue->u.msi.devid; - break; - default: - goto out; - } - r = 0; -out: - return r; -} - -/** - * kvm_set_msi: inject the MSI corresponding to the - * MSI routing entry - * - * This is the entry point for irqfd MSI injection - * and userspace MSI injection. 
- */ -int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, - int level, bool line_status) -{ - struct kvm_msi msi; - - msi.address_lo = e->msi.address_lo; - msi.address_hi = e->msi.address_hi; - msi.data = e->msi.data; - msi.flags = e->msi.flags; - msi.devid = e->msi.devid; - - if (!vgic_has_its(kvm)) - return -ENODEV; - - return vgic_its_inject_msi(kvm, &msi); -} - -int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) -{ - struct kvm_irq_routing_entry *entries; - struct vgic_dist *dist = &kvm->arch.vgic; - u32 nr = dist->nr_spis; - int i, ret; - - entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry), - GFP_KERNEL); - if (!entries) - return -ENOMEM; - - for (i = 0; i < nr; i++) { - entries[i].gsi = i; - entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; - entries[i].u.irqchip.irqchip = 0; - entries[i].u.irqchip.pin = i; - } - ret = kvm_set_irq_routing(kvm, entries, nr, 0); - kfree(entries); - return ret; -} diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c deleted file mode 100644 index 4660a7d..0000000 --- a/virt/kvm/arm/vgic/vgic-its.c +++ /dev/null @@ -1,1570 +0,0 @@ -/* - * GICv3 ITS emulation - * - * Copyright (C) 2015,2016 ARM Ltd. - * Author: Andre Przywara <andre.przywara@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/list.h> -#include <linux/uaccess.h> - -#include <linux/irqchip/arm-gic-v3.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - -#include "vgic.h" -#include "vgic-mmio.h" - -/* - * Creates a new (reference to a) struct vgic_irq for a given LPI. - * If this LPI is already mapped on another ITS, we increase its refcount - * and return a pointer to the existing structure. - * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq. - * This function returns a pointer to the _unlocked_ structure. - */ -static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; - - /* In this case there is no put, since we keep the reference. */ - if (irq) - return irq; - - irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); - if (!irq) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD(&irq->lpi_list); - INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); - - irq->config = VGIC_CONFIG_EDGE; - kref_init(&irq->refcount); - irq->intid = intid; - - spin_lock(&dist->lpi_list_lock); - - /* - * There could be a race with another vgic_add_lpi(), so we need to - * check that we don't add a second list entry with the same LPI. - */ - list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) { - if (oldirq->intid != intid) - continue; - - /* Someone was faster with adding this LPI, lets use that. */ - kfree(irq); - irq = oldirq; - - /* - * This increases the refcount, the caller is expected to - * call vgic_put_irq() on the returned pointer once it's - * finished with the IRQ. 
- */ - vgic_get_irq_kref(irq); - - goto out_unlock; - } - - list_add_tail(&irq->lpi_list, &dist->lpi_list_head); - dist->lpi_list_count++; - -out_unlock: - spin_unlock(&dist->lpi_list_lock); - - return irq; -} - -struct its_device { - struct list_head dev_list; - - /* the head for the list of ITTEs */ - struct list_head itt_head; - u32 device_id; -}; - -#define COLLECTION_NOT_MAPPED ((u32)~0) - -struct its_collection { - struct list_head coll_list; - - u32 collection_id; - u32 target_addr; -}; - -#define its_is_collection_mapped(coll) ((coll) && \ - ((coll)->target_addr != COLLECTION_NOT_MAPPED)) - -struct its_itte { - struct list_head itte_list; - - struct vgic_irq *irq; - struct its_collection *collection; - u32 lpi; - u32 event_id; -}; - -/* - * Find and returns a device in the device table for an ITS. - * Must be called with the its_lock mutex held. - */ -static struct its_device *find_its_device(struct vgic_its *its, u32 device_id) -{ - struct its_device *device; - - list_for_each_entry(device, &its->device_list, dev_list) - if (device_id == device->device_id) - return device; - - return NULL; -} - -/* - * Find and returns an interrupt translation table entry (ITTE) for a given - * Device ID/Event ID pair on an ITS. - * Must be called with the its_lock mutex held. - */ -static struct its_itte *find_itte(struct vgic_its *its, u32 device_id, - u32 event_id) -{ - struct its_device *device; - struct its_itte *itte; - - device = find_its_device(its, device_id); - if (device == NULL) - return NULL; - - list_for_each_entry(itte, &device->itt_head, itte_list) - if (itte->event_id == event_id) - return itte; - - return NULL; -} - -/* To be used as an iterator this macro misses the enclosing parentheses */ -#define for_each_lpi_its(dev, itte, its) \ - list_for_each_entry(dev, &(its)->device_list, dev_list) \ - list_for_each_entry(itte, &(dev)->itt_head, itte_list) - -/* - * We only implement 48 bits of PA at the moment, although the ITS - * supports more. 
Let's be restrictive here. - */ -#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) -#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) -#define PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) -#define PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) - -#define GIC_LPI_OFFSET 8192 - -/* - * Finds and returns a collection in the ITS collection table. - * Must be called with the its_lock mutex held. - */ -static struct its_collection *find_collection(struct vgic_its *its, int coll_id) -{ - struct its_collection *collection; - - list_for_each_entry(collection, &its->collection_list, coll_list) { - if (coll_id == collection->collection_id) - return collection; - } - - return NULL; -} - -#define LPI_PROP_ENABLE_BIT(p) ((p) & LPI_PROP_ENABLED) -#define LPI_PROP_PRIORITY(p) ((p) & 0xfc) - -/* - * Reads the configuration data for a given LPI from guest memory and - * updates the fields in struct vgic_irq. - * If filter_vcpu is not NULL, applies only if the IRQ is targeting this - * VCPU. Unconditionally applies if filter_vcpu is NULL. - */ -static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, - struct kvm_vcpu *filter_vcpu) -{ - u64 propbase = PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); - u8 prop; - int ret; - - ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET, - &prop, 1); - - if (ret) - return ret; - - spin_lock(&irq->irq_lock); - - if (!filter_vcpu || filter_vcpu == irq->target_vcpu) { - irq->priority = LPI_PROP_PRIORITY(prop); - irq->enabled = LPI_PROP_ENABLE_BIT(prop); - - vgic_queue_irq_unlock(kvm, irq); - } else { - spin_unlock(&irq->irq_lock); - } - - return 0; -} - -/* - * Create a snapshot of the current LPI list, so that we can enumerate all - * LPIs without holding any lock. - * Returns the array length and puts the kmalloc'ed array into intid_ptr. 
- */ -static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct vgic_irq *irq; - u32 *intids; - int irq_count = dist->lpi_list_count, i = 0; - - /* - * We use the current value of the list length, which may change - * after the kmalloc. We don't care, because the guest shouldn't - * change anything while the command handling is still running, - * and in the worst case we would miss a new IRQ, which one wouldn't - * expect to be covered by this command anyway. - */ - intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); - if (!intids) - return -ENOMEM; - - spin_lock(&dist->lpi_list_lock); - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { - /* We don't need to "get" the IRQ, as we hold the list lock. */ - intids[i] = irq->intid; - if (++i == irq_count) - break; - } - spin_unlock(&dist->lpi_list_lock); - - *intid_ptr = intids; - return irq_count; -} - -/* - * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI - * is targeting) to the VGIC's view, which deals with target VCPUs. - * Needs to be called whenever either the collection for a LPIs has - * changed or the collection itself got retargeted. - */ -static void update_affinity_itte(struct kvm *kvm, struct its_itte *itte) -{ - struct kvm_vcpu *vcpu; - - if (!its_is_collection_mapped(itte->collection)) - return; - - vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); - - spin_lock(&itte->irq->irq_lock); - itte->irq->target_vcpu = vcpu; - spin_unlock(&itte->irq->irq_lock); -} - -/* - * Updates the target VCPU for every LPI targeting this collection. - * Must be called with the its_lock mutex held. 
- */ -static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, - struct its_collection *coll) -{ - struct its_device *device; - struct its_itte *itte; - - for_each_lpi_its(device, itte, its) { - if (!itte->collection || coll != itte->collection) - continue; - - update_affinity_itte(kvm, itte); - } -} - -static u32 max_lpis_propbaser(u64 propbaser) -{ - int nr_idbits = (propbaser & 0x1f) + 1; - - return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS); -} - -/* - * Scan the whole LPI pending table and sync the pending bit in there - * with our own data structures. This relies on the LPI being - * mapped before. - */ -static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) -{ - gpa_t pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); - struct vgic_irq *irq; - int last_byte_offset = -1; - int ret = 0; - u32 *intids; - int nr_irqs, i; - - nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids); - if (nr_irqs < 0) - return nr_irqs; - - for (i = 0; i < nr_irqs; i++) { - int byte_offset, bit_nr; - u8 pendmask; - - byte_offset = intids[i] / BITS_PER_BYTE; - bit_nr = intids[i] % BITS_PER_BYTE; - - /* - * For contiguously allocated LPIs chances are we just read - * this very same byte in the last iteration. Reuse that. 
- */ - if (byte_offset != last_byte_offset) { - ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset, - &pendmask, 1); - if (ret) { - kfree(intids); - return ret; - } - last_byte_offset = byte_offset; - } - - irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); - spin_lock(&irq->irq_lock); - irq->pending = pendmask & (1U << bit_nr); - vgic_queue_irq_unlock(vcpu->kvm, irq); - vgic_put_irq(vcpu->kvm, irq); - } - - kfree(intids); - - return ret; -} - -static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - u32 reg = 0; - - mutex_lock(&its->cmd_lock); - if (its->creadr == its->cwriter) - reg |= GITS_CTLR_QUIESCENT; - if (its->enabled) - reg |= GITS_CTLR_ENABLE; - mutex_unlock(&its->cmd_lock); - - return reg; -} - -static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - its->enabled = !!(val & GITS_CTLR_ENABLE); -} - -static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - u64 reg = GITS_TYPER_PLPIS; - - /* - * We use linear CPU numbers for redistributor addressing, - * so GITS_TYPER.PTA is 0. - * Also we force all PROPBASER registers to be the same, so - * CommonLPIAff is 0 as well. - * To avoid memory waste in the guest, we keep the number of IDBits and - * DevBits low - as least for the time being. 
- */ - reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT; - reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT; - - return extract_bytes(reg, addr & 7, len); -} - -static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); -} - -static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - switch (addr & 0xffff) { - case GITS_PIDR0: - return 0x92; /* part number, bits[7:0] */ - case GITS_PIDR1: - return 0xb4; /* part number, bits[11:8] */ - case GITS_PIDR2: - return GIC_PIDR2_ARCH_GICv3 | 0x0b; - case GITS_PIDR4: - return 0x40; /* This is a 64K software visible page */ - /* The following are the ID registers for (any) GIC. */ - case GITS_CIDR0: - return 0x0d; - case GITS_CIDR1: - return 0xf0; - case GITS_CIDR2: - return 0x05; - case GITS_CIDR3: - return 0xb1; - } - - return 0; -} - -/* - * Find the target VCPU and the LPI number for a given devid/eventid pair - * and make this IRQ pending, possibly injecting it. - * Must be called with the its_lock mutex held. - * Returns 0 on success, a positive error value for any ITS mapping - * related errors and negative error values for generic errors. 
- */ -static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, - u32 devid, u32 eventid) -{ - struct kvm_vcpu *vcpu; - struct its_itte *itte; - - if (!its->enabled) - return -EBUSY; - - itte = find_itte(its, devid, eventid); - if (!itte || !its_is_collection_mapped(itte->collection)) - return E_ITS_INT_UNMAPPED_INTERRUPT; - - vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); - if (!vcpu) - return E_ITS_INT_UNMAPPED_INTERRUPT; - - if (!vcpu->arch.vgic_cpu.lpis_enabled) - return -EBUSY; - - spin_lock(&itte->irq->irq_lock); - itte->irq->pending = true; - vgic_queue_irq_unlock(kvm, itte->irq); - - return 0; -} - -static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) -{ - struct vgic_io_device *iodev; - - if (dev->ops != &kvm_io_gic_ops) - return NULL; - - iodev = container_of(dev, struct vgic_io_device, dev); - - if (iodev->iodev_type != IODEV_ITS) - return NULL; - - return iodev; -} - -/* - * Queries the KVM IO bus framework to get the ITS pointer from the given - * doorbell address. - * We then call vgic_its_trigger_msi() with the decoded data. - * According to the KVM_SIGNAL_MSI API description returns 1 on success. - */ -int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - u64 address; - struct kvm_io_device *kvm_io_dev; - struct vgic_io_device *iodev; - int ret; - - if (!vgic_has_its(kvm)) - return -ENODEV; - - if (!(msi->flags & KVM_MSI_VALID_DEVID)) - return -EINVAL; - - address = (u64)msi->address_hi << 32 | msi->address_lo; - - kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); - if (!kvm_io_dev) - return -EINVAL; - - iodev = vgic_get_its_iodev(kvm_io_dev); - if (!iodev) - return -EINVAL; - - mutex_lock(&iodev->its->its_lock); - ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); - mutex_unlock(&iodev->its->its_lock); - - if (ret < 0) - return ret; - - /* - * KVM_SIGNAL_MSI demands a return value > 0 for success and 0 - * if the guest has blocked the MSI. 
So we map any LPI mapping - * related error to that. - */ - if (ret) - return 0; - else - return 1; -} - -/* Requires the its_lock to be held. */ -static void its_free_itte(struct kvm *kvm, struct its_itte *itte) -{ - list_del(&itte->itte_list); - - /* This put matches the get in vgic_add_lpi. */ - if (itte->irq) - vgic_put_irq(kvm, itte->irq); - - kfree(itte); -} - -static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size) -{ - return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT_ULL(size) - 1); -} - -#define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8) -#define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32) -#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32) -#define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32) -#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16) -#define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32) -#define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1) - -/* - * The DISCARD command frees an Interrupt Translation Table Entry (ITTE). - * Must be called with the its_lock mutex held. - */ -static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - u32 device_id = its_cmd_get_deviceid(its_cmd); - u32 event_id = its_cmd_get_id(its_cmd); - struct its_itte *itte; - - - itte = find_itte(its, device_id, event_id); - if (itte && itte->collection) { - /* - * Though the spec talks about removing the pending state, we - * don't bother here since we clear the ITTE anyway and the - * pending state is a property of the ITTE struct. - */ - its_free_itte(kvm, itte); - return 0; - } - - return E_ITS_DISCARD_UNMAPPED_INTERRUPT; -} - -/* - * The MOVI command moves an ITTE to a different collection. - * Must be called with the its_lock mutex held. 
- */ -static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - u32 device_id = its_cmd_get_deviceid(its_cmd); - u32 event_id = its_cmd_get_id(its_cmd); - u32 coll_id = its_cmd_get_collection(its_cmd); - struct kvm_vcpu *vcpu; - struct its_itte *itte; - struct its_collection *collection; - - itte = find_itte(its, device_id, event_id); - if (!itte) - return E_ITS_MOVI_UNMAPPED_INTERRUPT; - - if (!its_is_collection_mapped(itte->collection)) - return E_ITS_MOVI_UNMAPPED_COLLECTION; - - collection = find_collection(its, coll_id); - if (!its_is_collection_mapped(collection)) - return E_ITS_MOVI_UNMAPPED_COLLECTION; - - itte->collection = collection; - vcpu = kvm_get_vcpu(kvm, collection->target_addr); - - spin_lock(&itte->irq->irq_lock); - itte->irq->target_vcpu = vcpu; - spin_unlock(&itte->irq->irq_lock); - - return 0; -} - -/* - * Check whether an ID can be stored into the corresponding guest table. - * For a direct table this is pretty easy, but gets a bit nasty for - * indirect tables. We check whether the resulting guest physical address - * is actually valid (covered by a memslot and guest accessbible). - * For this we have to read the respective first level entry. - */ -static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) -{ - int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; - int index; - u64 indirect_ptr; - gfn_t gfn; - - if (!(baser & GITS_BASER_INDIRECT)) { - phys_addr_t addr; - - if (id >= (l1_tbl_size / GITS_BASER_ENTRY_SIZE(baser))) - return false; - - addr = BASER_ADDRESS(baser) + id * GITS_BASER_ENTRY_SIZE(baser); - gfn = addr >> PAGE_SHIFT; - - return kvm_is_visible_gfn(its->dev->kvm, gfn); - } - - /* calculate and check the index into the 1st level */ - index = id / (SZ_64K / GITS_BASER_ENTRY_SIZE(baser)); - if (index >= (l1_tbl_size / sizeof(u64))) - return false; - - /* Each 1st level entry is represented by a 64-bit value. 
*/ - if (kvm_read_guest(its->dev->kvm, - BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), - &indirect_ptr, sizeof(indirect_ptr))) - return false; - - indirect_ptr = le64_to_cpu(indirect_ptr); - - /* check the valid bit of the first level entry */ - if (!(indirect_ptr & BIT_ULL(63))) - return false; - - /* - * Mask the guest physical address and calculate the frame number. - * Any address beyond our supported 48 bits of PA will be caught - * by the actual check in the final step. - */ - indirect_ptr &= GENMASK_ULL(51, 16); - - /* Find the address of the actual entry */ - index = id % (SZ_64K / GITS_BASER_ENTRY_SIZE(baser)); - indirect_ptr += index * GITS_BASER_ENTRY_SIZE(baser); - gfn = indirect_ptr >> PAGE_SHIFT; - - return kvm_is_visible_gfn(its->dev->kvm, gfn); -} - -static int vgic_its_alloc_collection(struct vgic_its *its, - struct its_collection **colp, - u32 coll_id) -{ - struct its_collection *collection; - - if (!vgic_its_check_id(its, its->baser_coll_table, coll_id)) - return E_ITS_MAPC_COLLECTION_OOR; - - collection = kzalloc(sizeof(*collection), GFP_KERNEL); - - collection->collection_id = coll_id; - collection->target_addr = COLLECTION_NOT_MAPPED; - - list_add_tail(&collection->coll_list, &its->collection_list); - *colp = collection; - - return 0; -} - -static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) -{ - struct its_collection *collection; - struct its_device *device; - struct its_itte *itte; - - /* - * Clearing the mapping for that collection ID removes the - * entry from the list. If there wasn't any before, we can - * go home early. - */ - collection = find_collection(its, coll_id); - if (!collection) - return; - - for_each_lpi_its(device, itte, its) - if (itte->collection && - itte->collection->collection_id == coll_id) - itte->collection = NULL; - - list_del(&collection->coll_list); - kfree(collection); -} - -/* - * The MAPTI and MAPI commands map LPIs to ITTEs. - * Must be called with its_lock mutex held. 
- */ -static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - u32 device_id = its_cmd_get_deviceid(its_cmd); - u32 event_id = its_cmd_get_id(its_cmd); - u32 coll_id = its_cmd_get_collection(its_cmd); - struct its_itte *itte; - struct its_device *device; - struct its_collection *collection, *new_coll = NULL; - int lpi_nr; - struct vgic_irq *irq; - - device = find_its_device(its, device_id); - if (!device) - return E_ITS_MAPTI_UNMAPPED_DEVICE; - - if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI) - lpi_nr = its_cmd_get_physical_id(its_cmd); - else - lpi_nr = event_id; - if (lpi_nr < GIC_LPI_OFFSET || - lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser)) - return E_ITS_MAPTI_PHYSICALID_OOR; - - /* If there is an existing mapping, behavior is UNPREDICTABLE. */ - if (find_itte(its, device_id, event_id)) - return 0; - - collection = find_collection(its, coll_id); - if (!collection) { - int ret = vgic_its_alloc_collection(its, &collection, coll_id); - if (ret) - return ret; - new_coll = collection; - } - - itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); - if (!itte) { - if (new_coll) - vgic_its_free_collection(its, coll_id); - return -ENOMEM; - } - - itte->event_id = event_id; - list_add_tail(&itte->itte_list, &device->itt_head); - - itte->collection = collection; - itte->lpi = lpi_nr; - - irq = vgic_add_lpi(kvm, lpi_nr); - if (IS_ERR(irq)) { - if (new_coll) - vgic_its_free_collection(its, coll_id); - its_free_itte(kvm, itte); - return PTR_ERR(irq); - } - itte->irq = irq; - - update_affinity_itte(kvm, itte); - - /* - * We "cache" the configuration table entries in out struct vgic_irq's. - * However we only have those structs for mapped IRQs, so we read in - * the respective config data from memory here upon mapping the LPI. - */ - update_lpi_config(kvm, itte->irq, NULL); - - return 0; -} - -/* Requires the its_lock to be held. 
*/ -static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device) -{ - struct its_itte *itte, *temp; - - /* - * The spec says that unmapping a device with still valid - * ITTEs associated is UNPREDICTABLE. We remove all ITTEs, - * since we cannot leave the memory unreferenced. - */ - list_for_each_entry_safe(itte, temp, &device->itt_head, itte_list) - its_free_itte(kvm, itte); - - list_del(&device->dev_list); - kfree(device); -} - -/* - * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs). - * Must be called with the its_lock mutex held. - */ -static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - u32 device_id = its_cmd_get_deviceid(its_cmd); - bool valid = its_cmd_get_validbit(its_cmd); - struct its_device *device; - - if (!vgic_its_check_id(its, its->baser_device_table, device_id)) - return E_ITS_MAPD_DEVICE_OOR; - - device = find_its_device(its, device_id); - - /* - * The spec says that calling MAPD on an already mapped device - * invalidates all cached data for this device. We implement this - * by removing the mapping and re-establishing it. - */ - if (device) - vgic_its_unmap_device(kvm, device); - - /* - * The spec does not say whether unmapping a not-mapped device - * is an error, so we are done in any case. - */ - if (!valid) - return 0; - - device = kzalloc(sizeof(struct its_device), GFP_KERNEL); - if (!device) - return -ENOMEM; - - device->device_id = device_id; - INIT_LIST_HEAD(&device->itt_head); - - list_add_tail(&device->dev_list, &its->device_list); - - return 0; -} - -/* - * The MAPC command maps collection IDs to redistributors. - * Must be called with the its_lock mutex held. 
- */ -static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - u16 coll_id; - u32 target_addr; - struct its_collection *collection; - bool valid; - - valid = its_cmd_get_validbit(its_cmd); - coll_id = its_cmd_get_collection(its_cmd); - target_addr = its_cmd_get_target_addr(its_cmd); - - if (target_addr >= atomic_read(&kvm->online_vcpus)) - return E_ITS_MAPC_PROCNUM_OOR; - - if (!valid) { - vgic_its_free_collection(its, coll_id); - } else { - collection = find_collection(its, coll_id); - - if (!collection) { - int ret; - - ret = vgic_its_alloc_collection(its, &collection, - coll_id); - if (ret) - return ret; - collection->target_addr = target_addr; - } else { - collection->target_addr = target_addr; - update_affinity_collection(kvm, its, collection); - } - } - - return 0; -} - -/* - * The CLEAR command removes the pending state for a particular LPI. - * Must be called with the its_lock mutex held. - */ -static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - u32 device_id = its_cmd_get_deviceid(its_cmd); - u32 event_id = its_cmd_get_id(its_cmd); - struct its_itte *itte; - - - itte = find_itte(its, device_id, event_id); - if (!itte) - return E_ITS_CLEAR_UNMAPPED_INTERRUPT; - - itte->irq->pending = false; - - return 0; -} - -/* - * The INV command syncs the configuration bits from the memory table. - * Must be called with the its_lock mutex held. - */ -static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - u32 device_id = its_cmd_get_deviceid(its_cmd); - u32 event_id = its_cmd_get_id(its_cmd); - struct its_itte *itte; - - - itte = find_itte(its, device_id, event_id); - if (!itte) - return E_ITS_INV_UNMAPPED_INTERRUPT; - - return update_lpi_config(kvm, itte->irq, NULL); -} - -/* - * The INVALL command requests flushing of all IRQ data in this collection. 
- * Find the VCPU mapped to that collection, then iterate over the VM's list - * of mapped LPIs and update the configuration for each IRQ which targets - * the specified vcpu. The configuration will be read from the in-memory - * configuration table. - * Must be called with the its_lock mutex held. - */ -static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - u32 coll_id = its_cmd_get_collection(its_cmd); - struct its_collection *collection; - struct kvm_vcpu *vcpu; - struct vgic_irq *irq; - u32 *intids; - int irq_count, i; - - collection = find_collection(its, coll_id); - if (!its_is_collection_mapped(collection)) - return E_ITS_INVALL_UNMAPPED_COLLECTION; - - vcpu = kvm_get_vcpu(kvm, collection->target_addr); - - irq_count = vgic_copy_lpi_list(kvm, &intids); - if (irq_count < 0) - return irq_count; - - for (i = 0; i < irq_count; i++) { - irq = vgic_get_irq(kvm, NULL, intids[i]); - if (!irq) - continue; - update_lpi_config(kvm, irq, vcpu); - vgic_put_irq(kvm, irq); - } - - kfree(intids); - - return 0; -} - -/* - * The MOVALL command moves the pending state of all IRQs targeting one - * redistributor to another. We don't hold the pending state in the VCPUs, - * but in the IRQs instead, so there is really not much to do for us here. - * However the spec says that no IRQ must target the old redistributor - * afterwards, so we make sure that no LPI is using the associated target_vcpu. - * This command affects all LPIs in the system that target that redistributor. 
- */ -static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - u32 target1_addr = its_cmd_get_target_addr(its_cmd); - u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); - struct kvm_vcpu *vcpu1, *vcpu2; - struct vgic_irq *irq; - - if (target1_addr >= atomic_read(&kvm->online_vcpus) || - target2_addr >= atomic_read(&kvm->online_vcpus)) - return E_ITS_MOVALL_PROCNUM_OOR; - - if (target1_addr == target2_addr) - return 0; - - vcpu1 = kvm_get_vcpu(kvm, target1_addr); - vcpu2 = kvm_get_vcpu(kvm, target2_addr); - - spin_lock(&dist->lpi_list_lock); - - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { - spin_lock(&irq->irq_lock); - - if (irq->target_vcpu == vcpu1) - irq->target_vcpu = vcpu2; - - spin_unlock(&irq->irq_lock); - } - - spin_unlock(&dist->lpi_list_lock); - - return 0; -} - -/* - * The INT command injects the LPI associated with that DevID/EvID pair. - * Must be called with the its_lock mutex held. - */ -static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - u32 msi_data = its_cmd_get_id(its_cmd); - u64 msi_devid = its_cmd_get_deviceid(its_cmd); - - return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); -} - -/* - * This function is called with the its_cmd lock held, but the ITS data - * structure lock dropped. 
- */ -static int vgic_its_handle_command(struct kvm *kvm, struct vgic_its *its, - u64 *its_cmd) -{ - int ret = -ENODEV; - - mutex_lock(&its->its_lock); - switch (its_cmd_get_command(its_cmd)) { - case GITS_CMD_MAPD: - ret = vgic_its_cmd_handle_mapd(kvm, its, its_cmd); - break; - case GITS_CMD_MAPC: - ret = vgic_its_cmd_handle_mapc(kvm, its, its_cmd); - break; - case GITS_CMD_MAPI: - ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); - break; - case GITS_CMD_MAPTI: - ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); - break; - case GITS_CMD_MOVI: - ret = vgic_its_cmd_handle_movi(kvm, its, its_cmd); - break; - case GITS_CMD_DISCARD: - ret = vgic_its_cmd_handle_discard(kvm, its, its_cmd); - break; - case GITS_CMD_CLEAR: - ret = vgic_its_cmd_handle_clear(kvm, its, its_cmd); - break; - case GITS_CMD_MOVALL: - ret = vgic_its_cmd_handle_movall(kvm, its, its_cmd); - break; - case GITS_CMD_INT: - ret = vgic_its_cmd_handle_int(kvm, its, its_cmd); - break; - case GITS_CMD_INV: - ret = vgic_its_cmd_handle_inv(kvm, its, its_cmd); - break; - case GITS_CMD_INVALL: - ret = vgic_its_cmd_handle_invall(kvm, its, its_cmd); - break; - case GITS_CMD_SYNC: - /* we ignore this command: we are in sync all of the time */ - ret = 0; - break; - } - mutex_unlock(&its->its_lock); - - return ret; -} - -static u64 vgic_sanitise_its_baser(u64 reg) -{ - reg = vgic_sanitise_field(reg, GITS_BASER_SHAREABILITY_MASK, - GITS_BASER_SHAREABILITY_SHIFT, - vgic_sanitise_shareability); - reg = vgic_sanitise_field(reg, GITS_BASER_INNER_CACHEABILITY_MASK, - GITS_BASER_INNER_CACHEABILITY_SHIFT, - vgic_sanitise_inner_cacheability); - reg = vgic_sanitise_field(reg, GITS_BASER_OUTER_CACHEABILITY_MASK, - GITS_BASER_OUTER_CACHEABILITY_SHIFT, - vgic_sanitise_outer_cacheability); - - /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. 
*/ - reg &= ~GENMASK_ULL(15, 12); - - /* We support only one (ITS) page size: 64K */ - reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; - - return reg; -} - -static u64 vgic_sanitise_its_cbaser(u64 reg) -{ - reg = vgic_sanitise_field(reg, GITS_CBASER_SHAREABILITY_MASK, - GITS_CBASER_SHAREABILITY_SHIFT, - vgic_sanitise_shareability); - reg = vgic_sanitise_field(reg, GITS_CBASER_INNER_CACHEABILITY_MASK, - GITS_CBASER_INNER_CACHEABILITY_SHIFT, - vgic_sanitise_inner_cacheability); - reg = vgic_sanitise_field(reg, GITS_CBASER_OUTER_CACHEABILITY_MASK, - GITS_CBASER_OUTER_CACHEABILITY_SHIFT, - vgic_sanitise_outer_cacheability); - - /* - * Sanitise the physical address to be 64k aligned. - * Also limit the physical addresses to 48 bits. - */ - reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12)); - - return reg; -} - -static unsigned long vgic_mmio_read_its_cbaser(struct kvm *kvm, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - return extract_bytes(its->cbaser, addr & 7, len); -} - -static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - /* When GITS_CTLR.Enable is 1, this register is RO. */ - if (its->enabled) - return; - - mutex_lock(&its->cmd_lock); - its->cbaser = update_64bit_reg(its->cbaser, addr & 7, len, val); - its->cbaser = vgic_sanitise_its_cbaser(its->cbaser); - its->creadr = 0; - /* - * CWRITER is architecturally UNKNOWN on reset, but we need to reset - * it to CREADR to make sure we start with an empty command buffer. - */ - its->cwriter = its->creadr; - mutex_unlock(&its->cmd_lock); -} - -#define ITS_CMD_BUFFER_SIZE(baser) ((((baser) & 0xff) + 1) << 12) -#define ITS_CMD_SIZE 32 -#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) - -/* - * By writing to CWRITER the guest announces new commands to be processed. 
- * To avoid any races in the first place, we take the its_cmd lock, which - * protects our ring buffer variables, so that there is only one user - * per ITS handling commands at a given time. - */ -static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - gpa_t cbaser; - u64 cmd_buf[4]; - u32 reg; - - if (!its) - return; - - mutex_lock(&its->cmd_lock); - - reg = update_64bit_reg(its->cwriter, addr & 7, len, val); - reg = ITS_CMD_OFFSET(reg); - if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { - mutex_unlock(&its->cmd_lock); - return; - } - - its->cwriter = reg; - cbaser = CBASER_ADDRESS(its->cbaser); - - while (its->cwriter != its->creadr) { - int ret = kvm_read_guest(kvm, cbaser + its->creadr, - cmd_buf, ITS_CMD_SIZE); - /* - * If kvm_read_guest() fails, this could be due to the guest - * programming a bogus value in CBASER or something else going - * wrong from which we cannot easily recover. - * According to section 6.3.2 in the GICv3 spec we can just - * ignore that command then. 
- */ - if (!ret) - vgic_its_handle_command(kvm, its, cmd_buf); - - its->creadr += ITS_CMD_SIZE; - if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) - its->creadr = 0; - } - - mutex_unlock(&its->cmd_lock); -} - -static unsigned long vgic_mmio_read_its_cwriter(struct kvm *kvm, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - return extract_bytes(its->cwriter, addr & 0x7, len); -} - -static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - return extract_bytes(its->creadr, addr & 0x7, len); -} - -#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7) -static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - u64 reg; - - switch (BASER_INDEX(addr)) { - case 0: - reg = its->baser_device_table; - break; - case 1: - reg = its->baser_coll_table; - break; - default: - reg = 0; - break; - } - - return extract_bytes(reg, addr & 7, len); -} - -#define GITS_BASER_RO_MASK (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56)) -static void vgic_mmio_write_its_baser(struct kvm *kvm, - struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u64 entry_size, device_type; - u64 reg, *regptr, clearbits = 0; - - /* When GITS_CTLR.Enable is 1, we ignore write accesses. 
*/ - if (its->enabled) - return; - - switch (BASER_INDEX(addr)) { - case 0: - regptr = &its->baser_device_table; - entry_size = 8; - device_type = GITS_BASER_TYPE_DEVICE; - break; - case 1: - regptr = &its->baser_coll_table; - entry_size = 8; - device_type = GITS_BASER_TYPE_COLLECTION; - clearbits = GITS_BASER_INDIRECT; - break; - default: - return; - } - - reg = update_64bit_reg(*regptr, addr & 7, len, val); - reg &= ~GITS_BASER_RO_MASK; - reg &= ~clearbits; - - reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT; - reg |= device_type << GITS_BASER_TYPE_SHIFT; - reg = vgic_sanitise_its_baser(reg); - - *regptr = reg; -} - -#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ -{ \ - .reg_offset = off, \ - .len = length, \ - .access_flags = acc, \ - .its_read = rd, \ - .its_write = wr, \ -} - -static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, unsigned long val) -{ - /* Ignore */ -} - -static struct vgic_register_region its_registers[] = { - REGISTER_ITS_DESC(GITS_CTLR, - vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4, - VGIC_ACCESS_32bit), - REGISTER_ITS_DESC(GITS_IIDR, - vgic_mmio_read_its_iidr, its_mmio_write_wi, 4, - VGIC_ACCESS_32bit), - REGISTER_ITS_DESC(GITS_TYPER, - vgic_mmio_read_its_typer, its_mmio_write_wi, 8, - VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_ITS_DESC(GITS_CBASER, - vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8, - VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_ITS_DESC(GITS_CWRITER, - vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8, - VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_ITS_DESC(GITS_CREADR, - vgic_mmio_read_its_creadr, its_mmio_write_wi, 8, - VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_ITS_DESC(GITS_BASER, - vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40, - VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_ITS_DESC(GITS_IDREGS_BASE, - vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30, - VGIC_ACCESS_32bit), -}; - -/* 
This is called on setting the LPI enable bit in the redistributor. */ -void vgic_enable_lpis(struct kvm_vcpu *vcpu) -{ - if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ)) - its_sync_lpi_pending_table(vcpu); -} - -static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its) -{ - struct vgic_io_device *iodev = &its->iodev; - int ret; - - if (!its->initialized) - return -EBUSY; - - if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) - return -ENXIO; - - iodev->regions = its_registers; - iodev->nr_regions = ARRAY_SIZE(its_registers); - kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops); - - iodev->base_addr = its->vgic_its_base; - iodev->iodev_type = IODEV_ITS; - iodev->its = its; - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr, - KVM_VGIC_V3_ITS_SIZE, &iodev->dev); - mutex_unlock(&kvm->slots_lock); - - return ret; -} - -#define INITIAL_BASER_VALUE \ - (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ - GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ - GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \ - ((8ULL - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | \ - GITS_BASER_PAGE_SIZE_64K) - -#define INITIAL_PROPBASER_VALUE \ - (GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) | \ - GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, SameAsInner) | \ - GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)) - -static int vgic_its_create(struct kvm_device *dev, u32 type) -{ - struct vgic_its *its; - - if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) - return -ENODEV; - - its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL); - if (!its) - return -ENOMEM; - - mutex_init(&its->its_lock); - mutex_init(&its->cmd_lock); - - its->vgic_its_base = VGIC_ADDR_UNDEF; - - INIT_LIST_HEAD(&its->device_list); - INIT_LIST_HEAD(&its->collection_list); - - dev->kvm->arch.vgic.has_its = true; - its->initialized = false; - its->enabled = false; - its->dev = dev; - - its->baser_device_table = INITIAL_BASER_VALUE | - 
((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT); - its->baser_coll_table = INITIAL_BASER_VALUE | - ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT); - dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE; - - dev->private = its; - - return 0; -} - -static void vgic_its_destroy(struct kvm_device *kvm_dev) -{ - struct kvm *kvm = kvm_dev->kvm; - struct vgic_its *its = kvm_dev->private; - struct its_device *dev; - struct its_itte *itte; - struct list_head *dev_cur, *dev_temp; - struct list_head *cur, *temp; - - /* - * We may end up here without the lists ever having been initialized. - * Check this and bail out early to avoid dereferencing a NULL pointer. - */ - if (!its->device_list.next) - return; - - mutex_lock(&its->its_lock); - list_for_each_safe(dev_cur, dev_temp, &its->device_list) { - dev = container_of(dev_cur, struct its_device, dev_list); - list_for_each_safe(cur, temp, &dev->itt_head) { - itte = (container_of(cur, struct its_itte, itte_list)); - its_free_itte(kvm, itte); - } - list_del(dev_cur); - kfree(dev); - } - - list_for_each_safe(cur, temp, &its->collection_list) { - list_del(cur); - kfree(container_of(cur, struct its_collection, coll_list)); - } - mutex_unlock(&its->its_lock); - - kfree(its); -} - -static int vgic_its_has_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: - switch (attr->attr) { - case KVM_VGIC_ITS_ADDR_TYPE: - return 0; - } - break; - case KVM_DEV_ARM_VGIC_GRP_CTRL: - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - return 0; - } - break; - } - return -ENXIO; -} - -static int vgic_its_set_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - struct vgic_its *its = dev->private; - int ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - unsigned long type = (unsigned long)attr->attr; - u64 addr; - - if (type != KVM_VGIC_ITS_ADDR_TYPE) - return -ENODEV; 
- - if (copy_from_user(&addr, uaddr, sizeof(addr))) - return -EFAULT; - - ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base, - addr, SZ_64K); - if (ret) - return ret; - - its->vgic_its_base = addr; - - return 0; - } - case KVM_DEV_ARM_VGIC_GRP_CTRL: - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - its->initialized = true; - - return 0; - } - break; - } - return -ENXIO; -} - -static int vgic_its_get_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - struct vgic_its *its = dev->private; - u64 addr = its->vgic_its_base; - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - unsigned long type = (unsigned long)attr->attr; - - if (type != KVM_VGIC_ITS_ADDR_TYPE) - return -ENODEV; - - if (copy_to_user(uaddr, &addr, sizeof(addr))) - return -EFAULT; - break; - default: - return -ENXIO; - } - } - - return 0; -} - -static struct kvm_device_ops kvm_arm_vgic_its_ops = { - .name = "kvm-arm-vgic-its", - .create = vgic_its_create, - .destroy = vgic_its_destroy, - .set_attr = vgic_its_set_attr, - .get_attr = vgic_its_get_attr, - .has_attr = vgic_its_has_attr, -}; - -int kvm_vgic_register_its_device(void) -{ - return kvm_register_device_ops(&kvm_arm_vgic_its_ops, - KVM_DEV_TYPE_ARM_VGIC_ITS); -} - -/* - * Registers all ITSes with the kvm_io_bus framework. - * To follow the existing VGIC initialization sequence, this has to be - * done as late as possible, just before the first VCPU runs. - */ -int vgic_register_its_iodevs(struct kvm *kvm) -{ - struct kvm_device *dev; - int ret = 0; - - list_for_each_entry(dev, &kvm->devices, vm_node) { - if (dev->ops != &kvm_arm_vgic_its_ops) - continue; - - ret = vgic_register_its_iodev(kvm, dev->private); - if (ret) - return ret; - /* - * We don't need to care about tearing down previously - * registered ITSes, as the kvm_io_bus framework removes - * them for us if the VM gets destroyed. 
- */ - } - - return ret; -} diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c deleted file mode 100644 index ce1f4ed..0000000 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ /dev/null @@ -1,474 +0,0 @@ -/* - * VGIC: KVM DEVICE API - * - * Copyright (C) 2015 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -#include <linux/kvm_host.h> -#include <kvm/arm_vgic.h> -#include <linux/uaccess.h> -#include <asm/kvm_mmu.h> -#include "vgic.h" - -/* common helpers */ - -int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment) -{ - if (addr & ~KVM_PHYS_MASK) - return -E2BIG; - - if (!IS_ALIGNED(addr, alignment)) - return -EINVAL; - - if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) - return -EEXIST; - - return 0; -} - -/** - * kvm_vgic_addr - set or get vgic VM base addresses - * @kvm: pointer to the vm struct - * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX - * @addr: pointer to address value - * @write: if true set the address in the VM address space, if false read the - * address - * - * Set or get the vgic base addresses for the distributor and the virtual CPU - * interface in the VM physical address space. These addresses are properties - * of the emulated core/SoC and therefore user space initially knows this - * information. - * Check them for sanity (alignment, double assignment). 
We can't check for - * overlapping regions in case of a virtual GICv3 here, since we don't know - * the number of VCPUs yet, so we defer this check to map_resources(). - */ -int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) -{ - int r = 0; - struct vgic_dist *vgic = &kvm->arch.vgic; - int type_needed; - phys_addr_t *addr_ptr, alignment; - - mutex_lock(&kvm->lock); - switch (type) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; - addr_ptr = &vgic->vgic_dist_base; - alignment = SZ_4K; - break; - case KVM_VGIC_V2_ADDR_TYPE_CPU: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; - addr_ptr = &vgic->vgic_cpu_base; - alignment = SZ_4K; - break; - case KVM_VGIC_V3_ADDR_TYPE_DIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; - addr_ptr = &vgic->vgic_dist_base; - alignment = SZ_64K; - break; - case KVM_VGIC_V3_ADDR_TYPE_REDIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; - addr_ptr = &vgic->vgic_redist_base; - alignment = SZ_64K; - break; - default: - r = -ENODEV; - goto out; - } - - if (vgic->vgic_model != type_needed) { - r = -ENODEV; - goto out; - } - - if (write) { - r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); - if (!r) - *addr_ptr = *addr; - } else { - *addr = *addr_ptr; - } - -out: - mutex_unlock(&kvm->lock); - return r; -} - -static int vgic_set_common_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int r; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - if (copy_from_user(&addr, uaddr, sizeof(addr))) - return -EFAULT; - - r = kvm_vgic_addr(dev->kvm, type, &addr, true); - return (r == -ENODEV) ? 
-ENXIO : r; - } - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 val; - int ret = 0; - - if (get_user(val, uaddr)) - return -EFAULT; - - /* - * We require: - * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs - * - at most 1024 interrupts - * - a multiple of 32 interrupts - */ - if (val < (VGIC_NR_PRIVATE_IRQS + 32) || - val > VGIC_MAX_RESERVED || - (val & 31)) - return -EINVAL; - - mutex_lock(&dev->kvm->lock); - - if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) - ret = -EBUSY; - else - dev->kvm->arch.vgic.nr_spis = - val - VGIC_NR_PRIVATE_IRQS; - - mutex_unlock(&dev->kvm->lock); - - return ret; - } - case KVM_DEV_ARM_VGIC_GRP_CTRL: { - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - mutex_lock(&dev->kvm->lock); - r = vgic_init(dev->kvm); - mutex_unlock(&dev->kvm->lock); - return r; - } - break; - } - } - - return -ENXIO; -} - -static int vgic_get_common_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int r = -ENXIO; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - r = kvm_vgic_addr(dev->kvm, type, &addr, false); - if (r) - return (r == -ENODEV) ? 
-ENXIO : r; - - if (copy_to_user(uaddr, &addr, sizeof(addr))) - return -EFAULT; - break; - } - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - - r = put_user(dev->kvm->arch.vgic.nr_spis + - VGIC_NR_PRIVATE_IRQS, uaddr); - break; - } - } - - return r; -} - -static int vgic_create(struct kvm_device *dev, u32 type) -{ - return kvm_vgic_create(dev->kvm, type); -} - -static void vgic_destroy(struct kvm_device *dev) -{ - kfree(dev); -} - -int kvm_register_vgic_device(unsigned long type) -{ - int ret = -ENODEV; - - switch (type) { - case KVM_DEV_TYPE_ARM_VGIC_V2: - ret = kvm_register_device_ops(&kvm_arm_vgic_v2_ops, - KVM_DEV_TYPE_ARM_VGIC_V2); - break; - case KVM_DEV_TYPE_ARM_VGIC_V3: - ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops, - KVM_DEV_TYPE_ARM_VGIC_V3); - -#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS - if (ret) - break; - ret = kvm_vgic_register_its_device(); -#endif - break; - } - - return ret; -} - -struct vgic_reg_attr { - struct kvm_vcpu *vcpu; - gpa_t addr; -}; - -static int parse_vgic_v2_attr(struct kvm_device *dev, - struct kvm_device_attr *attr, - struct vgic_reg_attr *reg_attr) -{ - int cpuid; - - cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> - KVM_DEV_ARM_VGIC_CPUID_SHIFT; - - if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) - return -EINVAL; - - reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid); - reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - - return 0; -} - -/* unlocks vcpus from @vcpu_lock_idx and smaller */ -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -static void unlock_all_vcpus(struct kvm *kvm) -{ - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -/* Returns true if all vcpus were locked, false otherwise */ -static bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - 
int c; - - /* - * Any time a vcpu is run, vcpu_load is called which tries to grab the - * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure - * that no other VCPUs are run and fiddle with the vgic state while we - * access it. - */ - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} - -/** - * vgic_attr_regs_access_v2 - allows user space to access VGIC v2 state - * - * @dev: kvm device handle - * @attr: kvm device attribute - * @reg: address the value is read or written - * @is_write: true if userspace is writing a register - */ -static int vgic_attr_regs_access_v2(struct kvm_device *dev, - struct kvm_device_attr *attr, - u32 *reg, bool is_write) -{ - struct vgic_reg_attr reg_attr; - gpa_t addr; - struct kvm_vcpu *vcpu; - int ret; - - ret = parse_vgic_v2_attr(dev, attr, &reg_attr); - if (ret) - return ret; - - vcpu = reg_attr.vcpu; - addr = reg_attr.addr; - - mutex_lock(&dev->kvm->lock); - - ret = vgic_init(dev->kvm); - if (ret) - goto out; - - if (!lock_all_vcpus(dev->kvm)) { - ret = -EBUSY; - goto out; - } - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg); - break; - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg); - break; - default: - ret = -EINVAL; - break; - } - - unlock_all_vcpus(dev->kvm); -out: - mutex_unlock(&dev->kvm->lock); - return ret; -} - -static int vgic_v2_set_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg; - - if (get_user(reg, uaddr)) - return -EFAULT; - - return vgic_attr_regs_access_v2(dev, attr, &reg, true); - } - } - - return -ENXIO; -} - -static 
int vgic_v2_get_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg = 0; - - ret = vgic_attr_regs_access_v2(dev, attr, &reg, false); - if (ret) - return ret; - return put_user(reg, uaddr); - } - } - - return -ENXIO; -} - -static int vgic_v2_has_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: - switch (attr->attr) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - case KVM_VGIC_V2_ADDR_TYPE_CPU: - return 0; - } - break; - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - return vgic_v2_has_attr_regs(dev, attr); - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: - return 0; - case KVM_DEV_ARM_VGIC_GRP_CTRL: - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - return 0; - } - } - return -ENXIO; -} - -struct kvm_device_ops kvm_arm_vgic_v2_ops = { - .name = "kvm-arm-vgic-v2", - .create = vgic_create, - .destroy = vgic_destroy, - .set_attr = vgic_v2_set_attr, - .get_attr = vgic_v2_get_attr, - .has_attr = vgic_v2_has_attr, -}; - -static int vgic_v3_set_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - return vgic_set_common_attr(dev, attr); -} - -static int vgic_v3_get_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - return vgic_get_common_attr(dev, attr); -} - -static int vgic_v3_has_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: - switch (attr->attr) { - case KVM_VGIC_V3_ADDR_TYPE_DIST: - case KVM_VGIC_V3_ADDR_TYPE_REDIST: - return 0; - } - break; - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: - return 0; - case KVM_DEV_ARM_VGIC_GRP_CTRL: - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - return 0; - } - } - return 
-ENXIO; -} - -struct kvm_device_ops kvm_arm_vgic_v3_ops = { - .name = "kvm-arm-vgic-v3", - .create = vgic_create, - .destroy = vgic_destroy, - .set_attr = vgic_v3_set_attr, - .get_attr = vgic_v3_get_attr, - .has_attr = vgic_v3_has_attr, -}; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c deleted file mode 100644 index b44b359..0000000 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ /dev/null @@ -1,456 +0,0 @@ -/* - * VGICv2 MMIO handling functions - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include <linux/irqchip/arm-gic.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <kvm/iodev.h> -#include <kvm/arm_vgic.h> - -#include "vgic.h" -#include "vgic-mmio.h" - -static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 value; - - switch (addr & 0x0c) { - case GIC_DIST_CTRL: - value = vcpu->kvm->arch.vgic.enabled ? 
GICD_ENABLE : 0; - break; - case GIC_DIST_CTR: - value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; - value = (value >> 5) - 1; - value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - break; - case GIC_DIST_IIDR: - value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); - break; - default: - return 0; - } - - return value; -} - -static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - bool was_enabled = dist->enabled; - - switch (addr & 0x0c) { - case GIC_DIST_CTRL: - dist->enabled = val & GICD_ENABLE; - if (!was_enabled && dist->enabled) - vgic_kick_vcpus(vcpu->kvm); - break; - case GIC_DIST_CTR: - case GIC_DIST_IIDR: - /* Nothing to do */ - return; - } -} - -static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - int nr_vcpus = atomic_read(&source_vcpu->kvm->online_vcpus); - int intid = val & 0xf; - int targets = (val >> 16) & 0xff; - int mode = (val >> 24) & 0x03; - int c; - struct kvm_vcpu *vcpu; - - switch (mode) { - case 0x0: /* as specified by targets */ - break; - case 0x1: - targets = (1U << nr_vcpus) - 1; /* all, ... 
*/ - targets &= ~(1U << source_vcpu->vcpu_id); /* but self */ - break; - case 0x2: /* this very vCPU only */ - targets = (1U << source_vcpu->vcpu_id); - break; - case 0x3: /* reserved */ - return; - } - - kvm_for_each_vcpu(c, vcpu, source_vcpu->kvm) { - struct vgic_irq *irq; - - if (!(targets & (1U << c))) - continue; - - irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); - - spin_lock(&irq->irq_lock); - irq->pending = true; - irq->source |= 1U << source_vcpu->vcpu_id; - - vgic_queue_irq_unlock(source_vcpu->kvm, irq); - vgic_put_irq(source_vcpu->kvm, irq); - } -} - -static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 8); - int i; - u64 val = 0; - - for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - val |= (u64)irq->targets << (i * 8); - - vgic_put_irq(vcpu->kvm, irq); - } - - return val; -} - -static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 8); - int i; - - /* GICD_ITARGETSR[0-7] are read-only */ - if (intid < VGIC_NR_PRIVATE_IRQS) - return; - - for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); - int target; - - spin_lock(&irq->irq_lock); - - irq->targets = (val >> (i * 8)) & 0xff; - target = irq->targets ? 
__ffs(irq->targets) : 0; - irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - } -} - -static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 intid = addr & 0x0f; - int i; - u64 val = 0; - - for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - val |= (u64)irq->source << (i * 8); - - vgic_put_irq(vcpu->kvm, irq); - } - return val; -} - -static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = addr & 0x0f; - int i; - - for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - spin_lock(&irq->irq_lock); - - irq->source &= ~((val >> (i * 8)) & 0xff); - if (!irq->source) - irq->pending = false; - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - } -} - -static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = addr & 0x0f; - int i; - - for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - spin_lock(&irq->irq_lock); - - irq->source |= (val >> (i * 8)) & 0xff; - - if (irq->source) { - irq->pending = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); - } else { - spin_unlock(&irq->irq_lock); - } - vgic_put_irq(vcpu->kvm, irq); - } -} - -static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_set_vmcr(vcpu, vmcr); - else - vgic_v3_set_vmcr(vcpu, vmcr); -} - -static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_get_vmcr(vcpu, vmcr); - else - vgic_v3_get_vmcr(vcpu, vmcr); -} - -#define GICC_ARCH_VERSION_V2 0x2 - -/* These are for userland accesses only, there is no guest-facing emulation. 
*/ -static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - struct vgic_vmcr vmcr; - u32 val; - - vgic_get_vmcr(vcpu, &vmcr); - - switch (addr & 0xff) { - case GIC_CPU_CTRL: - val = vmcr.ctlr; - break; - case GIC_CPU_PRIMASK: - val = vmcr.pmr; - break; - case GIC_CPU_BINPOINT: - val = vmcr.bpr; - break; - case GIC_CPU_ALIAS_BINPOINT: - val = vmcr.abpr; - break; - case GIC_CPU_IDENT: - val = ((PRODUCT_ID_KVM << 20) | - (GICC_ARCH_VERSION_V2 << 16) | - IMPLEMENTER_ARM); - break; - default: - return 0; - } - - return val; -} - -static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - struct vgic_vmcr vmcr; - - vgic_get_vmcr(vcpu, &vmcr); - - switch (addr & 0xff) { - case GIC_CPU_CTRL: - vmcr.ctlr = val; - break; - case GIC_CPU_PRIMASK: - vmcr.pmr = val; - break; - case GIC_CPU_BINPOINT: - vmcr.bpr = val; - break; - case GIC_CPU_ALIAS_BINPOINT: - vmcr.abpr = val; - break; - } - - vgic_set_vmcr(vcpu, &vmcr); -} - -static const struct vgic_register_region vgic_v2_dist_registers[] = { - REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL, - vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, - vgic_mmio_read_rao, vgic_mmio_write_wi, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, - vgic_mmio_read_enable, vgic_mmio_write_senable, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, - vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, - vgic_mmio_read_pending, vgic_mmio_write_spending, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, - vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, - vgic_mmio_read_active, vgic_mmio_write_sactive, 1, - VGIC_ACCESS_32bit), - 
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, - vgic_mmio_read_active, vgic_mmio_write_cactive, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, - vgic_mmio_read_priority, vgic_mmio_write_priority, 8, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), - REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, - vgic_mmio_read_target, vgic_mmio_write_target, 8, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), - REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, - vgic_mmio_read_config, vgic_mmio_write_config, 2, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, - vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR, - vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), - REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET, - vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), -}; - -static const struct vgic_register_region vgic_v2_cpu_registers[] = { - REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL, - vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK, - vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT, - vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT, - vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, - vgic_mmio_read_raz, vgic_mmio_write_wi, 16, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, - vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, - VGIC_ACCESS_32bit), -}; - -unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) -{ - dev->regions = vgic_v2_dist_registers; - dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); - - kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); - - return SZ_4K; -} - 
-int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) -{ - int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; - const struct vgic_register_region *regions; - gpa_t addr; - int nr_regions, i, len; - - addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - regions = vgic_v2_dist_registers; - nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); - break; - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - regions = vgic_v2_cpu_registers; - nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); - break; - default: - return -ENXIO; - } - - /* We only support aligned 32-bit accesses. */ - if (addr & 3) - return -ENXIO; - - for (i = 0; i < nr_regions; i++) { - if (regions[i].bits_per_irq) - len = (regions[i].bits_per_irq * nr_irqs) / 8; - else - len = regions[i].len; - - if (regions[i].reg_offset <= addr && - regions[i].reg_offset + len > addr) - return 0; - } - - return -ENXIO; -} - -/* - * When userland tries to access the VGIC register handlers, we need to - * create a usable struct vgic_io_device to be passed to the handlers and we - * have to set up a buffer similar to what would have happened if a guest MMIO - * access occurred, including doing endian conversions on BE systems. 
- */ -static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, - bool is_write, int offset, u32 *val) -{ - unsigned int len = 4; - u8 buf[4]; - int ret; - - if (is_write) { - vgic_data_host_to_mmio_bus(buf, len, *val); - ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf); - } else { - ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf); - if (!ret) - *val = vgic_data_mmio_bus_to_host(buf, len); - } - - return ret; -} - -int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, - int offset, u32 *val) -{ - struct vgic_io_device dev = { - .regions = vgic_v2_cpu_registers, - .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers), - .iodev_type = IODEV_CPUIF, - }; - - return vgic_uaccess(vcpu, &dev, is_write, offset, val); -} - -int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, - int offset, u32 *val) -{ - struct vgic_io_device dev = { - .regions = vgic_v2_dist_registers, - .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers), - .iodev_type = IODEV_DIST, - }; - - return vgic_uaccess(vcpu, &dev, is_write, offset, val); -} diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c deleted file mode 100644 index 0d3c76a..0000000 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ /dev/null @@ -1,656 +0,0 @@ -/* - * VGICv3 MMIO handling functions - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#include <linux/irqchip/arm-gic-v3.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <kvm/iodev.h> -#include <kvm/arm_vgic.h> - -#include <asm/kvm_emulate.h> - -#include "vgic.h" -#include "vgic-mmio.h" - -/* extract @num bytes at @offset bytes offset in data */ -unsigned long extract_bytes(u64 data, unsigned int offset, - unsigned int num) -{ - return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0); -} - -/* allows updates of any half of a 64-bit register (or the whole thing) */ -u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, - unsigned long val) -{ - int lower = (offset & 4) * 8; - int upper = lower + 8 * len - 1; - - reg &= ~GENMASK_ULL(upper, lower); - val &= GENMASK_ULL(len * 8 - 1, 0); - - return reg | ((u64)val << lower); -} - -#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS -bool vgic_has_its(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) - return false; - - return dist->has_its; -} -#endif - -static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 value = 0; - - switch (addr & 0x0c) { - case GICD_CTLR: - if (vcpu->kvm->arch.vgic.enabled) - value |= GICD_CTLR_ENABLE_SS_G1; - value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; - break; - case GICD_TYPER: - value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; - value = (value >> 5) - 1; - if (vgic_has_its(vcpu->kvm)) { - value |= (INTERRUPT_ID_BITS_ITS - 1) << 19; - value |= GICD_TYPER_LPIS; - } else { - value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; - } - break; - case GICD_IIDR: - value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); - break; - default: - return 0; - } - - return value; -} - -static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - bool was_enabled = dist->enabled; - - switch (addr & 0x0c) { - case GICD_CTLR: - dist->enabled = 
val & GICD_CTLR_ENABLE_SS_G1; - - if (!was_enabled && dist->enabled) - vgic_kick_vcpus(vcpu->kvm); - break; - case GICD_TYPER: - case GICD_IIDR: - return; - } -} - -static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - int intid = VGIC_ADDR_TO_INTID(addr, 64); - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); - unsigned long ret = 0; - - if (!irq) - return 0; - - /* The upper word is RAZ for us. */ - if (!(addr & 4)) - ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); - - vgic_put_irq(vcpu->kvm, irq); - return ret; -} - -static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - int intid = VGIC_ADDR_TO_INTID(addr, 64); - struct vgic_irq *irq; - - /* The upper word is WI for us since we don't implement Aff3. */ - if (addr & 4) - return; - - irq = vgic_get_irq(vcpu->kvm, NULL, intid); - - if (!irq) - return; - - spin_lock(&irq->irq_lock); - - /* We only care about and preserve Aff0, Aff1 and Aff2. */ - irq->mpidr = val & GENMASK(23, 0); - irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); -} - -static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - return vgic_cpu->lpis_enabled ? 
GICR_CTLR_ENABLE_LPIS : 0; -} - - -static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - bool was_enabled = vgic_cpu->lpis_enabled; - - if (!vgic_has_its(vcpu->kvm)) - return; - - vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; - - if (!was_enabled && vgic_cpu->lpis_enabled) - vgic_enable_lpis(vcpu); -} - -static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); - int target_vcpu_id = vcpu->vcpu_id; - u64 value; - - value = (u64)(mpidr & GENMASK(23, 0)) << 32; - value |= ((target_vcpu_id & 0xffff) << 8); - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) - value |= GICR_TYPER_LAST; - if (vgic_has_its(vcpu->kvm)) - value |= GICR_TYPER_PLPIS; - - return extract_bytes(value, addr & 7, len); -} - -static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); -} - -static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - switch (addr & 0xffff) { - case GICD_PIDR2: - /* report a GICv3 compliant implementation */ - return 0x3b; - } - - return 0; -} - -/* We want to avoid outer shareable. */ -u64 vgic_sanitise_shareability(u64 field) -{ - switch (field) { - case GIC_BASER_OuterShareable: - return GIC_BASER_InnerShareable; - default: - return field; - } -} - -/* Avoid any inner non-cacheable mapping. */ -u64 vgic_sanitise_inner_cacheability(u64 field) -{ - switch (field) { - case GIC_BASER_CACHE_nCnB: - case GIC_BASER_CACHE_nC: - return GIC_BASER_CACHE_RaWb; - default: - return field; - } -} - -/* Non-cacheable or same-as-inner are OK. 
*/ -u64 vgic_sanitise_outer_cacheability(u64 field) -{ - switch (field) { - case GIC_BASER_CACHE_SameAsInner: - case GIC_BASER_CACHE_nC: - return field; - default: - return GIC_BASER_CACHE_nC; - } -} - -u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, - u64 (*sanitise_fn)(u64)) -{ - u64 field = (reg & field_mask) >> field_shift; - - field = sanitise_fn(field) << field_shift; - return (reg & ~field_mask) | field; -} - -#define PROPBASER_RES0_MASK \ - (GENMASK_ULL(63, 59) | GENMASK_ULL(55, 52) | GENMASK_ULL(6, 5)) -#define PENDBASER_RES0_MASK \ - (BIT_ULL(63) | GENMASK_ULL(61, 59) | GENMASK_ULL(55, 52) | \ - GENMASK_ULL(15, 12) | GENMASK_ULL(6, 0)) - -static u64 vgic_sanitise_pendbaser(u64 reg) -{ - reg = vgic_sanitise_field(reg, GICR_PENDBASER_SHAREABILITY_MASK, - GICR_PENDBASER_SHAREABILITY_SHIFT, - vgic_sanitise_shareability); - reg = vgic_sanitise_field(reg, GICR_PENDBASER_INNER_CACHEABILITY_MASK, - GICR_PENDBASER_INNER_CACHEABILITY_SHIFT, - vgic_sanitise_inner_cacheability); - reg = vgic_sanitise_field(reg, GICR_PENDBASER_OUTER_CACHEABILITY_MASK, - GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT, - vgic_sanitise_outer_cacheability); - - reg &= ~PENDBASER_RES0_MASK; - reg &= ~GENMASK_ULL(51, 48); - - return reg; -} - -static u64 vgic_sanitise_propbaser(u64 reg) -{ - reg = vgic_sanitise_field(reg, GICR_PROPBASER_SHAREABILITY_MASK, - GICR_PROPBASER_SHAREABILITY_SHIFT, - vgic_sanitise_shareability); - reg = vgic_sanitise_field(reg, GICR_PROPBASER_INNER_CACHEABILITY_MASK, - GICR_PROPBASER_INNER_CACHEABILITY_SHIFT, - vgic_sanitise_inner_cacheability); - reg = vgic_sanitise_field(reg, GICR_PROPBASER_OUTER_CACHEABILITY_MASK, - GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT, - vgic_sanitise_outer_cacheability); - - reg &= ~PROPBASER_RES0_MASK; - reg &= ~GENMASK_ULL(51, 48); - return reg; -} - -static unsigned long vgic_mmio_read_propbase(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return 
extract_bytes(dist->propbaser, addr & 7, len); -} - -static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u64 old_propbaser, propbaser; - - /* Storing a value with LPIs already enabled is undefined */ - if (vgic_cpu->lpis_enabled) - return; - - do { - old_propbaser = dist->propbaser; - propbaser = old_propbaser; - propbaser = update_64bit_reg(propbaser, addr & 4, len, val); - propbaser = vgic_sanitise_propbaser(propbaser); - } while (cmpxchg64(&dist->propbaser, old_propbaser, - propbaser) != old_propbaser); -} - -static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - return extract_bytes(vgic_cpu->pendbaser, addr & 7, len); -} - -static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u64 old_pendbaser, pendbaser; - - /* Storing a value with LPIs already enabled is undefined */ - if (vgic_cpu->lpis_enabled) - return; - - do { - old_pendbaser = vgic_cpu->pendbaser; - pendbaser = old_pendbaser; - pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); - pendbaser = vgic_sanitise_pendbaser(pendbaser); - } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser, - pendbaser) != old_pendbaser); -} - -/* - * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the - * redistributors, while SPIs are covered by registers in the distributor - * block. Trying to set private IRQs in this block gets ignored. - * We take some special care here to fix the calculation of the register - * offset. 
- */ -#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \ - { \ - .reg_offset = off, \ - .bits_per_irq = bpi, \ - .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ - .access_flags = acc, \ - .read = vgic_mmio_read_raz, \ - .write = vgic_mmio_write_wi, \ - }, { \ - .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ - .bits_per_irq = bpi, \ - .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8, \ - .access_flags = acc, \ - .read = rd, \ - .write = wr, \ - } - -static const struct vgic_register_region vgic_v3_dist_registers[] = { - REGISTER_DESC_WITH_LENGTH(GICD_CTLR, - vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, - vgic_mmio_read_rao, vgic_mmio_write_wi, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, - vgic_mmio_read_enable, vgic_mmio_write_senable, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, - vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, - vgic_mmio_read_pending, vgic_mmio_write_spending, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, - vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, - vgic_mmio_read_active, vgic_mmio_write_sactive, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, - vgic_mmio_read_active, vgic_mmio_write_cactive, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, - vgic_mmio_read_priority, vgic_mmio_write_priority, 8, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR, - vgic_mmio_read_raz, vgic_mmio_write_wi, 8, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR, - vgic_mmio_read_config, vgic_mmio_write_config, 2, - 
VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR, - vgic_mmio_read_raz, vgic_mmio_write_wi, 1, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER, - vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64, - VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, - vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, - VGIC_ACCESS_32bit), -}; - -static const struct vgic_register_region vgic_v3_rdbase_registers[] = { - REGISTER_DESC_WITH_LENGTH(GICR_CTLR, - vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_IIDR, - vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_TYPER, - vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, - VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, - vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, - VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, - vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8, - VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, - vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, - VGIC_ACCESS_32bit), -}; - -static const struct vgic_register_region vgic_v3_sgibase_registers[] = { - REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0, - vgic_mmio_read_rao, vgic_mmio_write_wi, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0, - vgic_mmio_read_enable, vgic_mmio_write_senable, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0, - vgic_mmio_read_enable, vgic_mmio_write_cenable, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0, - vgic_mmio_read_pending, vgic_mmio_write_spending, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0, - vgic_mmio_read_pending, vgic_mmio_write_cpending, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, - vgic_mmio_read_active, 
vgic_mmio_write_sactive, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0, - vgic_mmio_read_active, vgic_mmio_write_cactive, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, - vgic_mmio_read_priority, vgic_mmio_write_priority, 32, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0, - vgic_mmio_read_config, vgic_mmio_write_config, 8, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0, - vgic_mmio_read_raz, vgic_mmio_write_wi, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_NSACR, - vgic_mmio_read_raz, vgic_mmio_write_wi, 4, - VGIC_ACCESS_32bit), -}; - -unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev) -{ - dev->regions = vgic_v3_dist_registers; - dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); - - kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); - - return SZ_64K; -} - -int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) -{ - struct kvm_vcpu *vcpu; - int c, ret = 0; - - kvm_for_each_vcpu(c, vcpu, kvm) { - gpa_t rd_base = redist_base_address + c * SZ_64K * 2; - gpa_t sgi_base = rd_base + SZ_64K; - struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; - struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; - - kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); - rd_dev->base_addr = rd_base; - rd_dev->iodev_type = IODEV_REDIST; - rd_dev->regions = vgic_v3_rdbase_registers; - rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); - rd_dev->redist_vcpu = vcpu; - - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, - SZ_64K, &rd_dev->dev); - mutex_unlock(&kvm->slots_lock); - - if (ret) - break; - - kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops); - sgi_dev->base_addr = sgi_base; - sgi_dev->iodev_type = IODEV_REDIST; - sgi_dev->regions = vgic_v3_sgibase_registers; - sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers); - sgi_dev->redist_vcpu = vcpu; - - 
mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, - SZ_64K, &sgi_dev->dev); - mutex_unlock(&kvm->slots_lock); - if (ret) { - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &rd_dev->dev); - break; - } - } - - if (ret) { - /* The current c failed, so we start with the previous one. */ - for (c--; c >= 0; c--) { - struct vgic_cpu *vgic_cpu; - - vcpu = kvm_get_vcpu(kvm, c); - vgic_cpu = &vcpu->arch.vgic_cpu; - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &vgic_cpu->rd_iodev.dev); - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &vgic_cpu->sgi_iodev.dev); - } - } - - return ret; -} - -/* - * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI - * generation register ICC_SGI1R_EL1) with a given VCPU. - * If the VCPU's MPIDR matches, return the level0 affinity, otherwise - * return -1. - */ -static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) -{ - unsigned long affinity; - int level0; - - /* - * Split the current VCPU's MPIDR into affinity level 0 and the - * rest as this is what we have to compare against. - */ - affinity = kvm_vcpu_get_mpidr_aff(vcpu); - level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); - affinity &= ~MPIDR_LEVEL_MASK; - - /* bail out if the upper three levels don't match */ - if (sgi_aff != affinity) - return -1; - - /* Is this VCPU's bit set in the mask ? */ - if (!(sgi_cpu_mask & BIT(level0))) - return -1; - - return level0; -} - -/* - * The ICC_SGI* registers encode the affinity differently from the MPIDR, - * so provide a wrapper to use the existing defines to isolate a certain - * affinity level. 
- */ -#define SGI_AFFINITY_LEVEL(reg, level) \ - ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ - >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) - -/** - * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs - * @vcpu: The VCPU requesting a SGI - * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU - * - * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. - * This will trap in sys_regs.c and call this function. - * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the - * target processors as well as a bitmask of 16 Aff0 CPUs. - * If the interrupt routing mode bit is not set, we iterate over all VCPUs to - * check for matching ones. If this bit is set, we signal all, but not the - * calling VCPU. - */ -void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) -{ - struct kvm *kvm = vcpu->kvm; - struct kvm_vcpu *c_vcpu; - u16 target_cpus; - u64 mpidr; - int sgi, c; - int vcpu_id = vcpu->vcpu_id; - bool broadcast; - - sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; - broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); - target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; - mpidr = SGI_AFFINITY_LEVEL(reg, 3); - mpidr |= SGI_AFFINITY_LEVEL(reg, 2); - mpidr |= SGI_AFFINITY_LEVEL(reg, 1); - - /* - * We iterate over all VCPUs to find the MPIDRs matching the request. - * If we have handled one CPU, we clear its bit to detect early - * if we are already finished. This avoids iterating through all - * VCPUs when most of the times we just signal a single VCPU. 
- */ - kvm_for_each_vcpu(c, c_vcpu, kvm) { - struct vgic_irq *irq; - - /* Exit early if we have dealt with all requested CPUs */ - if (!broadcast && target_cpus == 0) - break; - - /* Don't signal the calling VCPU */ - if (broadcast && c == vcpu_id) - continue; - - if (!broadcast) { - int level0; - - level0 = match_mpidr(mpidr, target_cpus, c_vcpu); - if (level0 == -1) - continue; - - /* remove this matching VCPU from the mask */ - target_cpus &= ~BIT(level0); - } - - irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); - - spin_lock(&irq->irq_lock); - irq->pending = true; - - vgic_queue_irq_unlock(vcpu->kvm, irq); - vgic_put_irq(vcpu->kvm, irq); - } -} diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c deleted file mode 100644 index ebe1b9f..0000000 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ /dev/null @@ -1,583 +0,0 @@ -/* - * VGIC MMIO handling functions - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#include <linux/bitops.h> -#include <linux/bsearch.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <kvm/iodev.h> -#include <kvm/arm_vgic.h> - -#include "vgic.h" -#include "vgic-mmio.h" - -unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - return 0; -} - -unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - return -1UL; -} - -void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, - unsigned int len, unsigned long val) -{ - /* Ignore */ -} - -/* - * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value - * of the enabled bit, so there is only one function for both here. - */ -unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - u32 value = 0; - int i; - - /* Loop over all IRQs affected by this read */ - for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - if (irq->enabled) - value |= (1U << i); - - vgic_put_irq(vcpu->kvm, irq); - } - - return value; -} - -void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - spin_lock(&irq->irq_lock); - irq->enabled = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); - - vgic_put_irq(vcpu->kvm, irq); - } -} - -void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - spin_lock(&irq->irq_lock); - - irq->enabled = false; - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - } -} - -unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, 
- gpa_t addr, unsigned int len) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - u32 value = 0; - int i; - - /* Loop over all IRQs affected by this read */ - for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - if (irq->pending) - value |= (1U << i); - - vgic_put_irq(vcpu->kvm, irq); - } - - return value; -} - -void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - spin_lock(&irq->irq_lock); - irq->pending = true; - if (irq->config == VGIC_CONFIG_LEVEL) - irq->soft_pending = true; - - vgic_queue_irq_unlock(vcpu->kvm, irq); - vgic_put_irq(vcpu->kvm, irq); - } -} - -void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - spin_lock(&irq->irq_lock); - - if (irq->config == VGIC_CONFIG_LEVEL) { - irq->soft_pending = false; - irq->pending = irq->line_level; - } else { - irq->pending = false; - } - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - } -} - -unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - u32 value = 0; - int i; - - /* Loop over all IRQs affected by this read */ - for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - if (irq->active) - value |= (1U << i); - - vgic_put_irq(vcpu->kvm, irq); - } - - return value; -} - -static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, - bool new_active_state) -{ - spin_lock(&irq->irq_lock); - /* - * If this virtual IRQ was written into a list register, we - * have to make 
sure the CPU that runs the VCPU thread has - * synced back LR state to the struct vgic_irq. We can only - * know this for sure, when either this irq is not assigned to - * anyone's AP list anymore, or the VCPU thread is not - * running on any CPUs. - * - * In the opposite case, we know the VCPU thread may be on its - * way back from the guest and still has to sync back this - * IRQ, so we release and re-acquire the spin_lock to let the - * other thread sync back the IRQ. - */ - while (irq->vcpu && /* IRQ may have state in an LR somewhere */ - irq->vcpu->cpu != -1) /* VCPU thread is running */ - cond_resched_lock(&irq->irq_lock); - - irq->active = new_active_state; - if (new_active_state) - vgic_queue_irq_unlock(vcpu->kvm, irq); - else - spin_unlock(&irq->irq_lock); -} - -/* - * If we are fiddling with an IRQ's active state, we have to make sure the IRQ - * is not queued on some running VCPU's LRs, because then the change to the - * active state can be overwritten when the VCPU's state is synced coming back - * from the guest. - * - * For shared interrupts, we have to stop all the VCPUs because interrupts can - * be migrated while we don't hold the IRQ locks and we don't want to be - * chasing moving targets. - * - * For private interrupts, we only have to make sure the single and only VCPU - * that can potentially queue the IRQ is stopped. 
- */ -static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) -{ - if (intid < VGIC_NR_PRIVATE_IRQS) - kvm_arm_halt_vcpu(vcpu); - else - kvm_arm_halt_guest(vcpu->kvm); -} - -/* See vgic_change_active_prepare */ -static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) -{ - if (intid < VGIC_NR_PRIVATE_IRQS) - kvm_arm_resume_vcpu(vcpu); - else - kvm_arm_resume_guest(vcpu->kvm); -} - -void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - - vgic_change_active_prepare(vcpu, intid); - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - vgic_mmio_change_active(vcpu, irq, false); - vgic_put_irq(vcpu->kvm, irq); - } - vgic_change_active_finish(vcpu, intid); -} - -void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - - vgic_change_active_prepare(vcpu, intid); - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - vgic_mmio_change_active(vcpu, irq, true); - vgic_put_irq(vcpu->kvm, irq); - } - vgic_change_active_finish(vcpu, intid); -} - -unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 8); - int i; - u64 val = 0; - - for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - val |= (u64)irq->priority << (i * 8); - - vgic_put_irq(vcpu->kvm, irq); - } - - return val; -} - -/* - * We currently don't handle changing the priority of an interrupt that - * is already pending on a VCPU. 
If there is a need for this, we would - * need to make this VCPU exit and re-evaluate the priorities, potentially - * leading to this interrupt getting presented now to the guest (if it has - * been masked by the priority mask before). - */ -void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 8); - int i; - - for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - spin_lock(&irq->irq_lock); - /* Narrow the priority range to what we actually support */ - irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); - spin_unlock(&irq->irq_lock); - - vgic_put_irq(vcpu->kvm, irq); - } -} - -unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 2); - u32 value = 0; - int i; - - for (i = 0; i < len * 4; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - if (irq->config == VGIC_CONFIG_EDGE) - value |= (2U << (i * 2)); - - vgic_put_irq(vcpu->kvm, irq); - } - - return value; -} - -void vgic_mmio_write_config(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 2); - int i; - - for (i = 0; i < len * 4; i++) { - struct vgic_irq *irq; - - /* - * The configuration cannot be changed for SGIs in general, - * for PPIs this is IMPLEMENTATION DEFINED. The arch timer - * code relies on PPIs being level triggered, so we also - * make them read-only here. 
- */ - if (intid + i < VGIC_NR_PRIVATE_IRQS) - continue; - - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); - - if (test_bit(i * 2 + 1, &val)) { - irq->config = VGIC_CONFIG_EDGE; - } else { - irq->config = VGIC_CONFIG_LEVEL; - irq->pending = irq->line_level | irq->soft_pending; - } - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - } -} - -static int match_region(const void *key, const void *elt) -{ - const unsigned int offset = (unsigned long)key; - const struct vgic_register_region *region = elt; - - if (offset < region->reg_offset) - return -1; - - if (offset >= region->reg_offset + region->len) - return 1; - - return 0; -} - -/* Find the proper register handler entry given a certain address offset. */ -static const struct vgic_register_region * -vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, - unsigned int offset) -{ - return bsearch((void *)(uintptr_t)offset, region, nr_regions, - sizeof(region[0]), match_region); -} - -/* - * kvm_mmio_read_buf() returns a value in a format where it can be converted - * to a byte array and be directly observed as the guest wanted it to appear - * in memory if it had done the store itself, which is LE for the GIC, as the - * guest knows the GIC is always LE. - * - * We convert this value to the CPUs native format to deal with it as a data - * value. - */ -unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) -{ - unsigned long data = kvm_mmio_read_buf(val, len); - - switch (len) { - case 1: - return data; - case 2: - return le16_to_cpu(data); - case 4: - return le32_to_cpu(data); - default: - return le64_to_cpu(data); - } -} - -/* - * kvm_mmio_write_buf() expects a value in a format such that if converted to - * a byte array it is observed as the guest would see it if it could perform - * the load directly. Since the GIC is LE, and the guest knows this, the - * guest expects a value in little endian format. 
- * - * We convert the data value from the CPUs native format to LE so that the - * value is returned in the proper format. - */ -void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, - unsigned long data) -{ - switch (len) { - case 1: - break; - case 2: - data = cpu_to_le16(data); - break; - case 4: - data = cpu_to_le32(data); - break; - default: - data = cpu_to_le64(data); - } - - kvm_mmio_write_buf(buf, len, data); -} - -static -struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) -{ - return container_of(dev, struct vgic_io_device, dev); -} - -static bool check_region(const struct kvm *kvm, - const struct vgic_register_region *region, - gpa_t addr, int len) -{ - int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; - - switch (len) { - case sizeof(u8): - flags = VGIC_ACCESS_8bit; - break; - case sizeof(u32): - flags = VGIC_ACCESS_32bit; - break; - case sizeof(u64): - flags = VGIC_ACCESS_64bit; - break; - default: - return false; - } - - if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) { - if (!region->bits_per_irq) - return true; - - /* Do we access a non-allocated IRQ? 
*/ - return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs; - } - - return false; -} - -static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, - gpa_t addr, int len, void *val) -{ - struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); - const struct vgic_register_region *region; - unsigned long data = 0; - - region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, - addr - iodev->base_addr); - if (!region || !check_region(vcpu->kvm, region, addr, len)) { - memset(val, 0, len); - return 0; - } - - switch (iodev->iodev_type) { - case IODEV_CPUIF: - data = region->read(vcpu, addr, len); - break; - case IODEV_DIST: - data = region->read(vcpu, addr, len); - break; - case IODEV_REDIST: - data = region->read(iodev->redist_vcpu, addr, len); - break; - case IODEV_ITS: - data = region->its_read(vcpu->kvm, iodev->its, addr, len); - break; - } - - vgic_data_host_to_mmio_bus(val, len, data); - return 0; -} - -static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, - gpa_t addr, int len, const void *val) -{ - struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); - const struct vgic_register_region *region; - unsigned long data = vgic_data_mmio_bus_to_host(val, len); - - region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, - addr - iodev->base_addr); - if (!region || !check_region(vcpu->kvm, region, addr, len)) - return 0; - - switch (iodev->iodev_type) { - case IODEV_CPUIF: - region->write(vcpu, addr, len, data); - break; - case IODEV_DIST: - region->write(vcpu, addr, len, data); - break; - case IODEV_REDIST: - region->write(iodev->redist_vcpu, addr, len, data); - break; - case IODEV_ITS: - region->its_write(vcpu->kvm, iodev->its, addr, len, data); - break; - } - - return 0; -} - -struct kvm_io_device_ops kvm_io_gic_ops = { - .read = dispatch_mmio_read, - .write = dispatch_mmio_write, -}; - -int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, - enum vgic_type type) -{ - struct 
vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; - int ret = 0; - unsigned int len; - - switch (type) { - case VGIC_V2: - len = vgic_v2_init_dist_iodev(io_device); - break; - case VGIC_V3: - len = vgic_v3_init_dist_iodev(io_device); - break; - default: - BUG_ON(1); - } - - io_device->base_addr = dist_base_address; - io_device->iodev_type = IODEV_DIST; - io_device->redist_vcpu = NULL; - - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, - len, &io_device->dev); - mutex_unlock(&kvm->slots_lock); - - return ret; -} diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h deleted file mode 100644 index 84961b4..0000000 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (C) 2015, 2016 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ -#ifndef __KVM_ARM_VGIC_MMIO_H__ -#define __KVM_ARM_VGIC_MMIO_H__ - -struct vgic_register_region { - unsigned int reg_offset; - unsigned int len; - unsigned int bits_per_irq; - unsigned int access_flags; - union { - unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, - unsigned int len); - unsigned long (*its_read)(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len); - }; - union { - void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, - unsigned int len, unsigned long val); - void (*its_write)(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val); - }; -}; - -extern struct kvm_io_device_ops kvm_io_gic_ops; - -#define VGIC_ACCESS_8bit 1 -#define VGIC_ACCESS_32bit 2 -#define VGIC_ACCESS_64bit 4 - -/* - * Generate a mask that covers the number of bytes required to address - * up to 1024 interrupts, each represented by <bits> bits. This assumes - * that <bits> is a power of two. - */ -#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1) - -/* - * (addr & mask) gives us the _byte_ offset for the INT ID. - * We multiply this by 8 the get the _bit_ offset, then divide this by - * the number of bits to learn the actual INT ID. - * But instead of a division (which requires a "long long div" implementation), - * we shift by the binary logarithm of <bits>. - * This assumes that <bits> is a power of two. - */ -#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ - 8 >> ilog2(bits)) - -/* - * Some VGIC registers store per-IRQ information, with a different number - * of bits per IRQ. For those registers this macro is used. - * The _WITH_LENGTH version instantiates registers with a fixed length - * and is mutually exclusive with the _PER_IRQ version. 
- */ -#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ - { \ - .reg_offset = off, \ - .bits_per_irq = bpi, \ - .len = bpi * 1024 / 8, \ - .access_flags = acc, \ - .read = rd, \ - .write = wr, \ - } - -#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ - { \ - .reg_offset = off, \ - .bits_per_irq = 0, \ - .len = length, \ - .access_flags = acc, \ - .read = rd, \ - .write = wr, \ - } - -int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, - struct vgic_register_region *reg_desc, - struct vgic_io_device *region, - int nr_irqs, bool offset_private); - -unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); - -void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, - unsigned long data); - -unsigned long extract_bytes(u64 data, unsigned int offset, - unsigned int num); - -u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, - unsigned long val); - -unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len); - -unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len); - -void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, - unsigned int len, unsigned long val); - -unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len); - -void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); - -void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); - -unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len); - -void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); - -void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); - -unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len); - -void vgic_mmio_write_cactive(struct 
kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); - -void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); - -unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len); - -void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); - -unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len); - -void vgic_mmio_write_config(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val); - -unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); - -unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); - -u64 vgic_sanitise_outer_cacheability(u64 reg); -u64 vgic_sanitise_inner_cacheability(u64 reg); -u64 vgic_sanitise_shareability(u64 reg); -u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, - u64 (*sanitise_fn)(u64)); - -#endif diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c deleted file mode 100644 index 0a063af..0000000 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * Copyright (C) 2015, 2016 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <linux/irqchip/arm-gic.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <kvm/arm_vgic.h> -#include <asm/kvm_mmu.h> - -#include "vgic.h" - -/* - * Call this function to convert a u64 value to an unsigned long * bitmask - * in a way that works on both 32-bit and 64-bit LE and BE platforms. - * - * Warning: Calling this function may modify *val. - */ -static unsigned long *u64_to_bitmask(u64 *val) -{ -#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32 - *val = (*val >> 32) | (*val << 32); -#endif - return (unsigned long *)val; -} - -void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) -{ - struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; - - if (cpuif->vgic_misr & GICH_MISR_EOI) { - u64 eisr = cpuif->vgic_eisr; - unsigned long *eisr_bmap = u64_to_bitmask(&eisr); - int lr; - - for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) { - u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID; - - WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE); - - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); - } - } - - /* check and disable underflow maintenance IRQ */ - cpuif->vgic_hcr &= ~GICH_HCR_UIE; - - /* - * In the next iterations of the vcpu loop, if we sync the - * vgic state after flushing it, but before entering the guest - * (this happens for pending signals and vmid rollovers), then - * make sure we don't pick up any old maintenance interrupts - * here. 
- */ - cpuif->vgic_eisr = 0; -} - -void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) -{ - struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; - - cpuif->vgic_hcr |= GICH_HCR_UIE; -} - -/* - * transfer the content of the LRs back into the corresponding ap_list: - * - active bit is transferred as is - * - pending bit is - * - transferred as is in case of edge sensitive IRQs - * - set to the line-level (resample time) for level sensitive IRQs - */ -void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) -{ - struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; - int lr; - - for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { - u32 val = cpuif->vgic_lr[lr]; - u32 intid = val & GICH_LR_VIRTUALID; - struct vgic_irq *irq; - - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); - - spin_lock(&irq->irq_lock); - - /* Always preserve the active bit */ - irq->active = !!(val & GICH_LR_ACTIVE_BIT); - - /* Edge is the only case where we preserve the pending bit */ - if (irq->config == VGIC_CONFIG_EDGE && - (val & GICH_LR_PENDING_BIT)) { - irq->pending = true; - - if (vgic_irq_is_sgi(intid)) { - u32 cpuid = val & GICH_LR_PHYSID_CPUID; - - cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; - irq->source |= (1 << cpuid); - } - } - - /* - * Clear soft pending state when level irqs have been acked. - * Always regenerate the pending state. - */ - if (irq->config == VGIC_CONFIG_LEVEL) { - if (!(val & GICH_LR_PENDING_BIT)) - irq->soft_pending = false; - - irq->pending = irq->line_level || irq->soft_pending; - } - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - } -} - -/* - * Populates the particular LR with the state of a given IRQ: - * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq - * - for a level sensitive IRQ the pending state value is unchanged; - * it is dictated directly by the input level - * - * If @irq describes an SGI with multiple sources, we choose the - * lowest-numbered source VCPU and clear that bit in the source bitmap. 
- * - * The irq_lock must be held by the caller. - */ -void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) -{ - u32 val = irq->intid; - - if (irq->pending) { - val |= GICH_LR_PENDING_BIT; - - if (irq->config == VGIC_CONFIG_EDGE) - irq->pending = false; - - if (vgic_irq_is_sgi(irq->intid)) { - u32 src = ffs(irq->source); - - BUG_ON(!src); - val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; - irq->source &= ~(1 << (src - 1)); - if (irq->source) - irq->pending = true; - } - } - - if (irq->active) - val |= GICH_LR_ACTIVE_BIT; - - if (irq->hw) { - val |= GICH_LR_HW; - val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; - } else { - if (irq->config == VGIC_CONFIG_LEVEL) - val |= GICH_LR_EOI; - } - - /* The GICv2 LR only holds five bits of priority. */ - val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; -} - -void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0; -} - -void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; - vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & - GICH_VMCR_ALIAS_BINPOINT_MASK; - vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & - GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & - GICH_VMCR_PRIMASK_MASK; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; -} - -void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; - - vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> - GICH_VMCR_CTRL_SHIFT; - vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> - GICH_VMCR_ALIAS_BINPOINT_SHIFT; - vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> - GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> - GICH_VMCR_PRIMASK_SHIFT; -} - -void vgic_v2_enable(struct kvm_vcpu *vcpu) -{ - /* - * By forcing VMCR to zero, 
the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; - - /* Get the show on the road... */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; -} - -/* check for overlapping regions and for regions crossing the end of memory */ -static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base) -{ - if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base) - return false; - if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base) - return false; - - if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base) - return true; - if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base) - return true; - - return false; -} - -int vgic_v2_map_resources(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - int ret = 0; - - if (vgic_ready(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { - kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; - } - - if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { - kvm_err("VGIC CPU and dist frames overlap\n"); - ret = -EINVAL; - goto out; - } - - /* - * Initialize the vgic if this hasn't already been done on demand by - * accessing the vgic state from userspace. 
- */ - ret = vgic_init(kvm); - if (ret) { - kvm_err("Unable to initialize VGIC dynamic data structures\n"); - goto out; - } - - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); - if (ret) { - kvm_err("Unable to register VGIC MMIO regions\n"); - goto out; - } - - if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { - ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, - kvm_vgic_global_state.vcpu_base, - KVM_VGIC_V2_CPU_SIZE, true); - if (ret) { - kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; - } - } - - dist->ready = true; - -out: - if (ret) - kvm_vgic_destroy(kvm); - return ret; -} - -DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap); - -/** - * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT - * @node: pointer to the DT node - * - * Returns 0 if a GICv2 has been found, returns an error code otherwise - */ -int vgic_v2_probe(const struct gic_kvm_info *info) -{ - int ret; - u32 vtr; - - if (!info->vctrl.start) { - kvm_err("GICH not present in the firmware table\n"); - return -ENXIO; - } - - if (!PAGE_ALIGNED(info->vcpu.start) || - !PAGE_ALIGNED(resource_size(&info->vcpu))) { - kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); - kvm_vgic_global_state.vcpu_base_va = ioremap(info->vcpu.start, - resource_size(&info->vcpu)); - if (!kvm_vgic_global_state.vcpu_base_va) { - kvm_err("Cannot ioremap GICV\n"); - return -ENOMEM; - } - - ret = create_hyp_io_mappings(kvm_vgic_global_state.vcpu_base_va, - kvm_vgic_global_state.vcpu_base_va + resource_size(&info->vcpu), - info->vcpu.start); - if (ret) { - kvm_err("Cannot map GICV into hyp\n"); - goto out; - } - - static_branch_enable(&vgic_v2_cpuif_trap); - } - - kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start, - resource_size(&info->vctrl)); - if (!kvm_vgic_global_state.vctrl_base) { - kvm_err("Cannot ioremap GICH\n"); - ret = -ENOMEM; - goto out; - } - - vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); - 
kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; - - ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, - kvm_vgic_global_state.vctrl_base + - resource_size(&info->vctrl), - info->vctrl.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out; - } - - ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); - if (ret) { - kvm_err("Cannot register GICv2 KVM device\n"); - goto out; - } - - kvm_vgic_global_state.can_emulate_gicv2 = true; - kvm_vgic_global_state.vcpu_base = info->vcpu.start; - kvm_vgic_global_state.type = VGIC_V2; - kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; - - kvm_info("vgic-v2@%llx\n", info->vctrl.start); - - return 0; -out: - if (kvm_vgic_global_state.vctrl_base) - iounmap(kvm_vgic_global_state.vctrl_base); - if (kvm_vgic_global_state.vcpu_base_va) - iounmap(kvm_vgic_global_state.vcpu_base_va); - - return ret; -} diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c deleted file mode 100644 index 9f0dae3..0000000 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ /dev/null @@ -1,363 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <linux/irqchip/arm-gic-v3.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <kvm/arm_vgic.h> -#include <asm/kvm_mmu.h> -#include <asm/kvm_asm.h> - -#include "vgic.h" - -void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - u32 model = vcpu->kvm->arch.vgic.vgic_model; - - if (cpuif->vgic_misr & ICH_MISR_EOI) { - unsigned long eisr_bmap = cpuif->vgic_eisr; - int lr; - - for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) { - u32 intid; - u64 val = cpuif->vgic_lr[lr]; - - if (model == KVM_DEV_TYPE_ARM_VGIC_V3) - intid = val & ICH_LR_VIRTUAL_ID_MASK; - else - intid = val & GICH_LR_VIRTUALID; - - WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE); - - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); - } - - /* - * In the next iterations of the vcpu loop, if we sync - * the vgic state after flushing it, but before - * entering the guest (this happens for pending - * signals and vmid rollovers), then make sure we - * don't pick up any old maintenance interrupts here. - */ - cpuif->vgic_eisr = 0; - } - - cpuif->vgic_hcr &= ~ICH_HCR_UIE; -} - -void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - - cpuif->vgic_hcr |= ICH_HCR_UIE; -} - -void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - u32 model = vcpu->kvm->arch.vgic.vgic_model; - int lr; - - for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { - u64 val = cpuif->vgic_lr[lr]; - u32 intid; - struct vgic_irq *irq; - - if (model == KVM_DEV_TYPE_ARM_VGIC_V3) - intid = val & ICH_LR_VIRTUAL_ID_MASK; - else - intid = val & GICH_LR_VIRTUALID; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); - if (!irq) /* An LPI could have been unmapped. 
*/ - continue; - - spin_lock(&irq->irq_lock); - - /* Always preserve the active bit */ - irq->active = !!(val & ICH_LR_ACTIVE_BIT); - - /* Edge is the only case where we preserve the pending bit */ - if (irq->config == VGIC_CONFIG_EDGE && - (val & ICH_LR_PENDING_BIT)) { - irq->pending = true; - - if (vgic_irq_is_sgi(intid) && - model == KVM_DEV_TYPE_ARM_VGIC_V2) { - u32 cpuid = val & GICH_LR_PHYSID_CPUID; - - cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; - irq->source |= (1 << cpuid); - } - } - - /* - * Clear soft pending state when level irqs have been acked. - * Always regenerate the pending state. - */ - if (irq->config == VGIC_CONFIG_LEVEL) { - if (!(val & ICH_LR_PENDING_BIT)) - irq->soft_pending = false; - - irq->pending = irq->line_level || irq->soft_pending; - } - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - } -} - -/* Requires the irq to be locked already */ -void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) -{ - u32 model = vcpu->kvm->arch.vgic.vgic_model; - u64 val = irq->intid; - - if (irq->pending) { - val |= ICH_LR_PENDING_BIT; - - if (irq->config == VGIC_CONFIG_EDGE) - irq->pending = false; - - if (vgic_irq_is_sgi(irq->intid) && - model == KVM_DEV_TYPE_ARM_VGIC_V2) { - u32 src = ffs(irq->source); - - BUG_ON(!src); - val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; - irq->source &= ~(1 << (src - 1)); - if (irq->source) - irq->pending = true; - } - } - - if (irq->active) - val |= ICH_LR_ACTIVE_BIT; - - if (irq->hw) { - val |= ICH_LR_HW; - val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; - } else { - if (irq->config == VGIC_CONFIG_LEVEL) - val |= ICH_LR_EOI; - } - - /* - * We currently only support Group1 interrupts, which is a - * known defect. This needs to be addressed at some point. 
- */ - if (model == KVM_DEV_TYPE_ARM_VGIC_V3) - val |= ICH_LR_GROUP; - - val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; - - vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; -} - -void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) -{ - vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0; -} - -void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; - vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; - vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; - vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; - - vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; -} - -void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; - - vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; - vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; - vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; - vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; -} - -#define INITIAL_PENDBASER_VALUE \ - (GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) | \ - GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \ - GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)) - -void vgic_v3_enable(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; - - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vgic_v3->vgic_vmcr = 0; - vgic_v3->vgic_elrsr = ~0; - - /* - * If we are emulating a GICv3, we do it in an non-GICv2-compatible - * way, so we force SRE to 1 to demonstrate this to the guest. - * This goes with the spec allowing the value to be RAO/WI. 
- */ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; - vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; - } else { - vgic_v3->vgic_sre = 0; - } - - /* Get the show on the road... */ - vgic_v3->vgic_hcr = ICH_HCR_EN; -} - -/* check for overlapping regions and for regions crossing the end of memory */ -static bool vgic_v3_check_base(struct kvm *kvm) -{ - struct vgic_dist *d = &kvm->arch.vgic; - gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE; - - redist_size *= atomic_read(&kvm->online_vcpus); - - if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) - return false; - if (d->vgic_redist_base + redist_size < d->vgic_redist_base) - return false; - - if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base) - return true; - if (d->vgic_redist_base + redist_size <= d->vgic_dist_base) - return true; - - return false; -} - -int vgic_v3_map_resources(struct kvm *kvm) -{ - int ret = 0; - struct vgic_dist *dist = &kvm->arch.vgic; - - if (vgic_ready(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) { - kvm_err("Need to set vgic distributor addresses first\n"); - ret = -ENXIO; - goto out; - } - - if (!vgic_v3_check_base(kvm)) { - kvm_err("VGIC redist and dist frames overlap\n"); - ret = -EINVAL; - goto out; - } - - /* - * For a VGICv3 we require the userland to explicitly initialize - * the VGIC before we need to use it. 
- */ - if (!vgic_initialized(kvm)) { - ret = -EBUSY; - goto out; - } - - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); - if (ret) { - kvm_err("Unable to register VGICv3 dist MMIO regions\n"); - goto out; - } - - ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base); - if (ret) { - kvm_err("Unable to register VGICv3 redist MMIO regions\n"); - goto out; - } - - if (vgic_has_its(kvm)) { - ret = vgic_register_its_iodevs(kvm); - if (ret) { - kvm_err("Unable to register VGIC ITS MMIO regions\n"); - goto out; - } - } - - dist->ready = true; - -out: - if (ret) - kvm_vgic_destroy(kvm); - return ret; -} - -/** - * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT - * @node: pointer to the DT node - * - * Returns 0 if a GICv3 has been found, returns an error code otherwise - */ -int vgic_v3_probe(const struct gic_kvm_info *info) -{ - u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); - int ret; - - /* - * The ListRegs field is 5 bits, but there is a architectural - * maximum of 16 list registers. Just ignore bit 4... 
- */ - kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; - kvm_vgic_global_state.can_emulate_gicv2 = false; - - if (!info->vcpu.start) { - kvm_info("GICv3: no GICV resource entry\n"); - kvm_vgic_global_state.vcpu_base = 0; - } else if (!PAGE_ALIGNED(info->vcpu.start)) { - pr_warn("GICV physical address 0x%llx not page aligned\n", - (unsigned long long)info->vcpu.start); - kvm_vgic_global_state.vcpu_base = 0; - } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) { - pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", - (unsigned long long)resource_size(&info->vcpu), - PAGE_SIZE); - kvm_vgic_global_state.vcpu_base = 0; - } else { - kvm_vgic_global_state.vcpu_base = info->vcpu.start; - kvm_vgic_global_state.can_emulate_gicv2 = true; - ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); - if (ret) { - kvm_err("Cannot register GICv2 KVM device.\n"); - return ret; - } - kvm_info("vgic-v2@%llx\n", info->vcpu.start); - } - ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); - if (ret) { - kvm_err("Cannot register GICv3 KVM device.\n"); - kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); - return ret; - } - - if (kvm_vgic_global_state.vcpu_base == 0) - kvm_info("disabling GICv2 emulation\n"); - - kvm_vgic_global_state.vctrl_base = NULL; - kvm_vgic_global_state.type = VGIC_V3; - kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; - - return 0; -} diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c deleted file mode 100644 index 6440b56..0000000 --- a/virt/kvm/arm/vgic/vgic.c +++ /dev/null @@ -1,731 +0,0 @@ -/* - * Copyright (C) 2015, 2016 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/list_sort.h> - -#include "vgic.h" - -#define CREATE_TRACE_POINTS -#include "../trace.h" - -#ifdef CONFIG_DEBUG_SPINLOCK -#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p) -#else -#define DEBUG_SPINLOCK_BUG_ON(p) -#endif - -struct vgic_global __section(.hyp.text) kvm_vgic_global_state = {.gicv3_cpuif = STATIC_KEY_FALSE_INIT,}; - -/* - * Locking order is always: - * its->cmd_lock (mutex) - * its->its_lock (mutex) - * vgic_cpu->ap_list_lock - * kvm->lpi_list_lock - * vgic_irq->irq_lock - * - * If you need to take multiple locks, always take the upper lock first, - * then the lower ones, e.g. first take the its_lock, then the irq_lock. - * If you are already holding a lock and need to take a higher one, you - * have to drop the lower ranking lock first and re-aquire it after having - * taken the upper one. - * - * When taking more than one ap_list_lock at the same time, always take the - * lowest numbered VCPU's ap_list_lock first, so: - * vcpuX->vcpu_id < vcpuY->vcpu_id: - * spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock); - * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); - */ - -/* - * Iterate over the VM's list of mapped LPIs to find the one with a - * matching interrupt ID and return a reference to the IRQ structure. 
- */ -static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct vgic_irq *irq = NULL; - - spin_lock(&dist->lpi_list_lock); - - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { - if (irq->intid != intid) - continue; - - /* - * This increases the refcount, the caller is expected to - * call vgic_put_irq() later once it's finished with the IRQ. - */ - vgic_get_irq_kref(irq); - goto out_unlock; - } - irq = NULL; - -out_unlock: - spin_unlock(&dist->lpi_list_lock); - - return irq; -} - -/* - * This looks up the virtual interrupt ID to get the corresponding - * struct vgic_irq. It also increases the refcount, so any caller is expected - * to call vgic_put_irq() once it's finished with this IRQ. - */ -struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, - u32 intid) -{ - /* SGIs and PPIs */ - if (intid <= VGIC_MAX_PRIVATE) - return &vcpu->arch.vgic_cpu.private_irqs[intid]; - - /* SPIs */ - if (intid <= VGIC_MAX_SPI) - return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; - - /* LPIs */ - if (intid >= VGIC_MIN_LPI) - return vgic_get_lpi(kvm, intid); - - WARN(1, "Looking up struct vgic_irq for reserved INTID"); - return NULL; -} - -/* - * We can't do anything in here, because we lack the kvm pointer to - * lock and remove the item from the lpi_list. So we keep this function - * empty and use the return value of kref_put() to trigger the freeing. 
- */ -static void vgic_irq_release(struct kref *ref) -{ -} - -void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - if (irq->intid < VGIC_MIN_LPI) - return; - - spin_lock(&dist->lpi_list_lock); - if (!kref_put(&irq->refcount, vgic_irq_release)) { - spin_unlock(&dist->lpi_list_lock); - return; - }; - - list_del(&irq->lpi_list); - dist->lpi_list_count--; - spin_unlock(&dist->lpi_list_lock); - - kfree(irq); -} - -/** - * kvm_vgic_target_oracle - compute the target vcpu for an irq - * - * @irq: The irq to route. Must be already locked. - * - * Based on the current state of the interrupt (enabled, pending, - * active, vcpu and target_vcpu), compute the next vcpu this should be - * given to. Return NULL if this shouldn't be injected at all. - * - * Requires the IRQ lock to be held. - */ -static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) -{ - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); - - /* If the interrupt is active, it must stay on the current vcpu */ - if (irq->active) - return irq->vcpu ? : irq->target_vcpu; - - /* - * If the IRQ is not active but enabled and pending, we should direct - * it to its configured target VCPU. - * If the distributor is disabled, pending interrupts shouldn't be - * forwarded. - */ - if (irq->enabled && irq->pending) { - if (unlikely(irq->target_vcpu && - !irq->target_vcpu->kvm->arch.vgic.enabled)) - return NULL; - - return irq->target_vcpu; - } - - /* If neither active nor pending and enabled, then this IRQ should not - * be queued to any VCPU. - */ - return NULL; -} - -/* - * The order of items in the ap_lists defines how we'll pack things in LRs as - * well, the first items in the list being the first things populated in the - * LRs. - * - * A hard rule is that active interrupts can never be pushed out of the LRs - * (and therefore take priority) since we cannot reliably trap on deactivation - * of IRQs and therefore they have to be present in the LRs. 
- * - * Otherwise things should be sorted by the priority field and the GIC - * hardware support will take care of preemption of priority groups etc. - * - * Return negative if "a" sorts before "b", 0 to preserve order, and positive - * to sort "b" before "a". - */ -static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) -{ - struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list); - struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list); - bool penda, pendb; - int ret; - - spin_lock(&irqa->irq_lock); - spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); - - if (irqa->active || irqb->active) { - ret = (int)irqb->active - (int)irqa->active; - goto out; - } - - penda = irqa->enabled && irqa->pending; - pendb = irqb->enabled && irqb->pending; - - if (!penda || !pendb) { - ret = (int)pendb - (int)penda; - goto out; - } - - /* Both pending and enabled, sort by priority */ - ret = irqa->priority - irqb->priority; -out: - spin_unlock(&irqb->irq_lock); - spin_unlock(&irqa->irq_lock); - return ret; -} - -/* Must be called with the ap_list_lock held */ -static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); - - list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); -} - -/* - * Only valid injection if changing level for level-triggered IRQs or for a - * rising edge. - */ -static bool vgic_validate_injection(struct vgic_irq *irq, bool level) -{ - switch (irq->config) { - case VGIC_CONFIG_LEVEL: - return irq->line_level != level; - case VGIC_CONFIG_EDGE: - return level; - } - - return false; -} - -/* - * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list. - * Do the queuing if necessary, taking the right locks in the right order. - * Returns true when the IRQ was queued, false otherwise. - * - * Needs to be entered with the IRQ lock already held, but will return - * with all locks dropped. 
- */ -bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq) -{ - struct kvm_vcpu *vcpu; - - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); - -retry: - vcpu = vgic_target_oracle(irq); - if (irq->vcpu || !vcpu) { - /* - * If this IRQ is already on a VCPU's ap_list, then it - * cannot be moved or modified and there is no more work for - * us to do. - * - * Otherwise, if the irq is not pending and enabled, it does - * not need to be inserted into an ap_list and there is also - * no more work for us to do. - */ - spin_unlock(&irq->irq_lock); - - /* - * We have to kick the VCPU here, because we could be - * queueing an edge-triggered interrupt for which we - * get no EOI maintenance interrupt. In that case, - * while the IRQ is already on the VCPU's AP list, the - * VCPU could have EOI'ed the original interrupt and - * won't see this one until it exits for some other - * reason. - */ - if (vcpu) - kvm_vcpu_kick(vcpu); - return false; - } - - /* - * We must unlock the irq lock to take the ap_list_lock where - * we are going to insert this new pending interrupt. - */ - spin_unlock(&irq->irq_lock); - - /* someone can do stuff here, which we re-check below */ - - spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); - spin_lock(&irq->irq_lock); - - /* - * Did something change behind our backs? - * - * There are two cases: - * 1) The irq lost its pending state or was disabled behind our - * backs and/or it was queued to another VCPU's ap_list. - * 2) Someone changed the affinity on this irq behind our - * backs and we are now holding the wrong ap_list_lock. - * - * In both cases, drop the locks and retry. - */ - - if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { - spin_unlock(&irq->irq_lock); - spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); - - spin_lock(&irq->irq_lock); - goto retry; - } - - /* - * Grab a reference to the irq to reflect the fact that it is - * now in the ap_list. 
- */ - vgic_get_irq_kref(irq); - list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); - irq->vcpu = vcpu; - - spin_unlock(&irq->irq_lock); - spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); - - kvm_vcpu_kick(vcpu); - - return true; -} - -static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, - unsigned int intid, bool level, - bool mapped_irq) -{ - struct kvm_vcpu *vcpu; - struct vgic_irq *irq; - int ret; - - trace_vgic_update_irq_pending(cpuid, intid, level); - - ret = vgic_lazy_init(kvm); - if (ret) - return ret; - - vcpu = kvm_get_vcpu(kvm, cpuid); - if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) - return -EINVAL; - - irq = vgic_get_irq(kvm, vcpu, intid); - if (!irq) - return -EINVAL; - - if (irq->hw != mapped_irq) { - vgic_put_irq(kvm, irq); - return -EINVAL; - } - - spin_lock(&irq->irq_lock); - - if (!vgic_validate_injection(irq, level)) { - /* Nothing to see here, move along... */ - spin_unlock(&irq->irq_lock); - vgic_put_irq(kvm, irq); - return 0; - } - - if (irq->config == VGIC_CONFIG_LEVEL) { - irq->line_level = level; - irq->pending = level || irq->soft_pending; - } else { - irq->pending = true; - } - - vgic_queue_irq_unlock(kvm, irq); - vgic_put_irq(kvm, irq); - - return 0; -} - -/** - * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic - * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs - * @intid: The INTID to inject a new state to. - * @level: Edge-triggered: true: to trigger the interrupt - * false: to ignore the call - * Level-sensitive true: raise the input signal - * false: lower the input signal - * - * The VGIC is not concerned with devices being active-LOW or active-HIGH for - * level-sensitive interrupts. You can think of the level parameter as 1 - * being HIGH and 0 being LOW and all devices being active-HIGH. 
- */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level) -{ - return vgic_update_irq_pending(kvm, cpuid, intid, level, false); -} - -int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level) -{ - return vgic_update_irq_pending(kvm, cpuid, intid, level, true); -} - -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) -{ - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); - - BUG_ON(!irq); - - spin_lock(&irq->irq_lock); - - irq->hw = true; - irq->hwintid = phys_irq; - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - - return 0; -} - -int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) -{ - struct vgic_irq *irq; - - if (!vgic_initialized(vcpu->kvm)) - return -EAGAIN; - - irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); - BUG_ON(!irq); - - spin_lock(&irq->irq_lock); - - irq->hw = false; - irq->hwintid = 0; - - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - - return 0; -} - -/** - * vgic_prune_ap_list - Remove non-relevant interrupts from the list - * - * @vcpu: The VCPU pointer - * - * Go over the list of "interesting" interrupts, and prune those that we - * won't have to consider in the near future. - */ -static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_irq *irq, *tmp; - -retry: - spin_lock(&vgic_cpu->ap_list_lock); - - list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { - struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; - - spin_lock(&irq->irq_lock); - - BUG_ON(vcpu != irq->vcpu); - - target_vcpu = vgic_target_oracle(irq); - - if (!target_vcpu) { - /* - * We don't need to process this interrupt any - * further, move it off the list. 
- */ - list_del(&irq->ap_list); - irq->vcpu = NULL; - spin_unlock(&irq->irq_lock); - - /* - * This vgic_put_irq call matches the - * vgic_get_irq_kref in vgic_queue_irq_unlock, - * where we added the LPI to the ap_list. As - * we remove the irq from the list, we drop - * also drop the refcount. - */ - vgic_put_irq(vcpu->kvm, irq); - continue; - } - - if (target_vcpu == vcpu) { - /* We're on the right CPU */ - spin_unlock(&irq->irq_lock); - continue; - } - - /* This interrupt looks like it has to be migrated. */ - - spin_unlock(&irq->irq_lock); - spin_unlock(&vgic_cpu->ap_list_lock); - - /* - * Ensure locking order by always locking the smallest - * ID first. - */ - if (vcpu->vcpu_id < target_vcpu->vcpu_id) { - vcpuA = vcpu; - vcpuB = target_vcpu; - } else { - vcpuA = target_vcpu; - vcpuB = vcpu; - } - - spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); - spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, - SINGLE_DEPTH_NESTING); - spin_lock(&irq->irq_lock); - - /* - * If the affinity has been preserved, move the - * interrupt around. Otherwise, it means things have - * changed while the interrupt was unlocked, and we - * need to replay this. - * - * In all cases, we cannot trust the list not to have - * changed, so we restart from the beginning. 
- */ - if (target_vcpu == vgic_target_oracle(irq)) { - struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu; - - list_del(&irq->ap_list); - irq->vcpu = target_vcpu; - list_add_tail(&irq->ap_list, &new_cpu->ap_list_head); - } - - spin_unlock(&irq->irq_lock); - spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); - spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); - goto retry; - } - - spin_unlock(&vgic_cpu->ap_list_lock); -} - -static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_process_maintenance(vcpu); - else - vgic_v3_process_maintenance(vcpu); -} - -static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_fold_lr_state(vcpu); - else - vgic_v3_fold_lr_state(vcpu); -} - -/* Requires the irq_lock to be held. */ -static inline void vgic_populate_lr(struct kvm_vcpu *vcpu, - struct vgic_irq *irq, int lr) -{ - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); - - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_populate_lr(vcpu, irq, lr); - else - vgic_v3_populate_lr(vcpu, irq, lr); -} - -static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_clear_lr(vcpu, lr); - else - vgic_v3_clear_lr(vcpu, lr); -} - -static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_set_underflow(vcpu); - else - vgic_v3_set_underflow(vcpu); -} - -/* Requires the ap_list_lock to be held. */ -static int compute_ap_list_depth(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_irq *irq; - int count = 0; - - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); - - list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { - spin_lock(&irq->irq_lock); - /* GICv2 SGIs can count for more than one... 
*/ - if (vgic_irq_is_sgi(irq->intid) && irq->source) - count += hweight8(irq->source); - else - count++; - spin_unlock(&irq->irq_lock); - } - return count; -} - -/* Requires the VCPU's ap_list_lock to be held. */ -static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_irq *irq; - int count = 0; - - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); - - if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) { - vgic_set_underflow(vcpu); - vgic_sort_ap_list(vcpu); - } - - list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { - spin_lock(&irq->irq_lock); - - if (unlikely(vgic_target_oracle(irq) != vcpu)) - goto next; - - /* - * If we get an SGI with multiple sources, try to get - * them in all at once. - */ - do { - vgic_populate_lr(vcpu, irq, count++); - } while (irq->source && count < kvm_vgic_global_state.nr_lr); - -next: - spin_unlock(&irq->irq_lock); - - if (count == kvm_vgic_global_state.nr_lr) - break; - } - - vcpu->arch.vgic_cpu.used_lrs = count; - - /* Nuke remaining LRs */ - for ( ; count < kvm_vgic_global_state.nr_lr; count++) - vgic_clear_lr(vcpu, count); -} - -/* Sync back the hardware VGIC state into our emulation after a guest's run. */ -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - if (unlikely(!vgic_initialized(vcpu->kvm))) - return; - - vgic_process_maintenance_interrupt(vcpu); - vgic_fold_lr_state(vcpu); - vgic_prune_ap_list(vcpu); -} - -/* Flush our emulation state into the GIC hardware before entering the guest. 
*/ -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - if (unlikely(!vgic_initialized(vcpu->kvm))) - return; - - spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); - vgic_flush_lr_state(vcpu); - spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); -} - -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_irq *irq; - bool pending = false; - - if (!vcpu->kvm->arch.vgic.enabled) - return false; - - spin_lock(&vgic_cpu->ap_list_lock); - - list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { - spin_lock(&irq->irq_lock); - pending = irq->pending && irq->enabled; - spin_unlock(&irq->irq_lock); - - if (pending) - break; - } - - spin_unlock(&vgic_cpu->ap_list_lock); - - return pending; -} - -void vgic_kick_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - int c; - - /* - * We've injected an interrupt, time to find out who deserves - * a good kick... - */ - kvm_for_each_vcpu(c, vcpu, kvm) { - if (kvm_vgic_vcpu_pending_irq(vcpu)) - kvm_vcpu_kick(vcpu); - } -} - -bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) -{ - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); - bool map_is_active; - - spin_lock(&irq->irq_lock); - map_is_active = irq->hw && irq->active; - spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); - - return map_is_active; -} - diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h deleted file mode 100644 index 9d9e014..0000000 --- a/virt/kvm/arm/vgic/vgic.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2015, 2016 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __KVM_ARM_VGIC_NEW_H__ -#define __KVM_ARM_VGIC_NEW_H__ - -#include <linux/irqchip/arm-gic-common.h> - -#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ -#define IMPLEMENTER_ARM 0x43b - -#define VGIC_ADDR_UNDEF (-1) -#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) - -#define INTERRUPT_ID_BITS_SPIS 10 -#define INTERRUPT_ID_BITS_ITS 16 -#define VGIC_PRI_BITS 5 - -#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) - -struct vgic_vmcr { - u32 ctlr; - u32 abpr; - u32 bpr; - u32 pmr; -}; - -struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, - u32 intid); -void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); -bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq); -void vgic_kick_vcpus(struct kvm *kvm); - -int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment); - -void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu); -void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); -void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); -void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); -void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); -int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); -int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, - int offset, u32 *val); -int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, - int offset, u32 *val); -void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -void vgic_v2_enable(struct kvm_vcpu *vcpu); -int vgic_v2_probe(const struct gic_kvm_info *info); -int vgic_v2_map_resources(struct kvm *kvm); -int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, - 
enum vgic_type); - -static inline void vgic_get_irq_kref(struct vgic_irq *irq) -{ - if (irq->intid < VGIC_MIN_LPI) - return; - - kref_get(&irq->refcount); -} - -void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu); -void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); -void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); -void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); -void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); -void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -void vgic_v3_enable(struct kvm_vcpu *vcpu); -int vgic_v3_probe(const struct gic_kvm_info *info); -int vgic_v3_map_resources(struct kvm *kvm); -int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); - -#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS -int vgic_register_its_iodevs(struct kvm *kvm); -bool vgic_has_its(struct kvm *kvm); -int kvm_vgic_register_its_device(void); -void vgic_enable_lpis(struct kvm_vcpu *vcpu); -int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); -#else -static inline int vgic_register_its_iodevs(struct kvm *kvm) -{ - return -ENODEV; -} - -static inline bool vgic_has_its(struct kvm *kvm) -{ - return false; -} - -static inline int kvm_vgic_register_its_device(void) -{ - return -ENODEV; -} - -static inline void vgic_enable_lpis(struct kvm_vcpu *vcpu) -{ -} - -static inline int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - return -ENODEV; -} -#endif - -int kvm_register_vgic_device(unsigned long type); -int vgic_lazy_init(struct kvm *kvm); -int vgic_init(struct kvm *kvm); - -#endif diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c deleted file mode 100644 index efeceb0..0000000 --- a/virt/kvm/async_pf.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - * kvm asynchronous fault support - * - * Copyright 2010 Red Hat, Inc. 
- * - * Author: - * Gleb Natapov <gleb@redhat.com> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include <linux/kvm_host.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/mmu_context.h> - -#include "async_pf.h" -#include <trace/events/kvm.h> - -static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu, - struct kvm_async_pf *work) -{ -#ifdef CONFIG_KVM_ASYNC_PF_SYNC - kvm_arch_async_page_present(vcpu, work); -#endif -} -static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu, - struct kvm_async_pf *work) -{ -#ifndef CONFIG_KVM_ASYNC_PF_SYNC - kvm_arch_async_page_present(vcpu, work); -#endif -} - -static struct kmem_cache *async_pf_cache; - -int kvm_async_pf_init(void) -{ - async_pf_cache = KMEM_CACHE(kvm_async_pf, 0); - - if (!async_pf_cache) - return -ENOMEM; - - return 0; -} - -void kvm_async_pf_deinit(void) -{ - kmem_cache_destroy(async_pf_cache); - async_pf_cache = NULL; -} - -void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) -{ - INIT_LIST_HEAD(&vcpu->async_pf.done); - INIT_LIST_HEAD(&vcpu->async_pf.queue); - spin_lock_init(&vcpu->async_pf.lock); -} - -static void async_pf_execute(struct work_struct *work) -{ - struct kvm_async_pf *apf = - container_of(work, struct kvm_async_pf, work); - struct mm_struct *mm = apf->mm; - struct kvm_vcpu *vcpu = apf->vcpu; - unsigned long addr = apf->addr; - gva_t 
gva = apf->gva; - - might_sleep(); - - /* - * This work is run asynchromously to the task which owns - * mm and might be done in another context, so we must - * use FOLL_REMOTE. - */ - __get_user_pages_unlocked(NULL, mm, addr, 1, NULL, - FOLL_WRITE | FOLL_REMOTE); - - kvm_async_page_present_sync(vcpu, apf); - - spin_lock(&vcpu->async_pf.lock); - list_add_tail(&apf->link, &vcpu->async_pf.done); - apf->vcpu = NULL; - spin_unlock(&vcpu->async_pf.lock); - - /* - * apf may be freed by kvm_check_async_pf_completion() after - * this point - */ - - trace_kvm_async_pf_completed(addr, gva); - - /* - * This memory barrier pairs with prepare_to_wait's set_current_state() - */ - smp_mb(); - if (swait_active(&vcpu->wq)) - swake_up(&vcpu->wq); - - mmput(mm); - kvm_put_kvm(vcpu->kvm); -} - -void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) -{ - spin_lock(&vcpu->async_pf.lock); - - /* cancel outstanding work queue item */ - while (!list_empty(&vcpu->async_pf.queue)) { - struct kvm_async_pf *work = - list_first_entry(&vcpu->async_pf.queue, - typeof(*work), queue); - list_del(&work->queue); - - /* - * We know it's present in vcpu->async_pf.done, do - * nothing here. 
- */ - if (!work->vcpu) - continue; - - spin_unlock(&vcpu->async_pf.lock); -#ifdef CONFIG_KVM_ASYNC_PF_SYNC - flush_work(&work->work); -#else - if (cancel_work_sync(&work->work)) { - mmput(work->mm); - kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ - kmem_cache_free(async_pf_cache, work); - } -#endif - spin_lock(&vcpu->async_pf.lock); - } - - while (!list_empty(&vcpu->async_pf.done)) { - struct kvm_async_pf *work = - list_first_entry(&vcpu->async_pf.done, - typeof(*work), link); - list_del(&work->link); - kmem_cache_free(async_pf_cache, work); - } - spin_unlock(&vcpu->async_pf.lock); - - vcpu->async_pf.queued = 0; -} - -void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) -{ - struct kvm_async_pf *work; - - while (!list_empty_careful(&vcpu->async_pf.done) && - kvm_arch_can_inject_async_page_present(vcpu)) { - spin_lock(&vcpu->async_pf.lock); - work = list_first_entry(&vcpu->async_pf.done, typeof(*work), - link); - list_del(&work->link); - spin_unlock(&vcpu->async_pf.lock); - - kvm_arch_async_page_ready(vcpu, work); - kvm_async_page_present_async(vcpu, work); - - list_del(&work->queue); - vcpu->async_pf.queued--; - kmem_cache_free(async_pf_cache, work); - } -} - -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, - struct kvm_arch_async_pf *arch) -{ - struct kvm_async_pf *work; - - if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU) - return 0; - - /* setup delayed work */ - - /* - * do alloc nowait since if we are going to sleep anyway we - * may as well sleep faulting in page - */ - work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN); - if (!work) - return 0; - - work->wakeup_all = false; - work->vcpu = vcpu; - work->gva = gva; - work->addr = hva; - work->arch = *arch; - work->mm = current->mm; - atomic_inc(&work->mm->mm_users); - kvm_get_kvm(work->vcpu->kvm); - - /* this can't really happen otherwise gfn_to_pfn_async - would succeed */ - if (unlikely(kvm_is_error_hva(work->addr))) - goto retry_sync; - - 
INIT_WORK(&work->work, async_pf_execute); - if (!schedule_work(&work->work)) - goto retry_sync; - - list_add_tail(&work->queue, &vcpu->async_pf.queue); - vcpu->async_pf.queued++; - kvm_arch_async_page_not_present(vcpu, work); - return 1; -retry_sync: - kvm_put_kvm(work->vcpu->kvm); - mmput(work->mm); - kmem_cache_free(async_pf_cache, work); - return 0; -} - -int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) -{ - struct kvm_async_pf *work; - - if (!list_empty_careful(&vcpu->async_pf.done)) - return 0; - - work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC); - if (!work) - return -ENOMEM; - - work->wakeup_all = true; - INIT_LIST_HEAD(&work->queue); /* for list_del to work */ - - spin_lock(&vcpu->async_pf.lock); - list_add_tail(&work->link, &vcpu->async_pf.done); - spin_unlock(&vcpu->async_pf.lock); - - vcpu->async_pf.queued++; - return 0; -} diff --git a/virt/kvm/async_pf.h b/virt/kvm/async_pf.h deleted file mode 100644 index ec4cfa2..0000000 --- a/virt/kvm/async_pf.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * kvm asynchronous fault support - * - * Copyright 2010 Red Hat, Inc. - * - * Author: - * Gleb Natapov <gleb@redhat.com> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
- */ - -#ifndef __KVM_ASYNC_PF_H__ -#define __KVM_ASYNC_PF_H__ - -#ifdef CONFIG_KVM_ASYNC_PF -int kvm_async_pf_init(void); -void kvm_async_pf_deinit(void); -void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu); -#else -#define kvm_async_pf_init() (0) -#define kvm_async_pf_deinit() do {} while (0) -#define kvm_async_pf_vcpu_init(C) do {} while (0) -#endif - -#endif diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c deleted file mode 100644 index 571c1ce..0000000 --- a/virt/kvm/coalesced_mmio.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * KVM coalesced MMIO - * - * Copyright (c) 2008 Bull S.A.S. - * Copyright 2009 Red Hat, Inc. and/or its affiliates. - * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> - * - */ - -#include <kvm/iodev.h> - -#include <linux/kvm_host.h> -#include <linux/slab.h> -#include <linux/kvm.h> - -#include "coalesced_mmio.h" - -static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev) -{ - return container_of(dev, struct kvm_coalesced_mmio_dev, dev); -} - -static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, - gpa_t addr, int len) -{ - /* is it in a batchable area ? - * (addr,len) is fully included in - * (zone->addr, zone->size) - */ - if (len < 0) - return 0; - if (addr + len < addr) - return 0; - if (addr < dev->zone.addr) - return 0; - if (addr + len > dev->zone.addr + dev->zone.size) - return 0; - return 1; -} - -static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) -{ - struct kvm_coalesced_mmio_ring *ring; - unsigned avail; - - /* Are we able to batch it ? 
*/ - - /* last is the first free entry - * check if we don't meet the first used entry - * there is always one unused entry in the buffer - */ - ring = dev->kvm->coalesced_mmio_ring; - avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; - if (avail == 0) { - /* full */ - return 0; - } - - return 1; -} - -static int coalesced_mmio_write(struct kvm_vcpu *vcpu, - struct kvm_io_device *this, gpa_t addr, - int len, const void *val) -{ - struct kvm_coalesced_mmio_dev *dev = to_mmio(this); - struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; - - if (!coalesced_mmio_in_range(dev, addr, len)) - return -EOPNOTSUPP; - - spin_lock(&dev->kvm->ring_lock); - - if (!coalesced_mmio_has_room(dev)) { - spin_unlock(&dev->kvm->ring_lock); - return -EOPNOTSUPP; - } - - /* copy data in first free entry of the ring */ - - ring->coalesced_mmio[ring->last].phys_addr = addr; - ring->coalesced_mmio[ring->last].len = len; - memcpy(ring->coalesced_mmio[ring->last].data, val, len); - smp_wmb(); - ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; - spin_unlock(&dev->kvm->ring_lock); - return 0; -} - -static void coalesced_mmio_destructor(struct kvm_io_device *this) -{ - struct kvm_coalesced_mmio_dev *dev = to_mmio(this); - - list_del(&dev->list); - - kfree(dev); -} - -static const struct kvm_io_device_ops coalesced_mmio_ops = { - .write = coalesced_mmio_write, - .destructor = coalesced_mmio_destructor, -}; - -int kvm_coalesced_mmio_init(struct kvm *kvm) -{ - struct page *page; - int ret; - - ret = -ENOMEM; - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) - goto out_err; - - ret = 0; - kvm->coalesced_mmio_ring = page_address(page); - - /* - * We're using this spinlock to sync access to the coalesced ring. - * The list doesn't need it's own lock since device registration and - * unregistration should only happen when kvm->slots_lock is held. 
- */ - spin_lock_init(&kvm->ring_lock); - INIT_LIST_HEAD(&kvm->coalesced_zones); - -out_err: - return ret; -} - -void kvm_coalesced_mmio_free(struct kvm *kvm) -{ - if (kvm->coalesced_mmio_ring) - free_page((unsigned long)kvm->coalesced_mmio_ring); -} - -int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone) -{ - int ret; - struct kvm_coalesced_mmio_dev *dev; - - dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); - if (!dev) - return -ENOMEM; - - kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); - dev->kvm = kvm; - dev->zone = *zone; - - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, - zone->size, &dev->dev); - if (ret < 0) - goto out_free_dev; - list_add_tail(&dev->list, &kvm->coalesced_zones); - mutex_unlock(&kvm->slots_lock); - - return 0; - -out_free_dev: - mutex_unlock(&kvm->slots_lock); - kfree(dev); - - return ret; -} - -int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone) -{ - struct kvm_coalesced_mmio_dev *dev, *tmp; - - mutex_lock(&kvm->slots_lock); - - list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) - if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) { - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev); - kvm_iodevice_destructor(&dev->dev); - } - - mutex_unlock(&kvm->slots_lock); - - return 0; -} diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h deleted file mode 100644 index 6bca74c..0000000 --- a/virt/kvm/coalesced_mmio.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef __KVM_COALESCED_MMIO_H__ -#define __KVM_COALESCED_MMIO_H__ - -/* - * KVM coalesced MMIO - * - * Copyright (c) 2008 Bull S.A.S. 
- * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> - * - */ - -#ifdef CONFIG_KVM_MMIO - -#include <linux/list.h> - -struct kvm_coalesced_mmio_dev { - struct list_head list; - struct kvm_io_device dev; - struct kvm *kvm; - struct kvm_coalesced_mmio_zone zone; -}; - -int kvm_coalesced_mmio_init(struct kvm *kvm); -void kvm_coalesced_mmio_free(struct kvm *kvm); -int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); -int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); - -#else - -static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; } -static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { } - -#endif - -#endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c deleted file mode 100644 index a29786d..0000000 --- a/virt/kvm/eventfd.c +++ /dev/null @@ -1,956 +0,0 @@ -/* - * kvm eventfd support - use eventfd objects to signal various KVM events - * - * Copyright 2009 Novell. All Rights Reserved. - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * Author: - * Gregory Haskins <ghaskins@novell.com> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
- */ - -#include <linux/kvm_host.h> -#include <linux/kvm.h> -#include <linux/kvm_irqfd.h> -#include <linux/workqueue.h> -#include <linux/syscalls.h> -#include <linux/wait.h> -#include <linux/poll.h> -#include <linux/file.h> -#include <linux/list.h> -#include <linux/eventfd.h> -#include <linux/kernel.h> -#include <linux/srcu.h> -#include <linux/slab.h> -#include <linux/seqlock.h> -#include <linux/irqbypass.h> -#include <trace/events/kvm.h> - -#include <kvm/iodev.h> - -#ifdef CONFIG_HAVE_KVM_IRQFD - -static struct workqueue_struct *irqfd_cleanup_wq; - -static void -irqfd_inject(struct work_struct *work) -{ - struct kvm_kernel_irqfd *irqfd = - container_of(work, struct kvm_kernel_irqfd, inject); - struct kvm *kvm = irqfd->kvm; - - if (!irqfd->resampler) { - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1, - false); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0, - false); - } else - kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - irqfd->gsi, 1, false); -} - -/* - * Since resampler irqfds share an IRQ source ID, we de-assert once - * then notify all of the resampler irqfds using this GSI. We can't - * do multiple de-asserts or we risk racing with incoming re-asserts. 
- */ -static void -irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_kernel_irqfd_resampler *resampler; - struct kvm *kvm; - struct kvm_kernel_irqfd *irqfd; - int idx; - - resampler = container_of(kian, - struct kvm_kernel_irqfd_resampler, notifier); - kvm = resampler->kvm; - - kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - resampler->notifier.gsi, 0, false); - - idx = srcu_read_lock(&kvm->irq_srcu); - - list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) - eventfd_signal(irqfd->resamplefd, 1); - - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -static void -irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) -{ - struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; - struct kvm *kvm = resampler->kvm; - - mutex_lock(&kvm->irqfds.resampler_lock); - - list_del_rcu(&irqfd->resampler_link); - synchronize_srcu(&kvm->irq_srcu); - - if (list_empty(&resampler->list)) { - list_del(&resampler->link); - kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); - kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - resampler->notifier.gsi, 0, false); - kfree(resampler); - } - - mutex_unlock(&kvm->irqfds.resampler_lock); -} - -/* - * Race-free decouple logic (ordering is critical) - */ -static void -irqfd_shutdown(struct work_struct *work) -{ - struct kvm_kernel_irqfd *irqfd = - container_of(work, struct kvm_kernel_irqfd, shutdown); - u64 cnt; - - /* - * Synchronize with the wait-queue and unhook ourselves to prevent - * further events. 
- */ - eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt); - - /* - * We know no new events will be scheduled at this point, so block - * until all previously outstanding events have completed - */ - flush_work(&irqfd->inject); - - if (irqfd->resampler) { - irqfd_resampler_shutdown(irqfd); - eventfd_ctx_put(irqfd->resamplefd); - } - - /* - * It is now safe to release the object's resources - */ -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS - irq_bypass_unregister_consumer(&irqfd->consumer); -#endif - eventfd_ctx_put(irqfd->eventfd); - kfree(irqfd); -} - - -/* assumes kvm->irqfds.lock is held */ -static bool -irqfd_is_active(struct kvm_kernel_irqfd *irqfd) -{ - return list_empty(&irqfd->list) ? false : true; -} - -/* - * Mark the irqfd as inactive and schedule it for removal - * - * assumes kvm->irqfds.lock is held - */ -static void -irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) -{ - BUG_ON(!irqfd_is_active(irqfd)); - - list_del_init(&irqfd->list); - - queue_work(irqfd_cleanup_wq, &irqfd->shutdown); -} - -int __attribute__((weak)) kvm_arch_set_irq_inatomic( - struct kvm_kernel_irq_routing_entry *irq, - struct kvm *kvm, int irq_source_id, - int level, - bool line_status) -{ - return -EWOULDBLOCK; -} - -/* - * Called with wqh->lock held and interrupts disabled - */ -static int -irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) -{ - struct kvm_kernel_irqfd *irqfd = - container_of(wait, struct kvm_kernel_irqfd, wait); - unsigned long flags = (unsigned long)key; - struct kvm_kernel_irq_routing_entry irq; - struct kvm *kvm = irqfd->kvm; - unsigned seq; - int idx; - - if (flags & POLLIN) { - idx = srcu_read_lock(&kvm->irq_srcu); - do { - seq = read_seqcount_begin(&irqfd->irq_entry_sc); - irq = irqfd->irq_entry; - } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); - /* An event has been signaled, inject an interrupt */ - if (kvm_arch_set_irq_inatomic(&irq, kvm, - KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false) == -EWOULDBLOCK) - 
schedule_work(&irqfd->inject); - srcu_read_unlock(&kvm->irq_srcu, idx); - } - - if (flags & POLLHUP) { - /* The eventfd is closing, detach from KVM */ - unsigned long flags; - - spin_lock_irqsave(&kvm->irqfds.lock, flags); - - /* - * We must check if someone deactivated the irqfd before - * we could acquire the irqfds.lock since the item is - * deactivated from the KVM side before it is unhooked from - * the wait-queue. If it is already deactivated, we can - * simply return knowing the other side will cleanup for us. - * We cannot race against the irqfd going away since the - * other side is required to acquire wqh->lock, which we hold - */ - if (irqfd_is_active(irqfd)) - irqfd_deactivate(irqfd); - - spin_unlock_irqrestore(&kvm->irqfds.lock, flags); - } - - return 0; -} - -static void -irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, - poll_table *pt) -{ - struct kvm_kernel_irqfd *irqfd = - container_of(pt, struct kvm_kernel_irqfd, pt); - add_wait_queue(wqh, &irqfd->wait); -} - -/* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) -{ - struct kvm_kernel_irq_routing_entry *e; - struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; - int n_entries; - - n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); - - write_seqcount_begin(&irqfd->irq_entry_sc); - - e = entries; - if (n_entries == 1) - irqfd->irq_entry = *e; - else - irqfd->irq_entry.type = 0; - - write_seqcount_end(&irqfd->irq_entry_sc); -} - -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS -void __attribute__((weak)) kvm_arch_irq_bypass_stop( - struct irq_bypass_consumer *cons) -{ -} - -void __attribute__((weak)) kvm_arch_irq_bypass_start( - struct irq_bypass_consumer *cons) -{ -} - -int __attribute__((weak)) kvm_arch_update_irqfd_routing( - struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) -{ - return 0; -} -#endif - -static int -kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) -{ - struct 
kvm_kernel_irqfd *irqfd, *tmp; - struct fd f; - struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; - int ret; - unsigned int events; - int idx; - - if (!kvm_arch_intc_initialized(kvm)) - return -EAGAIN; - - irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); - if (!irqfd) - return -ENOMEM; - - irqfd->kvm = kvm; - irqfd->gsi = args->gsi; - INIT_LIST_HEAD(&irqfd->list); - INIT_WORK(&irqfd->inject, irqfd_inject); - INIT_WORK(&irqfd->shutdown, irqfd_shutdown); - seqcount_init(&irqfd->irq_entry_sc); - - f = fdget(args->fd); - if (!f.file) { - ret = -EBADF; - goto out; - } - - eventfd = eventfd_ctx_fileget(f.file); - if (IS_ERR(eventfd)) { - ret = PTR_ERR(eventfd); - goto fail; - } - - irqfd->eventfd = eventfd; - - if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { - struct kvm_kernel_irqfd_resampler *resampler; - - resamplefd = eventfd_ctx_fdget(args->resamplefd); - if (IS_ERR(resamplefd)) { - ret = PTR_ERR(resamplefd); - goto fail; - } - - irqfd->resamplefd = resamplefd; - INIT_LIST_HEAD(&irqfd->resampler_link); - - mutex_lock(&kvm->irqfds.resampler_lock); - - list_for_each_entry(resampler, - &kvm->irqfds.resampler_list, link) { - if (resampler->notifier.gsi == irqfd->gsi) { - irqfd->resampler = resampler; - break; - } - } - - if (!irqfd->resampler) { - resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); - if (!resampler) { - ret = -ENOMEM; - mutex_unlock(&kvm->irqfds.resampler_lock); - goto fail; - } - - resampler->kvm = kvm; - INIT_LIST_HEAD(&resampler->list); - resampler->notifier.gsi = irqfd->gsi; - resampler->notifier.irq_acked = irqfd_resampler_ack; - INIT_LIST_HEAD(&resampler->link); - - list_add(&resampler->link, &kvm->irqfds.resampler_list); - kvm_register_irq_ack_notifier(kvm, - &resampler->notifier); - irqfd->resampler = resampler; - } - - list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); - synchronize_srcu(&kvm->irq_srcu); - - mutex_unlock(&kvm->irqfds.resampler_lock); - } - - /* - * Install our own custom wake-up handling so we are notified via - * a 
callback whenever someone signals the underlying eventfd - */ - init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); - init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc); - - spin_lock_irq(&kvm->irqfds.lock); - - ret = 0; - list_for_each_entry(tmp, &kvm->irqfds.items, list) { - if (irqfd->eventfd != tmp->eventfd) - continue; - /* This fd is used for another irq already. */ - ret = -EBUSY; - spin_unlock_irq(&kvm->irqfds.lock); - goto fail; - } - - idx = srcu_read_lock(&kvm->irq_srcu); - irqfd_update(kvm, irqfd); - srcu_read_unlock(&kvm->irq_srcu, idx); - - list_add_tail(&irqfd->list, &kvm->irqfds.items); - - spin_unlock_irq(&kvm->irqfds.lock); - - /* - * Check if there was an event already pending on the eventfd - * before we registered, and trigger it as if we didn't miss it. - */ - events = f.file->f_op->poll(f.file, &irqfd->pt); - - if (events & POLLIN) - schedule_work(&irqfd->inject); - - /* - * do not drop the file until the irqfd is fully initialized, otherwise - * we might race against the POLLHUP - */ - fdput(f); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS - if (kvm_arch_has_irq_bypass()) { - irqfd->consumer.token = (void *)irqfd->eventfd; - irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; - irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; - irqfd->consumer.stop = kvm_arch_irq_bypass_stop; - irqfd->consumer.start = kvm_arch_irq_bypass_start; - ret = irq_bypass_register_consumer(&irqfd->consumer); - if (ret) - pr_info("irq bypass consumer (token %p) registration fails: %d\n", - irqfd->consumer.token, ret); - } -#endif - - return 0; - -fail: - if (irqfd->resampler) - irqfd_resampler_shutdown(irqfd); - - if (resamplefd && !IS_ERR(resamplefd)) - eventfd_ctx_put(resamplefd); - - if (eventfd && !IS_ERR(eventfd)) - eventfd_ctx_put(eventfd); - - fdput(f); - -out: - kfree(irqfd); - return ret; -} - -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = 
srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) -{ - struct kvm_irq_ack_notifier *kian; - - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); -} - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - kvm_notify_acked_gsi(kvm, gsi); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); - kvm_vcpu_request_scan_ioapic(kvm); -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); - kvm_vcpu_request_scan_ioapic(kvm); -} -#endif - -void -kvm_eventfd_init(struct kvm *kvm) -{ -#ifdef CONFIG_HAVE_KVM_IRQFD - spin_lock_init(&kvm->irqfds.lock); - INIT_LIST_HEAD(&kvm->irqfds.items); - INIT_LIST_HEAD(&kvm->irqfds.resampler_list); - mutex_init(&kvm->irqfds.resampler_lock); -#endif - INIT_LIST_HEAD(&kvm->ioeventfds); -} - -#ifdef CONFIG_HAVE_KVM_IRQFD -/* - * shutdown any irqfd's that match fd+gsi - */ -static int -kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) -{ - struct kvm_kernel_irqfd *irqfd, *tmp; - struct eventfd_ctx *eventfd; - - eventfd = eventfd_ctx_fdget(args->fd); 
- if (IS_ERR(eventfd)) - return PTR_ERR(eventfd); - - spin_lock_irq(&kvm->irqfds.lock); - - list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { - if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { - /* - * This clearing of irq_entry.type is needed for when - * another thread calls kvm_irq_routing_update before - * we flush workqueue below (we synchronize with - * kvm_irq_routing_update using irqfds.lock). - */ - write_seqcount_begin(&irqfd->irq_entry_sc); - irqfd->irq_entry.type = 0; - write_seqcount_end(&irqfd->irq_entry_sc); - irqfd_deactivate(irqfd); - } - } - - spin_unlock_irq(&kvm->irqfds.lock); - eventfd_ctx_put(eventfd); - - /* - * Block until we know all outstanding shutdown jobs have completed - * so that we guarantee there will not be any more interrupts on this - * gsi once this deassign function returns. - */ - flush_workqueue(irqfd_cleanup_wq); - - return 0; -} - -int -kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) -{ - if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE)) - return -EINVAL; - - if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) - return kvm_irqfd_deassign(kvm, args); - - return kvm_irqfd_assign(kvm, args); -} - -/* - * This function is called as the kvm VM fd is being released. Shutdown all - * irqfds that still remain open - */ -void -kvm_irqfd_release(struct kvm *kvm) -{ - struct kvm_kernel_irqfd *irqfd, *tmp; - - spin_lock_irq(&kvm->irqfds.lock); - - list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) - irqfd_deactivate(irqfd); - - spin_unlock_irq(&kvm->irqfds.lock); - - /* - * Block until we know all outstanding shutdown jobs have completed - * since we do not take a kvm* reference. - */ - flush_workqueue(irqfd_cleanup_wq); - -} - -/* - * Take note of a change in irq routing. - * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. 
- */ -void kvm_irq_routing_update(struct kvm *kvm) -{ - struct kvm_kernel_irqfd *irqfd; - - spin_lock_irq(&kvm->irqfds.lock); - - list_for_each_entry(irqfd, &kvm->irqfds.items, list) { - irqfd_update(kvm, irqfd); - -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS - if (irqfd->producer) { - int ret = kvm_arch_update_irqfd_routing( - irqfd->kvm, irqfd->producer->irq, - irqfd->gsi, 1); - WARN_ON(ret); - } -#endif - } - - spin_unlock_irq(&kvm->irqfds.lock); -} - -/* - * create a host-wide workqueue for issuing deferred shutdown requests - * aggregated from all vm* instances. We need our own isolated - * queue to ease flushing work items when a VM exits. - */ -int kvm_irqfd_init(void) -{ - irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0); - if (!irqfd_cleanup_wq) - return -ENOMEM; - - return 0; -} - -void kvm_irqfd_exit(void) -{ - destroy_workqueue(irqfd_cleanup_wq); -} -#endif - -/* - * -------------------------------------------------------------------- - * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal. - * - * userspace can register a PIO/MMIO address with an eventfd for receiving - * notification when the memory has been touched. 
- * -------------------------------------------------------------------- - */ - -struct _ioeventfd { - struct list_head list; - u64 addr; - int length; - struct eventfd_ctx *eventfd; - u64 datamatch; - struct kvm_io_device dev; - u8 bus_idx; - bool wildcard; -}; - -static inline struct _ioeventfd * -to_ioeventfd(struct kvm_io_device *dev) -{ - return container_of(dev, struct _ioeventfd, dev); -} - -static void -ioeventfd_release(struct _ioeventfd *p) -{ - eventfd_ctx_put(p->eventfd); - list_del(&p->list); - kfree(p); -} - -static bool -ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) -{ - u64 _val; - - if (addr != p->addr) - /* address must be precise for a hit */ - return false; - - if (!p->length) - /* length = 0 means only look at the address, so always a hit */ - return true; - - if (len != p->length) - /* address-range must be precise for a hit */ - return false; - - if (p->wildcard) - /* all else equal, wildcard is always a hit */ - return true; - - /* otherwise, we have to actually compare the data */ - - BUG_ON(!IS_ALIGNED((unsigned long)val, len)); - - switch (len) { - case 1: - _val = *(u8 *)val; - break; - case 2: - _val = *(u16 *)val; - break; - case 4: - _val = *(u32 *)val; - break; - case 8: - _val = *(u64 *)val; - break; - default: - return false; - } - - return _val == p->datamatch ? true : false; -} - -/* MMIO/PIO writes trigger an event if the addr/val match */ -static int -ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, - int len, const void *val) -{ - struct _ioeventfd *p = to_ioeventfd(this); - - if (!ioeventfd_in_range(p, addr, len, val)) - return -EOPNOTSUPP; - - eventfd_signal(p->eventfd, 1); - return 0; -} - -/* - * This function is called as KVM is completely shutting down. 
We do not - * need to worry about locking just nuke anything we have as quickly as possible - */ -static void -ioeventfd_destructor(struct kvm_io_device *this) -{ - struct _ioeventfd *p = to_ioeventfd(this); - - ioeventfd_release(p); -} - -static const struct kvm_io_device_ops ioeventfd_ops = { - .write = ioeventfd_write, - .destructor = ioeventfd_destructor, -}; - -/* assumes kvm->slots_lock held */ -static bool -ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) -{ - struct _ioeventfd *_p; - - list_for_each_entry(_p, &kvm->ioeventfds, list) - if (_p->bus_idx == p->bus_idx && - _p->addr == p->addr && - (!_p->length || !p->length || - (_p->length == p->length && - (_p->wildcard || p->wildcard || - _p->datamatch == p->datamatch)))) - return true; - - return false; -} - -static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) -{ - if (flags & KVM_IOEVENTFD_FLAG_PIO) - return KVM_PIO_BUS; - if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) - return KVM_VIRTIO_CCW_NOTIFY_BUS; - return KVM_MMIO_BUS; -} - -static int kvm_assign_ioeventfd_idx(struct kvm *kvm, - enum kvm_bus bus_idx, - struct kvm_ioeventfd *args) -{ - - struct eventfd_ctx *eventfd; - struct _ioeventfd *p; - int ret; - - eventfd = eventfd_ctx_fdget(args->fd); - if (IS_ERR(eventfd)) - return PTR_ERR(eventfd); - - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) { - ret = -ENOMEM; - goto fail; - } - - INIT_LIST_HEAD(&p->list); - p->addr = args->addr; - p->bus_idx = bus_idx; - p->length = args->len; - p->eventfd = eventfd; - - /* The datamatch feature is optional, otherwise this is a wildcard */ - if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH) - p->datamatch = args->datamatch; - else - p->wildcard = true; - - mutex_lock(&kvm->slots_lock); - - /* Verify that there isn't a match already */ - if (ioeventfd_check_collision(kvm, p)) { - ret = -EEXIST; - goto unlock_fail; - } - - kvm_iodevice_init(&p->dev, &ioeventfd_ops); - - ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length, - &p->dev); 
- if (ret < 0) - goto unlock_fail; - - kvm->buses[bus_idx]->ioeventfd_count++; - list_add_tail(&p->list, &kvm->ioeventfds); - - mutex_unlock(&kvm->slots_lock); - - return 0; - -unlock_fail: - mutex_unlock(&kvm->slots_lock); - -fail: - kfree(p); - eventfd_ctx_put(eventfd); - - return ret; -} - -static int -kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_ioeventfd *args) -{ - struct _ioeventfd *p, *tmp; - struct eventfd_ctx *eventfd; - int ret = -ENOENT; - - eventfd = eventfd_ctx_fdget(args->fd); - if (IS_ERR(eventfd)) - return PTR_ERR(eventfd); - - mutex_lock(&kvm->slots_lock); - - list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { - bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); - - if (p->bus_idx != bus_idx || - p->eventfd != eventfd || - p->addr != args->addr || - p->length != args->len || - p->wildcard != wildcard) - continue; - - if (!p->wildcard && p->datamatch != args->datamatch) - continue; - - kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - kvm->buses[bus_idx]->ioeventfd_count--; - ioeventfd_release(p); - ret = 0; - break; - } - - mutex_unlock(&kvm->slots_lock); - - eventfd_ctx_put(eventfd); - - return ret; -} - -static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) -{ - enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); - int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); - - if (!args->len && bus_idx == KVM_MMIO_BUS) - kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); - - return ret; -} - -static int -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) -{ - enum kvm_bus bus_idx; - int ret; - - bus_idx = ioeventfd_bus_from_flags(args->flags); - /* must be natural-word sized, or 0 to ignore length */ - switch (args->len) { - case 0: - case 1: - case 2: - case 4: - case 8: - break; - default: - return -EINVAL; - } - - /* check for range overflow */ - if (args->addr + args->len < args->addr) - return -EINVAL; - - /* check for extra flags 
that we don't understand */ - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) - return -EINVAL; - - /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) - return -EINVAL; - - ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); - if (ret) - goto fail; - - /* When length is ignored, MMIO is also put on a separate bus, for - * faster lookups. - */ - if (!args->len && bus_idx == KVM_MMIO_BUS) { - ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); - if (ret < 0) - goto fast_fail; - } - - return 0; - -fast_fail: - kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); -fail: - return ret; -} - -int -kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) -{ - if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN) - return kvm_deassign_ioeventfd(kvm, args); - - return kvm_assign_ioeventfd(kvm, args); -} diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 3bcc999..3885f42 100644..100755 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -3,6 +3,7 @@ * Copyright (c) 2007, Intel Corporation. * Copyright 2010 Red Hat, Inc. and/or its affiliates. 
* Copyright (c) 2013, Alexander Graf <agraf@suse.de> + * Copyright 2019 Google LLC * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -25,11 +26,7 @@ */ #include <linux/kvm_host.h> -#include <linux/slab.h> -#include <linux/srcu.h> -#include <linux/export.h> -#include <trace/events/kvm.h> -#include "irq.h" +#include "arch\x86\kvm\irq.h" int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi) @@ -38,13 +35,15 @@ int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e; int n = 0; - irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, - lockdep_is_held(&kvm->irq_lock)); + irq_rt = kvm->irq_routing; + if (irq_rt && gsi < irq_rt->nr_rt_entries) { +#define LIST_ENTRY_TYPE_INFO struct kvm_kernel_irq_routing_entry hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; ++n; } +#undef LIST_ENTRY_TYPE_INFO } return n; @@ -62,7 +61,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; - if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID)) + if (!irqchip_in_kernel(kvm) || (msi->flags & ~GVM_MSI_VALID_DEVID)) return -EINVAL; route.msi.address_lo = msi->address_lo; @@ -71,7 +70,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) route.msi.flags = msi->flags; route.msi.devid = msi->devid; - return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); + return kvm_set_msi(&route, kvm, GVM_USERSPACE_IRQ_SOURCE_ID, 1, false); } /* @@ -83,11 +82,9 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { - struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; + struct kvm_kernel_irq_routing_entry irq_set[GVM_NR_IRQCHIPS]; int ret = -1, i, idx; - trace_kvm_set_irq(irq, level, irq_source_id); - /* Not 
possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. @@ -120,10 +117,13 @@ static void free_irq_routing_table(struct kvm_irq_routing_table *rt) struct kvm_kernel_irq_routing_entry *e; struct hlist_node *n; +#define LIST_ENTRY_TYPE_INFO struct kvm_kernel_irq_routing_entry hlist_for_each_entry_safe(e, n, &rt->map[i], link) { + n = e->link.next; hlist_del(&e->link); kfree(e); } +#undef LIST_ENTRY_TYPE_INFO } kfree(rt); @@ -133,7 +133,7 @@ void kvm_free_irq_routing(struct kvm *kvm) { /* Called only during vm destruction. Nobody can use the pointer at this stage */ - struct kvm_irq_routing_table *rt = rcu_access_pointer(kvm->irq_routing); + struct kvm_irq_routing_table *rt = kvm->irq_routing; free_irq_routing_table(rt); } @@ -149,18 +149,20 @@ static int setup_routing_entry(struct kvm *kvm, * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and non-irqchip routing. 
*/ +#define LIST_ENTRY_TYPE_INFO struct kvm_kernel_irq_routing_entry hlist_for_each_entry(ei, &rt->map[ue->gsi], link) - if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || - ue->type != KVM_IRQ_ROUTING_IRQCHIP || + if (ei->type != GVM_IRQ_ROUTING_IRQCHIP || + ue->type != GVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return r; +#undef LIST_ENTRY_TYPE_INFO e->gsi = ue->gsi; e->type = ue->type; r = kvm_set_routing_entry(kvm, e, ue); if (r) goto out; - if (e->type == KVM_IRQ_ROUTING_IRQCHIP) + if (e->type == GVM_IRQ_ROUTING_IRQCHIP) rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); @@ -169,9 +171,10 @@ out: return r; } -void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm) +void kvm_arch_irq_routing_update_default(struct kvm *kvm) { } +#pragma comment(linker, "/alternatename:kvm_arch_irq_routing_update=kvm_arch_irq_routing_update_default") int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *ue, @@ -184,7 +187,7 @@ int kvm_set_irq_routing(struct kvm *kvm, int r; for (i = 0; i < nr; ++i) { - if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) + if (ue[i].gsi >= GVM_MAX_IRQ_ROUTES) return -EINVAL; nr_rt_entries = max(nr_rt_entries, ue[i].gsi); } @@ -198,8 +201,8 @@ int kvm_set_irq_routing(struct kvm *kvm, return -ENOMEM; new->nr_rt_entries = nr_rt_entries; - for (i = 0; i < KVM_NR_IRQCHIPS; i++) - for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) + for (i = 0; i < GVM_NR_IRQCHIPS; i++) + for (j = 0; j < GVM_IRQCHIP_NUM_PINS; j++) new->chip[i][j] = -1; for (i = 0; i < nr; ++i) { @@ -210,8 +213,8 @@ int kvm_set_irq_routing(struct kvm *kvm, r = -EINVAL; switch (ue->type) { - case KVM_IRQ_ROUTING_MSI: - if (ue->flags & ~KVM_MSI_VALID_DEVID) + case GVM_IRQ_ROUTING_MSI: + if (ue->flags & ~GVM_MSI_VALID_DEVID) goto free_entry; break; default: @@ -227,9 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm, mutex_lock(&kvm->irq_lock); old = kvm->irq_routing; - rcu_assign_pointer(kvm->irq_routing, 
new); - kvm_irq_routing_update(kvm); - kvm_arch_irq_routing_update(kvm); + kvm->irq_routing = new; mutex_unlock(&kvm->irq_lock); kvm_arch_post_irq_routing_update(kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5c36034..e521da2 100644..100755 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -6,6 +6,7 @@ * * Copyright (C) 2006 Qumranet, Inc. * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * Copyright 2019 Google LLC * * Authors: * Avi Kivity <avi@qumranet.com> @@ -19,68 +20,13 @@ #include <kvm/iodev.h> #include <linux/kvm_host.h> -#include <linux/kvm.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/percpu.h> -#include <linux/mm.h> -#include <linux/miscdevice.h> -#include <linux/vmalloc.h> -#include <linux/reboot.h> -#include <linux/debugfs.h> -#include <linux/highmem.h> -#include <linux/file.h> -#include <linux/syscore_ops.h> -#include <linux/cpu.h> -#include <linux/sched.h> -#include <linux/cpumask.h> -#include <linux/smp.h> -#include <linux/anon_inodes.h> -#include <linux/profile.h> -#include <linux/kvm_para.h> -#include <linux/pagemap.h> -#include <linux/mman.h> -#include <linux/swap.h> -#include <linux/bitops.h> -#include <linux/spinlock.h> -#include <linux/compat.h> -#include <linux/srcu.h> -#include <linux/hugetlb.h> -#include <linux/slab.h> -#include <linux/sort.h> -#include <linux/bsearch.h> - -#include <asm/processor.h> -#include <asm/io.h> -#include <asm/ioctl.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> - -#include "coalesced_mmio.h" -#include "async_pf.h" -#include "vfio.h" - -#define CREATE_TRACE_POINTS -#include <trace/events/kvm.h> +#include <uapi/linux/kvm.h> +#include <ntkrutils.h> +#include <gvm-main.h> /* Worst case buffer size needed for holding an integer. 
*/ #define ITOA_MAX_LEN 12 -MODULE_AUTHOR("Qumranet"); -MODULE_LICENSE("GPL"); - -/* Architectures should define their poll value according to the halt latency */ -static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; -module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); - -/* Default doubles per-vcpu halt_poll_ns. */ -static unsigned int halt_poll_ns_grow = 2; -module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); - -/* Default resets per-vcpu halt_poll_ns . */ -static unsigned int halt_poll_ns_shrink; -module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); - /* * Ordering of locks: * @@ -90,47 +36,129 @@ module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); DEFINE_SPINLOCK(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); +static LONG64 global_vm_id = -1; static cpumask_var_t cpus_hardware_enabled; static int kvm_usage_count; static atomic_t hardware_enable_failed; struct kmem_cache *kvm_vcpu_cache; -EXPORT_SYMBOL_GPL(kvm_vcpu_cache); - -static __read_mostly struct preempt_ops kvm_preempt_ops; - -struct dentry *kvm_debugfs_dir; -EXPORT_SYMBOL_GPL(kvm_debugfs_dir); -static int kvm_debugfs_num_entries; -static const struct file_operations *stat_fops_per_vm[]; - -static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, - unsigned long arg); -#ifdef CONFIG_KVM_COMPAT -static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, - unsigned long arg); -#endif static int hardware_enable_all(void); static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); -static void kvm_release_pfn_dirty(kvm_pfn_t pfn); +void kvm_release_pfn_dirty(kvm_pfn_t pfn); static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); -__visible bool kvm_rebooting; -EXPORT_SYMBOL_GPL(kvm_rebooting); - -static bool largepages_enabled = true; +/* +* bsearch - binary search an array of elements +* @key: pointer to item being searched for +* @base: pointer to first element to 
search +* @num: number of elements +* @size: size of each element +* @cmp: pointer to comparison function +* +* This function does a binary search on the given array. The +* contents of the array should already be in ascending sorted order +* under the provided comparison function. +* +* Note that the key need not have the same type as the elements in +* the array, e.g. key could be a string and the comparison function +* could compare the string with the struct's name field. However, if +* the key and elements in the array are of the same type, you can use +* the same comparison function for both sort() and bsearch(). +*/ +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int(*cmp)(const void *key, const void *elt)) +{ + size_t start = 0, end = num; + int result; + const char *__base = base; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = cmp(key, __base + mid * size); + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return (void *)(__base + mid * size); + } + + return NULL; +} + +static void generic_swap(void *a, void *b, int size) +{ + char t; + char *__a = a, *__b = b; + + do { + t = *__a; + *__a++ = *__b; + *__b++ = t; + } while (--size > 0); +} -bool kvm_is_reserved_pfn(kvm_pfn_t pfn) -{ - if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); +/** +* sort - sort an array of elements +* @base: pointer to data to sort +* @num: number of elements +* @size: size of each element +* @cmp_func: pointer to comparison function +* @swap_func: pointer to swap function or NULL +* +* This function does a heapsort on the given array. You may provide a +* swap_func function optimized to your element type. +* +* Sorting time is O(n log n) both on average and worst-case. While +* qsort is about 20% faster on average, it suffers from exploitable +* O(n*n) worst-case behavior and extra memory requirements that make +* it less suitable for kernel use. 
+*/ + +static void sort(void *base, size_t num, size_t size, + int(*cmp_func)(const void *, const void *), + void(*swap_func)(void *, void *, int size)) +{ + /* pre-scale counters for performance */ + int i = (num / 2 - 1) * size, n = num * size, c, r; + char *__base = base; + + if (!swap_func) { + swap_func = generic_swap; +} + + /* heapify */ + for (; i >= 0; i -= size) { + for (r = i; r * 2 + size < n; r = c) { + c = r * 2 + size; + if (c < n - size && + cmp_func(__base + c, __base + c + size) < 0) + c += size; + if (cmp_func(__base + r, __base + c) >= 0) + break; + swap_func(__base + r, __base + c, size); + } + } - return true; + /* sort */ + for (i = n - size; i > 0; i -= size) { + swap_func(__base, __base + i, size); + for (r = 0; r * 2 + size < i; r = c) { + c = r * 2 + size; + if (c < i - size && + cmp_func(__base + c, __base + c + size) < 0) + c += size; + if (cmp_func(__base + r, __base + c) >= 0) + break; + swap_func(__base + r, __base + c, size); + } + } } /* @@ -140,27 +168,20 @@ int vcpu_load(struct kvm_vcpu *vcpu) { int cpu; - if (mutex_lock_killable(&vcpu->mutex)) - return -EINTR; + mutex_lock(&vcpu->mutex); cpu = get_cpu(); - preempt_notifier_register(&vcpu->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); - put_cpu(); - return 0; + return cpu; } -EXPORT_SYMBOL_GPL(vcpu_load); void vcpu_put(struct kvm_vcpu *vcpu) { - preempt_disable(); kvm_arch_vcpu_put(vcpu); - preempt_notifier_unregister(&vcpu->preempt_notifier); - preempt_enable(); + put_cpu(); mutex_unlock(&vcpu->mutex); } -EXPORT_SYMBOL_GPL(vcpu_put); -static void ack_flush(void *_completed) +void ack_flush(void *_completed) { } @@ -173,7 +194,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) zalloc_cpumask_var(&cpus, GFP_ATOMIC); - me = get_cpu(); + me = smp_processor_id(); kvm_for_each_vcpu(i, vcpu, kvm) { kvm_make_request(req, vcpu); cpu = vcpu->cpu; @@ -191,19 +212,18 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) smp_call_function_many(cpus, 
ack_flush, NULL, 1); else called = false; - put_cpu(); free_cpumask_var(cpus); return called; } -#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL void kvm_flush_remote_tlbs(struct kvm *kvm) { /* - * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in + * Read tlbs_dirty before setting GVM_REQ_TLB_FLUSH in * kvm_make_all_cpus_request. */ - long dirty_count = smp_load_acquire(&kvm->tlbs_dirty); + long dirty_count; + READ_ONCE(kvm->tlbs_dirty, dirty_count); /* * We want to publish modifications to the page tables before reading @@ -216,66 +236,65 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that * barrier here. */ - if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) + if (kvm_make_all_cpus_request(kvm, GVM_REQ_TLB_FLUSH)) ++kvm->stat.remote_tlb_flush; cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); } -EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); -#endif void kvm_reload_remote_mmus(struct kvm *kvm) { - kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); + kvm_make_all_cpus_request(kvm, GVM_REQ_MMU_RELOAD); } int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { - struct page *page; int r; mutex_init(&vcpu->mutex); vcpu->cpu = -1; vcpu->kvm = kvm; vcpu->vcpu_id = id; - vcpu->pid = NULL; - init_swait_queue_head(&vcpu->wq); - kvm_async_pf_vcpu_init(vcpu); + vcpu->thread = NULL; vcpu->pre_pcpu = -1; INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) { + /* + * KVM(Lin) allocates two seperate pages for vcpu->run and MMIO Emulation page + * vcpu->arch.piodata. These two pages will be mapped to userland as continuous + * virtual address space. Linux API allows to do that but I did not find a + * Windows equivalent API. So keep the physical pages also continuous. 
+ */ + vcpu->run = ExAllocatePoolWithTag(NonPagedPool, 2 * PAGE_SIZE, GVM_POOL_TAG); + if (!vcpu->run) { r = -ENOMEM; goto fail; } - vcpu->run = page_address(page); - kvm_vcpu_set_in_spin_loop(vcpu, false); - kvm_vcpu_set_dy_eligible(vcpu, false); vcpu->preempted = false; + KeInitializeEvent(&vcpu->kick_event, SynchronizationEvent, FALSE); + r = kvm_arch_vcpu_init(vcpu); if (r < 0) goto fail_free_run; return 0; fail_free_run: - free_page((unsigned long)vcpu->run); + ExFreePoolWithTag(vcpu->run, GVM_POOL_TAG); fail: return r; } -EXPORT_SYMBOL_GPL(kvm_vcpu_init); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) { - put_pid(vcpu->pid); kvm_arch_vcpu_uninit(vcpu); - free_page((unsigned long)vcpu->run); + if (vcpu->run_userva) + __vm_munmap(vcpu->run_userva, 2 * PAGE_SIZE, false); + ExFreePoolWithTag(vcpu->run, GVM_POOL_TAG); } -EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#if defined(CONFIG_MMU_NOTIFIER) && defined(GVM_ARCH_WANT_MMU_NOTIFIER) static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) { return container_of(mn, struct kvm, mmu_notifier); @@ -283,7 +302,7 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) + size_t address) { struct kvm *kvm = mmu_notifier_to_kvm(mn); int need_tlb_flush, idx; @@ -324,7 +343,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address, + size_t address, pte_t pte) { struct kvm *kvm = mmu_notifier_to_kvm(mn); @@ -340,8 +359,8 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long start, - unsigned long end) + size_t start, + size_t end) { struct kvm *kvm = 
mmu_notifier_to_kvm(mn); int need_tlb_flush = 0, idx; @@ -366,8 +385,8 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long start, - unsigned long end) + size_t start, + size_t end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); @@ -392,8 +411,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long start, - unsigned long end) + size_t start, + size_t end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); int young, idx; @@ -413,8 +432,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long start, - unsigned long end) + size_t start, + size_t end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); int young, idx; @@ -443,7 +462,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) + size_t address) { struct kvm *kvm = mmu_notifier_to_kvm(mn); int young, idx; @@ -485,16 +504,16 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) return mmu_notifier_register(&kvm->mmu_notifier, current->mm); } -#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */ +#else /* !(CONFIG_MMU_NOTIFIER && GVM_ARCH_WANT_MMU_NOTIFIER) */ static int kvm_init_mmu_notifier(struct kvm *kvm) { return 0; } -#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ +#endif /* CONFIG_MMU_NOTIFIER && GVM_ARCH_WANT_MMU_NOTIFIER */ -static struct kvm_memslots *kvm_alloc_memslots(void) +static struct kvm_memslots *kvm_alloc_memslots(struct kvm *kvm) { int i; struct kvm_memslots *slots; @@ -508,8 +527,10 @@ static struct kvm_memslots *kvm_alloc_memslots(void) * code of handling generation 
number wrap-around. */ slots->generation = -150; - for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) + for (i = 0; i < GVM_MEM_SLOTS_NUM; i++) { slots->id_to_index[i] = slots->memslots[i].id = i; + slots->memslots[i].kvm = kvm; + } return slots; } @@ -529,9 +550,27 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { + struct pmem_lock *pl; + int i; + if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); + if (!dont || free->pmem_lock != dont->pmem_lock) + if (free->pmem_lock) { + for (i = 0; i < free->npages; i++) { + pl = &free->pmem_lock[i]; + if (!pl->lock_mdl) + continue; + spin_lock(&pl->lock); + MmUnlockPages(pl->lock_mdl); + IoFreeMdl(pl->lock_mdl); + pl->lock_mdl = NULL; + spin_unlock(&pl->lock); + } + kfree(free->pmem_lock); + } + kvm_arch_free_memslot(kvm, free, dont); free->npages = 0; @@ -550,61 +589,7 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) kvfree(slots); } -static void kvm_destroy_vm_debugfs(struct kvm *kvm) -{ - int i; - - if (!kvm->debugfs_dentry) - return; - - debugfs_remove_recursive(kvm->debugfs_dentry); - - if (kvm->debugfs_stat_data) { - for (i = 0; i < kvm_debugfs_num_entries; i++) - kfree(kvm->debugfs_stat_data[i]); - kfree(kvm->debugfs_stat_data); - } -} - -static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) -{ - char dir_name[ITOA_MAX_LEN * 2]; - struct kvm_stat_data *stat_data; - struct kvm_stats_debugfs_item *p; - - if (!debugfs_initialized()) - return 0; - - snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); - kvm->debugfs_dentry = debugfs_create_dir(dir_name, - kvm_debugfs_dir); - if (!kvm->debugfs_dentry) - return -ENOMEM; - - kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, - sizeof(*kvm->debugfs_stat_data), - GFP_KERNEL); - if (!kvm->debugfs_stat_data) - return -ENOMEM; - - for (p = debugfs_entries; p->name; 
p++) { - stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL); - if (!stat_data) - return -ENOMEM; - - stat_data->kvm = kvm; - stat_data->offset = p->offset; - kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - if (!debugfs_create_file(p->name, 0444, - kvm->debugfs_dentry, - stat_data, - stat_fops_per_vm[p->kind])) - return -ENOMEM; - } - return 0; -} - -static struct kvm *kvm_create_vm(unsigned long type) +static struct kvm *kvm_create_vm(size_t type) { int r, i; struct kvm *kvm = kvm_arch_alloc_vm(); @@ -613,14 +598,12 @@ static struct kvm *kvm_create_vm(unsigned long type) return ERR_PTR(-ENOMEM); spin_lock_init(&kvm->mmu_lock); - atomic_inc(¤t->mm->mm_count); - kvm->mm = current->mm; - kvm_eventfd_init(kvm); + kvm->process = IoGetCurrentProcess(); + kvm->vm_id = InterlockedIncrement64(&global_vm_id); mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); atomic_set(&kvm->users_count, 1); - INIT_LIST_HEAD(&kvm->devices); r = kvm_arch_init_vm(kvm, type); if (r) @@ -630,15 +613,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err_no_disable; -#ifdef CONFIG_HAVE_KVM_IRQFD - INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); -#endif - - BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); - r = -ENOMEM; - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - kvm->memslots[i] = kvm_alloc_memslots(); + for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++) { + kvm->memslots[i] = kvm_alloc_memslots(kvm); if (!kvm->memslots[i]) goto out_err_no_srcu; } @@ -647,7 +624,7 @@ static struct kvm *kvm_create_vm(unsigned long type) goto out_err_no_srcu; if (init_srcu_struct(&kvm->irq_srcu)) goto out_err_no_irq_srcu; - for (i = 0; i < KVM_NR_BUSES; i++) { + for (i = 0; i < GVM_NR_BUSES; i++) { kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); if (!kvm->buses[i]) @@ -662,8 +639,6 @@ static struct kvm *kvm_create_vm(unsigned long type) list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); - preempt_notifier_inc(); - return kvm; 
out_err: @@ -673,12 +648,11 @@ out_err_no_irq_srcu: out_err_no_srcu: hardware_disable_all(); out_err_no_disable: - for (i = 0; i < KVM_NR_BUSES; i++) + for (i = 0; i < GVM_NR_BUSES; i++) kfree(kvm->buses[i]); - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) + for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++) kvm_free_memslots(kvm, kvm->memslots[i]); kvm_arch_free_vm(kvm); - mmdrop(current->mm); return ERR_PTR(r); } @@ -686,7 +660,7 @@ out_err_no_disable: * Avoid using vmalloc for a small buffer. * Should not be used when the size is statically known. */ -void *kvm_kvzalloc(unsigned long size) +void *kvm_kvzalloc(size_t size) { if (size > PAGE_SIZE) return vzalloc(size); @@ -694,71 +668,43 @@ void *kvm_kvzalloc(unsigned long size) return kzalloc(size, GFP_KERNEL); } -static void kvm_destroy_devices(struct kvm *kvm) -{ - struct kvm_device *dev, *tmp; - - /* - * We do not need to take the kvm->lock here, because nobody else - * has a reference to the struct kvm at this point and therefore - * cannot access the devices list anyhow. 
- */ - list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { - list_del(&dev->vm_node); - dev->ops->destroy(dev); - } -} - static void kvm_destroy_vm(struct kvm *kvm) { int i; - struct mm_struct *mm = kvm->mm; - kvm_destroy_vm_debugfs(kvm); - kvm_arch_sync_events(kvm); spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); - for (i = 0; i < KVM_NR_BUSES; i++) + for (i = 0; i < GVM_NR_BUSES; i++) kvm_io_bus_destroy(kvm->buses[i]); - kvm_coalesced_mmio_free(kvm); -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) - mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); -#else kvm_arch_flush_shadow_all(kvm); -#endif kvm_arch_destroy_vm(kvm); - kvm_destroy_devices(kvm); - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) + for (i = 0; i < GVM_ADDRESS_SPACE_NUM; i++) kvm_free_memslots(kvm, kvm->memslots[i]); + kfree(kvm->rp_bitmap); cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); - preempt_notifier_dec(); hardware_disable_all(); - mmdrop(mm); } void kvm_get_kvm(struct kvm *kvm) { atomic_inc(&kvm->users_count); } -EXPORT_SYMBOL_GPL(kvm_get_kvm); void kvm_put_kvm(struct kvm *kvm) { if (atomic_dec_and_test(&kvm->users_count)) kvm_destroy_vm(kvm); } -EXPORT_SYMBOL_GPL(kvm_put_kvm); -static int kvm_vm_release(struct inode *inode, struct file *filp) +NTSTATUS kvm_vm_release(PDEVICE_OBJECT pDevObj, PIRP pIrp) { - struct kvm *kvm = filp->private_data; - - kvm_irqfd_release(kvm); + struct gvm_device_extension *devext = pDevObj->DeviceExtension; + struct kvm *kvm = devext->PrivData; kvm_put_kvm(kvm); return 0; @@ -770,7 +716,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) */ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { - unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); + size_t dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes); if (!memslot->dirty_bitmap) @@ 
-802,7 +748,7 @@ static void update_memslots(struct kvm_memslots *slots, slots->used_slots++; } - while (i < KVM_MEM_SLOTS_NUM - 1 && + while (i < GVM_MEM_SLOTS_NUM - 1 && new->base_gfn <= mslots[i + 1].base_gfn) { if (!mslots[i + 1].npages) break; @@ -836,10 +782,10 @@ static void update_memslots(struct kvm_memslots *slots, static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) { - u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; + u32 valid_flags = GVM_MEM_LOG_DIRTY_PAGES; -#ifdef __KVM_HAVE_READONLY_MEM - valid_flags |= KVM_MEM_READONLY; +#ifdef __GVM_HAVE_READONLY_MEM + valid_flags |= GVM_MEM_READONLY; #endif if (mem->flags & ~valid_flags) @@ -888,7 +834,7 @@ int __kvm_set_memory_region(struct kvm *kvm, { int r; gfn_t base_gfn; - unsigned long npages; + size_t npages; struct kvm_memory_slot *slot; struct kvm_memory_slot old, new; struct kvm_memslots *slots = NULL, *old_memslots; @@ -908,14 +854,7 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; - /* We can read the guest memory with __xxx_user() later on. */ - if ((id < KVM_USER_MEM_SLOTS) && - ((mem->userspace_addr & (PAGE_SIZE - 1)) || - !access_ok(VERIFY_WRITE, - (void __user *)(unsigned long)mem->userspace_addr, - mem->memory_size))) - goto out; - if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM) + if (as_id >= GVM_ADDRESS_SPACE_NUM || id >= GVM_MEM_SLOTS_NUM) goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; @@ -924,7 +863,7 @@ int __kvm_set_memory_region(struct kvm *kvm, base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; - if (npages > KVM_MEM_MAX_NR_PAGES) + if (npages > GVM_MEM_MAX_NR_PAGES) goto out; new = old = *slot; @@ -936,17 +875,17 @@ int __kvm_set_memory_region(struct kvm *kvm, if (npages) { if (!old.npages) - change = KVM_MR_CREATE; + change = GVM_MR_CREATE; else { /* Modify an existing slot. 
*/ if ((mem->userspace_addr != old.userspace_addr) || (npages != old.npages) || - ((new.flags ^ old.flags) & KVM_MEM_READONLY)) + ((new.flags ^ old.flags) & GVM_MEM_READONLY)) goto out; if (base_gfn != old.base_gfn) - change = KVM_MR_MOVE; + change = GVM_MR_MOVE; else if (new.flags != old.flags) - change = KVM_MR_FLAGS_ONLY; + change = GVM_MR_FLAGS_ONLY; else { /* Nothing to change. */ r = 0; goto out; @@ -956,16 +895,16 @@ int __kvm_set_memory_region(struct kvm *kvm, if (!old.npages) goto out; - change = KVM_MR_DELETE; + change = GVM_MR_DELETE; new.base_gfn = 0; new.flags = 0; } - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + if ((change == GVM_MR_CREATE) || (change == GVM_MR_MOVE)) { /* Check for overlaps */ r = -EEXIST; kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { - if ((slot->id >= KVM_USER_MEM_SLOTS) || + if ((slot->id >= GVM_USER_MEM_SLOTS) || (slot->id == id)) continue; if (!((base_gfn + npages <= slot->base_gfn) || @@ -975,36 +914,43 @@ int __kvm_set_memory_region(struct kvm *kvm, } /* Free page dirty bitmap if unneeded */ - if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) + if (!(new.flags & GVM_MEM_LOG_DIRTY_PAGES)) new.dirty_bitmap = NULL; r = -ENOMEM; - if (change == KVM_MR_CREATE) { + if (change == GVM_MR_CREATE) { new.userspace_addr = mem->userspace_addr; if (kvm_arch_create_memslot(kvm, &new, npages)) goto out_free; + } /* Allocate page dirty bitmap if needed */ - if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { + if ((new.flags & GVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { if (kvm_create_dirty_bitmap(&new) < 0) goto out_free; } + /* Allocate physical page pinning data structure */ + if (!new.pmem_lock) { + new.pmem_lock = + kzalloc(sizeof(struct pmem_lock) * new.npages, GFP_KERNEL); + if (!new.pmem_lock) + goto out_free; + } + slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!slots) goto out_free; memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); - if ((change == KVM_MR_DELETE) 
|| (change == KVM_MR_MOVE)) { + if ((change == GVM_MR_DELETE) || (change == GVM_MR_MOVE)) { slot = id_to_memslot(slots, id); - slot->flags |= KVM_MEMSLOT_INVALID; + slot->flags |= GVM_MEMSLOT_INVALID; old_memslots = install_new_memslots(kvm, as_id, slots); - /* slot was deleted or moved, clear iommu mapping */ - kvm_iommu_unmap_pages(kvm, &old); /* From this point no new shadow pages pointing to a deleted, * or moved, memslot will be created. * @@ -1027,8 +973,9 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_slots; /* actual memory is freed via old in kvm_free_memslot below */ - if (change == KVM_MR_DELETE) { + if (change == GVM_MR_DELETE) { new.dirty_bitmap = NULL; + new.pmem_lock = NULL; memset(&new.arch, 0, sizeof(new.arch)); } @@ -1040,20 +987,6 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_free_memslot(kvm, &old, &new); kvfree(old_memslots); - /* - * IOMMU mapping: New slots need to be mapped. Old slots need to be - * un-mapped and re-mapped if their base changes. Since base change - * unmapping is handled above with slot deletion, mapping alone is - * needed here. Anything else the iommu might care about for existing - * slots (size changes, userspace addr changes and read-only flag - * changes) is disallowed above, so any other attribute changes getting - * here can be skipped. 
- */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { - r = kvm_iommu_map_pages(kvm, &new); - return r; - } - return 0; out_slots: @@ -1063,7 +996,6 @@ out_free: out: return r; } -EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem) @@ -1075,12 +1007,11 @@ int kvm_set_memory_region(struct kvm *kvm, mutex_unlock(&kvm->slots_lock); return r; } -EXPORT_SYMBOL_GPL(kvm_set_memory_region); static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) + if ((u16)mem->slot >= GVM_USER_MEM_SLOTS) return -EINVAL; return kvm_set_memory_region(kvm, mem); @@ -1092,13 +1023,13 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int r, i, as_id, id; - unsigned long n; - unsigned long any = 0; + size_t n; + size_t any = 0; r = -EINVAL; as_id = log->slot >> 16; id = (u16)log->slot; - if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) + if (as_id >= GVM_ADDRESS_SPACE_NUM || id >= GVM_USER_MEM_SLOTS) goto out; slots = __kvm_memslots(kvm, as_id); @@ -1113,7 +1044,7 @@ int kvm_get_dirty_log(struct kvm *kvm, any = memslot->dirty_bitmap[i]; r = -EFAULT; - if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) + if ( __copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) goto out; if (any) @@ -1123,9 +1054,7 @@ int kvm_get_dirty_log(struct kvm *kvm, out: return r; } -EXPORT_SYMBOL_GPL(kvm_get_dirty_log); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT /** * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages * are dirty write protect them for next write. 
@@ -1154,14 +1083,14 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int r, i, as_id, id; - unsigned long n; - unsigned long *dirty_bitmap; - unsigned long *dirty_bitmap_buffer; + size_t n; + size_t *dirty_bitmap; + size_t *dirty_bitmap_buffer; r = -EINVAL; as_id = log->slot >> 16; id = (u16)log->slot; - if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) + if (as_id >= GVM_ADDRESS_SPACE_NUM || id >= GVM_USER_MEM_SLOTS) goto out; slots = __kvm_memslots(kvm, as_id); @@ -1174,13 +1103,13 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, n = kvm_dirty_bitmap_bytes(memslot); - dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long); + dirty_bitmap_buffer = dirty_bitmap + n / sizeof(size_t); memset(dirty_bitmap_buffer, 0, n); spin_lock(&kvm->mmu_lock); *is_dirty = false; - for (i = 0; i < n / sizeof(long); i++) { - unsigned long mask; + for (i = 0; i < n / sizeof(size_t); i++) { + size_t mask; gfn_t offset; if (!dirty_bitmap[i]) @@ -1201,32 +1130,18 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, spin_unlock(&kvm->mmu_lock); r = -EFAULT; - if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) + if ( __copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) goto out; r = 0; out: return r; } -EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); -#endif - -bool kvm_largepages_enabled(void) -{ - return largepages_enabled; -} - -void kvm_disable_largepages(void) -{ - largepages_enabled = false; -} -EXPORT_SYMBOL_GPL(kvm_disable_largepages); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { return __gfn_to_memslot(kvm_memslots(kvm), gfn); } -EXPORT_SYMBOL_GPL(gfn_to_memslot); struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn) { @@ -1237,51 +1152,31 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); - if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS || - memslot->flags & 
KVM_MEMSLOT_INVALID) + if (!memslot || memslot->id >= GVM_USER_MEM_SLOTS || + memslot->flags & GVM_MEMSLOT_INVALID) return false; return true; } -EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) +size_t kvm_host_page_size(struct kvm *kvm, gfn_t gfn) { - struct vm_area_struct *vma; - unsigned long addr, size; - - size = PAGE_SIZE; - - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return PAGE_SIZE; - - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, addr); - if (!vma) - goto out; - - size = vma_kernel_pagesize(vma); - -out: - up_read(¤t->mm->mmap_sem); - - return size; + return PAGE_SIZE; } static bool memslot_is_readonly(struct kvm_memory_slot *slot) { - return slot->flags & KVM_MEM_READONLY; + return slot->flags & GVM_MEM_READONLY; } -static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, +static size_t __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, gfn_t *nr_pages, bool write) { - if (!slot || slot->flags & KVM_MEMSLOT_INVALID) - return KVM_HVA_ERR_BAD; + if (!slot || slot->flags & GVM_MEMSLOT_INVALID) + return GVM_HVA_ERR_BAD; if (memslot_is_readonly(slot) && write) - return KVM_HVA_ERR_RO_BAD; + return GVM_HVA_ERR_RO_BAD; if (nr_pages) *nr_pages = slot->npages - (gfn - slot->base_gfn); @@ -1289,39 +1184,36 @@ static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, return __gfn_to_hva_memslot(slot, gfn); } -static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, +static size_t gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, gfn_t *nr_pages) { return __gfn_to_hva_many(slot, gfn, nr_pages, true); } -unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, +size_t gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { return gfn_to_hva_many(slot, gfn, NULL); } -EXPORT_SYMBOL_GPL(gfn_to_hva_memslot); -unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) +size_t gfn_to_hva(struct kvm 
*kvm, gfn_t gfn) { return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); } -EXPORT_SYMBOL_GPL(gfn_to_hva); -unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn) +size_t kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn) { return gfn_to_hva_many(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, NULL); } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva); /* * If writable is set to false, the hva returned by this function is only * allowed to be read. */ -unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, +size_t gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable) { - unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); + size_t hva = __gfn_to_hva_many(slot, gfn, NULL, false); if (!kvm_is_error_hva(hva) && writable) *writable = !memslot_is_readonly(slot); @@ -1329,177 +1221,39 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, return hva; } -unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +size_t gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) { struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); return gfn_to_hva_memslot_prot(slot, gfn, writable); } -unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable) +size_t kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable) { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); return gfn_to_hva_memslot_prot(slot, gfn, writable); } -static int get_user_page_nowait(unsigned long start, int write, - struct page **page) -{ - int flags = FOLL_NOWAIT | FOLL_HWPOISON; - - if (write) - flags |= FOLL_WRITE; - - return get_user_pages(start, 1, flags, page, NULL); -} - -static inline int check_user_page_hwpoison(unsigned long addr) -{ - int rc, flags = FOLL_HWPOISON | FOLL_WRITE; - - rc = get_user_pages(addr, 1, flags, NULL, NULL); - return rc == -EHWPOISON; -} - /* * The atomic path to get the writable pfn which will be stored in @pfn, * true 
indicates success, otherwise false is returned. */ -static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async, +static bool __hva_to_pfn(size_t addr, bool write_fault, bool *writable, kvm_pfn_t *pfn) { - struct page *page[1]; - int npages; - - if (!(async || atomic)) - return false; - - /* - * Fast pin a writable pfn only if it is a write fault request - * or the caller allows to map a writable pfn for a read fault - * request. - */ - if (!(write_fault || writable)) - return false; - - npages = __get_user_pages_fast(addr, 1, 1, page); - if (npages == 1) { - *pfn = page_to_pfn(page[0]); - - if (writable) - *writable = true; - return true; - } - - return false; -} - -/* - * The slow path to get the pfn of the specified host virtual address, - * 1 indicates success, -errno is returned if error is detected. - */ -static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, - bool *writable, kvm_pfn_t *pfn) -{ - struct page *page[1]; - int npages = 0; - - might_sleep(); - if (writable) *writable = write_fault; - if (async) { - down_read(¤t->mm->mmap_sem); - npages = get_user_page_nowait(addr, write_fault, page); - up_read(¤t->mm->mmap_sem); - } else { - unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON; - - if (write_fault) - flags |= FOLL_WRITE; - - npages = __get_user_pages_unlocked(current, current->mm, addr, 1, - page, flags); - } - if (npages != 1) - return npages; - /* map read fault as writable if possible */ - if (unlikely(!write_fault) && writable) { - struct page *wpage[1]; - - npages = __get_user_pages_fast(addr, 1, 1, wpage); - if (npages == 1) { - *writable = true; - put_page(page[0]); - page[0] = wpage[0]; - } + if (!write_fault && writable) + *writable = true; - npages = 1; - } - *pfn = page_to_pfn(page[0]); - return npages; -} - -static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) -{ - if (unlikely(!(vma->vm_flags & VM_READ))) - return false; - - if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) 
- return false; + *pfn = __pa((void *)addr) >> PAGE_SHIFT; return true; } -static int hva_to_pfn_remapped(struct vm_area_struct *vma, - unsigned long addr, bool *async, - bool write_fault, kvm_pfn_t *p_pfn) -{ - unsigned long pfn; - int r; - - r = follow_pfn(vma, addr, &pfn); - if (r) { - /* - * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does - * not call the fault handler, so do it here. - */ - bool unlocked = false; - r = fixup_user_fault(current, current->mm, addr, - (write_fault ? FAULT_FLAG_WRITE : 0), - &unlocked); - if (unlocked) - return -EAGAIN; - if (r) - return r; - - r = follow_pfn(vma, addr, &pfn); - if (r) - return r; - - } - - - /* - * Get a reference here because callers of *hva_to_pfn* and - * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the - * returned pfn. This is only needed if the VMA has VM_MIXEDMAP - * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will - * simply do nothing for reserved pfns. - * - * Whoever called remap_pfn_range is also going to call e.g. - * unmap_mapping_range before the underlying pages are freed, - * causing a call to our MMU notifier. - */ - kvm_get_pfn(pfn); - - *p_pfn = pfn; - return 0; -} - /* * Pin guest page in memory and return its pfn. * @addr: host virtual address which maps memory to the guest @@ -1514,70 +1268,67 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * 2): @write_fault = false && @writable, @writable will tell the caller * whether the mapping is writable. 
*/ -static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, +static kvm_pfn_t hva_to_pfn(size_t addr, bool write_fault, bool *writable) { - struct vm_area_struct *vma; kvm_pfn_t pfn = 0; - int npages, r; - - /* we can do it either atomically or asynchronously, not both */ - BUG_ON(atomic && async); - if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn)) + if (__hva_to_pfn(addr, write_fault, writable, &pfn)) return pfn; - if (atomic) - return KVM_PFN_ERR_FAULT; + return GVM_PFN_ERR_FAULT; +} - npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn); - if (npages == 1) - return pfn; +static int gvm_pin_user_memory(size_t addr, struct pmem_lock *pmem_lock) +{ + pmem_lock->lock_mdl = IoAllocateMdl((PVOID)addr, PAGE_SIZE, + FALSE, FALSE, NULL); + if (!pmem_lock->lock_mdl) + return -1; + MmProbeAndLockPages(pmem_lock->lock_mdl, UserMode, + IoWriteAccess); + return 0; +} - down_read(&current->mm->mmap_sem); - if (npages == -EHWPOISON || - (!async && check_user_page_hwpoison(addr))) { - pfn = KVM_PFN_ERR_HWPOISON; - goto exit; - } +static int kvm_is_ram_prot(struct kvm* kvm, gfn_t gfn); +static int kvm_should_ram_prot_exit(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_vcpu* vcpu; -retry: - vma = find_vma_intersection(current->mm, addr, addr + 1); - - if (vma == NULL) - pfn = KVM_PFN_ERR_FAULT; - else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { - r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn); - if (r == -EAGAIN) - goto retry; - if (r < 0) - pfn = KVM_PFN_ERR_FAULT; - } else { - if (async && vma_is_valid(vma, write_fault)) - *async = true; - pfn = KVM_PFN_ERR_FAULT; - } -exit: - up_read(&current->mm->mmap_sem); - return pfn; + if (!kvm_is_ram_prot(kvm, gfn)) + return 0; + + /* + * We assume get user pages always run + * in the vcpu thread requesting that + * page. 
+ */ + vcpu = kvm_get_vcpu_by_thread(kvm, PsGetCurrentThread()); + vcpu->run->exit_reason = GVM_EXIT_RAM_PROT; + vcpu->run->rp.gfn = gfn; + return 1; } kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, bool *async, bool write_fault, bool *writable) { - unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); + size_t addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); + struct pmem_lock *pmem_lock = NULL; + + /* We removed async page fault support for gvm */ + BUG_ON(async); - if (addr == KVM_HVA_ERR_RO_BAD) { + if (addr == GVM_HVA_ERR_RO_BAD) { if (writable) *writable = false; - return KVM_PFN_ERR_RO_FAULT; + return GVM_PFN_ERR_RO_FAULT; } if (kvm_is_error_hva(addr)) { if (writable) *writable = false; - return KVM_PFN_NOSLOT; + return GVM_PFN_NOSLOT; } /* Do not map writable pfn in the readonly memslot. */ @@ -1586,10 +1337,22 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, writable = NULL; } - return hva_to_pfn(addr, atomic, async, write_fault, - writable); + if (kvm_should_ram_prot_exit(slot->kvm, gfn)) + return 0; + + pmem_lock = &slot->pmem_lock[gfn - slot->base_gfn]; + spin_lock(&pmem_lock->lock); + if (!pmem_lock->lock_mdl) { + gvm_pin_user_memory(addr, pmem_lock); + if (!pmem_lock->lock_mdl) { + spin_unlock(&pmem_lock->lock); + return GVM_PFN_ERR_FAULT; + } + } + spin_unlock(&pmem_lock->lock); + + return hva_to_pfn(addr, write_fault, writable); } -EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot); kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable) @@ -1597,49 +1360,44 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL, write_fault, writable); } -EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); } -EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot); 
kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) { return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL); } -EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) { return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn); } -EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn) { return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn); } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn_atomic); kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) { return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn); } -EXPORT_SYMBOL_GPL(gfn_to_pfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) { return gfn_to_pfn_memslot(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn); } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn); -int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, - struct page **pages, int nr_pages) +int gfn_to_pfn_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, + pfn_t *pfn, int nr_pages) { - unsigned long addr; + size_t addr; gfn_t entry; + size_t i; + struct pmem_lock *pmem_lock; addr = gfn_to_hva_many(slot, gfn, &entry); if (kvm_is_error_hva(addr)) @@ -1648,32 +1406,36 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, if (entry < nr_pages) return 0; - return __get_user_pages_fast(addr, nr_pages, 1, pages); -} -EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); + for (i = 0; i < nr_pages; i++) { + if (kvm_should_ram_prot_exit(slot->kvm, gfn + i)) + return 0; -static struct page *kvm_pfn_to_page(kvm_pfn_t pfn) -{ - if (is_error_noslot_pfn(pfn)) - return KVM_ERR_PTR_BAD_PAGE; - - if (kvm_is_reserved_pfn(pfn)) { - WARN_ON(1); - return KVM_ERR_PTR_BAD_PAGE; + pmem_lock = &slot->pmem_lock[gfn + i - slot->base_gfn]; + spin_lock(&pmem_lock->lock); + if (!pmem_lock->lock_mdl) { + gvm_pin_user_memory(addr + i * PAGE_SIZE, pmem_lock); + if (!pmem_lock->lock_mdl) { + 
spin_unlock(&pmem_lock->lock); + break; + } + } + spin_unlock(&pmem_lock->lock); } - return pfn_to_page(pfn); + nr_pages = i; + + while(i--) + pfn[i] = __pa((void*)(addr + i * PAGE_SIZE)) >> PAGE_SHIFT; + return nr_pages; } -struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) +static struct page *kvm_pfn_to_page(kvm_pfn_t pfn) { - kvm_pfn_t pfn; - - pfn = gfn_to_pfn(kvm, gfn); + if (is_error_noslot_pfn(pfn)) + return GVM_ERR_PTR_BAD_PAGE; - return kvm_pfn_to_page(pfn); + return pfn_to_page(pfn); } -EXPORT_SYMBOL_GPL(gfn_to_page); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn) { @@ -1683,63 +1445,8 @@ struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn) return kvm_pfn_to_page(pfn); } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_page); - -void kvm_release_page_clean(struct page *page) -{ - WARN_ON(is_error_page(page)); - - kvm_release_pfn_clean(page_to_pfn(page)); -} -EXPORT_SYMBOL_GPL(kvm_release_page_clean); - -void kvm_release_pfn_clean(kvm_pfn_t pfn) -{ - if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn)) - put_page(pfn_to_page(pfn)); -} -EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); - -void kvm_release_page_dirty(struct page *page) -{ - WARN_ON(is_error_page(page)); - - kvm_release_pfn_dirty(page_to_pfn(page)); -} -EXPORT_SYMBOL_GPL(kvm_release_page_dirty); - -static void kvm_release_pfn_dirty(kvm_pfn_t pfn) -{ - kvm_set_pfn_dirty(pfn); - kvm_release_pfn_clean(pfn); -} - -void kvm_set_pfn_dirty(kvm_pfn_t pfn) -{ - if (!kvm_is_reserved_pfn(pfn)) { - struct page *page = pfn_to_page(pfn); - - if (!PageReserved(page)) - SetPageDirty(page); - } -} -EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); -void kvm_set_pfn_accessed(kvm_pfn_t pfn) -{ - if (!kvm_is_reserved_pfn(pfn)) - mark_page_accessed(pfn_to_page(pfn)); -} -EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); - -void kvm_get_pfn(kvm_pfn_t pfn) -{ - if (!kvm_is_reserved_pfn(pfn)) - get_page(pfn_to_page(pfn)); -} -EXPORT_SYMBOL_GPL(kvm_get_pfn); - -static int next_segment(unsigned long len, int 
offset) +static int next_segment(size_t len, int offset) { if (len > PAGE_SIZE - offset) return PAGE_SIZE - offset; @@ -1750,13 +1457,13 @@ static int next_segment(unsigned long len, int offset) static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn, void *data, int offset, int len) { - int r; - unsigned long addr; + int r = 0; + size_t addr; addr = gfn_to_hva_memslot_prot(slot, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; - r = __copy_from_user(data, (void __user *)addr + offset, len); + r = __copy_from_user(data, (char __user *)addr + offset, len); if (r) return -EFAULT; return 0; @@ -1769,7 +1476,6 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, return __kvm_read_guest_page(slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_read_guest_page); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, int len) @@ -1778,9 +1484,8 @@ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, return __kvm_read_guest_page(slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page); -int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) +int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, size_t len) { gfn_t gfn = gpa >> PAGE_SHIFT; int seg; @@ -1793,14 +1498,13 @@ int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) return ret; offset = 0; len -= seg; - data += seg; + //data += seg; ++gfn; } return 0; } -EXPORT_SYMBOL_GPL(kvm_read_guest); -int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len) +int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, size_t len) { gfn_t gfn = gpa >> PAGE_SHIFT; int seg; @@ -1813,43 +1517,29 @@ int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned l return ret; offset = 0; len -= seg; - data += seg; + //data += seg; ++gfn; } return 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest); static 
int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn, - void *data, int offset, unsigned long len) + void *data, int offset, size_t len) { - int r; - unsigned long addr; + int r = 0; + size_t addr; addr = gfn_to_hva_memslot_prot(slot, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; - pagefault_disable(); - r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); - pagefault_enable(); + r = __copy_from_user(data, (char __user *)addr + offset, len); if (r) return -EFAULT; return 0; } -int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, - unsigned long len) -{ - gfn_t gfn = gpa >> PAGE_SHIFT; - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); - int offset = offset_in_page(gpa); - - return __kvm_read_guest_atomic(slot, gfn, data, offset, len); -} -EXPORT_SYMBOL_GPL(kvm_read_guest_atomic); - int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, - void *data, unsigned long len) + void *data, size_t len) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); @@ -1857,18 +1547,17 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, return __kvm_read_guest_atomic(slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic); static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn, const void *data, int offset, int len) { - int r; - unsigned long addr; + int r = 0; + size_t addr; addr = gfn_to_hva_memslot(memslot, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; - r = __copy_to_user((void __user *)addr + offset, data, len); + r = __copy_to_user((void __user *)(addr + offset), data, len); if (r) return -EFAULT; mark_page_dirty_in_slot(memslot, gfn); @@ -1882,7 +1571,6 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, return __kvm_write_guest_page(slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_write_guest_page); int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data, 
int offset, int len) @@ -1891,10 +1579,9 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, return __kvm_write_guest_page(slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, - unsigned long len) + size_t len) { gfn_t gfn = gpa >> PAGE_SHIFT; int seg; @@ -1907,15 +1594,14 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, return ret; offset = 0; len -= seg; - data += seg; + //data += seg; ++gfn; } return 0; } -EXPORT_SYMBOL_GPL(kvm_write_guest); int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, - unsigned long len) + size_t len) { gfn_t gfn = gpa >> PAGE_SHIFT; int seg; @@ -1928,15 +1614,14 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, return ret; offset = 0; len -= seg; - data += seg; + //data += seg; ++gfn; } return 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa, unsigned long len) + gpa_t gpa, size_t len) { struct kvm_memslots *slots = kvm_memslots(kvm); int offset = offset_in_page(gpa); @@ -1970,10 +1655,9 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, } return 0; } -EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) + void *data, size_t len) { struct kvm_memslots *slots = kvm_memslots(kvm); int r; @@ -1996,10 +1680,9 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, return 0; } -EXPORT_SYMBOL_GPL(kvm_write_guest_cached); int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) + void *data, size_t len) { struct kvm_memslots *slots = kvm_memslots(kvm); int r; @@ -2021,17 +1704,13 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, return 0; } 
-EXPORT_SYMBOL_GPL(kvm_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { - const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); - - return kvm_write_guest_page(kvm, gfn, zero_page, offset, len); + return kvm_write_guest_page(kvm, gfn, pZeroPage, offset, len); } -EXPORT_SYMBOL_GPL(kvm_clear_guest_page); -int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) +int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, size_t len) { gfn_t gfn = gpa >> PAGE_SHIFT; int seg; @@ -2048,15 +1727,14 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) } return 0; } -EXPORT_SYMBOL_GPL(kvm_clear_guest); static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn) { if (memslot && memslot->dirty_bitmap) { - unsigned long rel_gfn = gfn - memslot->base_gfn; + size_t rel_gfn = gfn - memslot->base_gfn; - set_bit_le(rel_gfn, memslot->dirty_bitmap); + set_bit(rel_gfn, memslot->dirty_bitmap); } } @@ -2067,7 +1745,6 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) memslot = gfn_to_memslot(kvm, gfn); mark_page_dirty_in_slot(memslot, gfn); } -EXPORT_SYMBOL_GPL(mark_page_dirty); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) { @@ -2076,138 +1753,52 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); mark_page_dirty_in_slot(memslot, gfn); } -EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); - -static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) -{ - unsigned int old, val, grow; - - old = val = vcpu->halt_poll_ns; - grow = READ_ONCE(halt_poll_ns_grow); - /* 10us base */ - if (val == 0 && grow) - val = 10000; - else - val *= grow; - - if (val > halt_poll_ns) - val = halt_poll_ns; - - vcpu->halt_poll_ns = val; - trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); -} - -static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) -{ - unsigned int old, val, shrink; - - old = val = vcpu->halt_poll_ns; - shrink = 
READ_ONCE(halt_poll_ns_shrink); - if (shrink == 0) - val = 0; - else - val /= shrink; - - vcpu->halt_poll_ns = val; - trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); -} static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) { if (kvm_arch_vcpu_runnable(vcpu)) { - kvm_make_request(KVM_REQ_UNHALT, vcpu); + kvm_make_request(GVM_REQ_UNHALT, vcpu); return -EINTR; } if (kvm_cpu_has_pending_timer(vcpu)) return -EINTR; - if (signal_pending(current)) + if (vcpu->run->user_event_pending) return -EINTR; return 0; } +static void hardware_disable_nolock(void *junk); +static void hardware_enable_nolock(void *junk); + /* * The vCPU has executed a HLT instruction with in-kernel mode enabled. */ void kvm_vcpu_block(struct kvm_vcpu *vcpu) { - ktime_t start, cur; - DECLARE_SWAITQUEUE(wait); - bool waited = false; - u64 block_ns; - - start = cur = ktime_get(); - if (vcpu->halt_poll_ns) { - ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); - - ++vcpu->stat.halt_attempted_poll; - do { - /* - * This sets KVM_REQ_UNHALT if an interrupt - * arrives. 
- */ - if (kvm_vcpu_check_block(vcpu) < 0) { - ++vcpu->stat.halt_successful_poll; - if (!vcpu_valid_wakeup(vcpu)) - ++vcpu->stat.halt_poll_invalid; - goto out; - } - cur = ktime_get(); - } while (single_task_running() && ktime_before(cur, stop)); - } + LARGE_INTEGER expire; + expire.QuadPart = (u64)-1000000; kvm_arch_vcpu_blocking(vcpu); - for (;;) { - prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); - - if (kvm_vcpu_check_block(vcpu) < 0) + vcpu->blocked = 1; + for (;;) + { + if (kvm_vcpu_check_block(vcpu)) break; - - waited = true; - schedule(); + KeWaitForSingleObject(&vcpu->kick_event, Executive, KernelMode, FALSE, &expire); } - - finish_swait(&vcpu->wq, &wait); - cur = ktime_get(); - + vcpu->blocked = 0; + KeClearEvent(&vcpu->kick_event); kvm_arch_vcpu_unblocking(vcpu); -out: - block_ns = ktime_to_ns(cur) - ktime_to_ns(start); - - if (!vcpu_valid_wakeup(vcpu)) - shrink_halt_poll_ns(vcpu); - else if (halt_poll_ns) { - if (block_ns <= vcpu->halt_poll_ns) - ; - /* we had a long block, shrink polling */ - else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) - shrink_halt_poll_ns(vcpu); - /* we had a short halt and our poll time is too small */ - else if (vcpu->halt_poll_ns < halt_poll_ns && - block_ns < halt_poll_ns) - grow_halt_poll_ns(vcpu); - } else - vcpu->halt_poll_ns = 0; - - trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu)); kvm_arch_vcpu_block_finish(vcpu); } -EXPORT_SYMBOL_GPL(kvm_vcpu_block); -#ifndef CONFIG_S390 void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) { - struct swait_queue_head *wqp; - - wqp = kvm_arch_vcpu_wq(vcpu); - if (swait_active(wqp)) { - swake_up(wqp); - ++vcpu->stat.halt_wakeup; - } - + if(vcpu->blocked) + KeSetEvent(&vcpu->kick_event, IO_NO_INCREMENT, FALSE); } -EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up); /* * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. 
@@ -2218,224 +1809,36 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) int cpu = vcpu->cpu; kvm_vcpu_wake_up(vcpu); - me = get_cpu(); - if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) + me = smp_processor_id(); + if (cpu != -1 && cpu != me && cpu_online(cpu)) if (kvm_arch_vcpu_should_kick(vcpu)) smp_send_reschedule(cpu); - put_cpu(); -} -EXPORT_SYMBOL_GPL(kvm_vcpu_kick); -#endif /* !CONFIG_S390 */ - -int kvm_vcpu_yield_to(struct kvm_vcpu *target) -{ - struct pid *pid; - struct task_struct *task = NULL; - int ret = 0; - - rcu_read_lock(); - pid = rcu_dereference(target->pid); - if (pid) - task = get_pid_task(pid, PIDTYPE_PID); - rcu_read_unlock(); - if (!task) - return ret; - ret = yield_to(task, 1); - put_task_struct(task); - - return ret; -} -EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); - -/* - * Helper that checks whether a VCPU is eligible for directed yield. - * Most eligible candidate to yield is decided by following heuristics: - * - * (a) VCPU which has not done pl-exit or cpu relax intercepted recently - * (preempted lock holder), indicated by @in_spin_loop. - * Set at the beiginning and cleared at the end of interception/PLE handler. - * - * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get - * chance last time (mostly it has become eligible now since we have probably - * yielded to lockholder in last iteration. This is done by toggling - * @dy_eligible each time a VCPU checked for eligibility.) - * - * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding - * to preempted lock-holder could result in wrong VCPU selection and CPU - * burning. Giving priority for a potential lock-holder increases lock - * progress. - * - * Since algorithm is based on heuristics, accessing another VCPU data without - * locking does not harm. It may result in trying to yield to same VCPU, fail - * and continue with next VCPU and so on. 
- */ -static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT - bool eligible; - - eligible = !vcpu->spin_loop.in_spin_loop || - vcpu->spin_loop.dy_eligible; - - if (vcpu->spin_loop.in_spin_loop) - kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); - - return eligible; -#else - return true; -#endif -} - -void kvm_vcpu_on_spin(struct kvm_vcpu *me) -{ - struct kvm *kvm = me->kvm; - struct kvm_vcpu *vcpu; - int last_boosted_vcpu = me->kvm->last_boosted_vcpu; - int yielded = 0; - int try = 3; - int pass; - int i; - - kvm_vcpu_set_in_spin_loop(me, true); - /* - * We boost the priority of a VCPU that is runnable but not - * currently running, because it got preempted by something - * else and called schedule in __vcpu_run. Hopefully that - * VCPU is holding the lock that we need and will release it. - * We approximate round-robin by starting at the last boosted VCPU. - */ - for (pass = 0; pass < 2 && !yielded && try; pass++) { - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!pass && i <= last_boosted_vcpu) { - i = last_boosted_vcpu; - continue; - } else if (pass && i > last_boosted_vcpu) - break; - if (!ACCESS_ONCE(vcpu->preempted)) - continue; - if (vcpu == me) - continue; - if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) - continue; - if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) - continue; - - yielded = kvm_vcpu_yield_to(vcpu); - if (yielded > 0) { - kvm->last_boosted_vcpu = i; - break; - } else if (yielded < 0) { - try--; - if (!try) - break; - } - } - } - kvm_vcpu_set_in_spin_loop(me, false); - - /* Ensure vcpu is not eligible during next spinloop */ - kvm_vcpu_set_dy_eligible(me, false); } -EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +NTSTATUS kvm_vcpu_release(PDEVICE_OBJECT pDevObj, PIRP pIrp) { - struct kvm_vcpu *vcpu = vma->vm_file->private_data; - struct page *page; - - if (vmf->pgoff == 0) - page = 
virt_to_page(vcpu->run); -#ifdef CONFIG_X86 - else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) - page = virt_to_page(vcpu->arch.pio_data); -#endif -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET - else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) - page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); -#endif - else - return kvm_arch_vcpu_fault(vcpu, vmf); - get_page(page); - vmf->page = page; - return 0; -} - -static const struct vm_operations_struct kvm_vcpu_vm_ops = { - .fault = kvm_vcpu_fault, -}; + struct gvm_device_extension *devext = pDevObj->DeviceExtension; + struct kvm_vcpu *vcpu = devext->PrivData; -static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_ops = &kvm_vcpu_vm_ops; - return 0; -} - -static int kvm_vcpu_release(struct inode *inode, struct file *filp) -{ - struct kvm_vcpu *vcpu = filp->private_data; - - debugfs_remove_recursive(vcpu->debugfs_dentry); kvm_put_kvm(vcpu->kvm); return 0; } -static struct file_operations kvm_vcpu_fops = { - .release = kvm_vcpu_release, - .unlocked_ioctl = kvm_vcpu_ioctl, -#ifdef CONFIG_KVM_COMPAT - .compat_ioctl = kvm_vcpu_compat_ioctl, -#endif - .mmap = kvm_vcpu_mmap, - .llseek = noop_llseek, -}; - -/* - * Allocates an inode for the vcpu. - */ -static int create_vcpu_fd(struct kvm_vcpu *vcpu) -{ - return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); -} - -static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - char dir_name[ITOA_MAX_LEN * 2]; - int ret; - - if (!kvm_arch_has_vcpu_debugfs()) - return 0; - - if (!debugfs_initialized()) - return 0; - - snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); - vcpu->debugfs_dentry = debugfs_create_dir(dir_name, - vcpu->kvm->debugfs_dentry); - if (!vcpu->debugfs_dentry) - return -ENOMEM; - - ret = kvm_arch_create_vcpu_debugfs(vcpu); - if (ret < 0) { - debugfs_remove_recursive(vcpu->debugfs_dentry); - return ret; - } - - return 0; -} - -/* - * Creates some virtual cpus. Good luck creating more than one. 
- */ -static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) +static int kvm_vm_ioctl_create_vcpu(PDEVICE_OBJECT pDevObj, PIRP pIrp, void *arg) { int r; struct kvm_vcpu *vcpu; + struct gvm_device_extension *devext = pDevObj->DeviceExtension; + struct kvm *kvm = devext->PrivData; + HANDLE handle; + int id = *(int *)arg; + KAFFINITY Affinity; - if (id >= KVM_MAX_VCPU_ID) + mutex_lock(&kvm->lock); + if (id >= GVM_MAX_VCPU_ID) return -EINVAL; - mutex_lock(&kvm->lock); - if (kvm->created_vcpus == KVM_MAX_VCPUS) { + if (kvm->created_vcpus == GVM_MAX_VCPUS) { mutex_unlock(&kvm->lock); return -EINVAL; } @@ -2449,16 +1852,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) goto vcpu_decrement; } - preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); - r = kvm_arch_vcpu_setup(vcpu); if (r) goto vcpu_destroy; - r = kvm_create_vcpu_debugfs(vcpu); - if (r) - goto vcpu_destroy; - mutex_lock(&kvm->lock); if (kvm_get_vcpu_by_id(kvm, id)) { r = -EEXIST; @@ -2469,8 +1866,14 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) /* Now it's all set up, let userspace reach it */ kvm_get_kvm(kvm); - r = create_vcpu_fd(vcpu); - if (r < 0) { + r = gvmCreateVMDevice(&handle, kvm->vm_id, id, vcpu); + if (!NT_SUCCESS(r)) { + kvm_put_kvm(kvm); + goto unlock_vcpu_destroy; + } + r = gvmUpdateReturnBuffer(pIrp, 0, &handle, sizeof(handle)); + if (r) { + gvmDeleteVMDevice(NULL, 0, id); kvm_put_kvm(kvm); goto unlock_vcpu_destroy; } @@ -2486,11 +1889,16 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) mutex_unlock(&kvm->lock); kvm_arch_vcpu_postcreate(vcpu); + + Affinity = (KAFFINITY)1 << ( + cpu_online_count - 1 + - 2 * vcpu->vcpu_id / cpu_online_count % 2 + - vcpu->vcpu_id * 2 % cpu_online_count); + KeSetSystemAffinityThread(Affinity); return r; unlock_vcpu_destroy: mutex_unlock(&kvm->lock); - debugfs_remove_recursive(vcpu->debugfs_dentry); vcpu_destroy: kvm_arch_vcpu_destroy(vcpu); vcpu_decrement: @@ -2500,64 +1908,171 @@ vcpu_decrement: 
return r; } -static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) +static int kvm_vm_ioctl_kick_vcpu(PDEVICE_OBJECT pDevObj, PIRP pIrp, void *arg) { - if (sigset) { - sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); - vcpu->sigset_active = 1; - vcpu->sigset = *sigset; - } else - vcpu->sigset_active = 0; + struct kvm_vcpu *vcpu; + struct gvm_device_extension *devext = pDevObj->DeviceExtension; + struct kvm *kvm = devext->PrivData; + int id = *(int *)arg; + + if (id >= GVM_MAX_VCPU_ID) + return -EINVAL; + + vcpu = kvm_get_vcpu_by_id(kvm, id); + if (!vcpu) + return -EINVAL; + + kvm_vcpu_kick(vcpu); + return 0; } -static long kvm_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +static bool kvm_is_valid_prot_flags(u32 flags) { - struct kvm_vcpu *vcpu = filp->private_data; - void __user *argp = (void __user *)arg; - int r; - struct kvm_fpu *fpu = NULL; - struct kvm_sregs *kvm_sregs = NULL; + return (flags == RP_NOACCESS || flags == RP_RDWREX); +} - if (vcpu->kvm->mm != current->mm) - return -EIO; +static int kvm_adjust_rp_bitmap(struct kvm *kvm, u64 size) +{ + int old_size, new_size; + size_t *old_bitmap, *new_bitmap; - if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) - return -EINVAL; + if (kvm->rp_bitmap_size >= size) + return 0; -#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) - /* - * Special cases: vcpu ioctls that are asynchronous to vcpu execution, - * so vcpu_load() would break it. 
- */ - if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT) - return kvm_arch_vcpu_ioctl(filp, ioctl, arg); -#endif + new_size = ALIGN(size, (u64)BITS_PER_LONG) / 8; + new_bitmap = kvm_kvzalloc(new_size); + if (!new_bitmap) + return -ENOMEM; + + old_size = kvm->rp_bitmap_size; + old_bitmap = kvm->rp_bitmap; + memcpy(new_bitmap, old_bitmap, old_size); + + kvm->rp_bitmap = new_bitmap; + kvm->rp_bitmap_size = new_size; + + return 0; +} + +/* + * For set bulk bitmap instead of looping set_bit + */ +static inline void set_bits_in_long(size_t *byte, int start, int nbits, bool set) +{ + size_t mask; - r = vcpu_load(vcpu); + BUG_ON(byte == NULL); + BUG_ON(start < 0 || start > BITS_PER_LONG); + BUG_ON(nbits < 0 || start + nbits > BITS_PER_LONG); + + mask = ((1 << nbits) - 1) << start; + if (set) + *byte |= mask; + else + *byte &= ~mask; +} + +static void set_bit_block(size_t *bitmap, u64 start, u64 nbits, bool set) +{ + u64 first_long_index = start / BITS_PER_LONG; + u64 last_long_index = (start + nbits - 1) / BITS_PER_LONG; + u64 i; + int first_bit_index = (int)(start % BITS_PER_LONG); + int last_bit_index = (int)((start + nbits - 1) % BITS_PER_LONG); + + if (first_long_index == last_long_index) { + set_bits_in_long(&bitmap[first_long_index], first_bit_index, (int)nbits, + set); + return; + } + + set_bits_in_long(&bitmap[first_long_index], first_bit_index, + BITS_PER_LONG - first_bit_index, set); + for (i = first_long_index + 1; i < last_long_index; i++) { + bitmap[i] = set ? 
(size_t)-1 : 0; + } + set_bits_in_long(&bitmap[last_long_index], 0, last_bit_index + 1, set); +} + +static int kvm_is_ram_prot(struct kvm *kvm, gfn_t gfn) +{ + if (!kvm->rp_bitmap) + return 0; + + return test_bit(gfn, kvm->rp_bitmap); +} + +static int kvm_vm_ioctl_ram_prot(struct kvm *kvm, struct gvm_ram_protect *rp) +{ + int r = -EFAULT; + gfn_t first_gfn = rp->pa >> PAGE_SHIFT; + gfn_t last_gfn = (rp->pa + rp->size - 1) >> PAGE_SHIFT; + + if (!rp->reserved) + return -EINVAL; + + if (!kvm_is_valid_prot_flags(rp->flags)) + return -EINVAL; + + r = kvm_adjust_rp_bitmap(kvm, last_gfn + 1); if (r) return r; + + set_bit_block(kvm->rp_bitmap, first_gfn, last_gfn + 1 - first_gfn, + rp->flags == RP_NOACCESS); + + /* only need flush shadow when page access right is lowered */ + if (rp->flags == RP_NOACCESS) + kvm_arch_flush_shadow_all(kvm); + + return 0; +} + +NTSTATUS kvm_vcpu_ioctl(PDEVICE_OBJECT pDevObj, PIRP pIrp, + unsigned int ioctl) +{ + struct gvm_device_extension *devext = pDevObj->DeviceExtension; + struct kvm_vcpu *vcpu = devext->PrivData; + void __user *argp = (void __user *)pIrp->AssociatedIrp.SystemBuffer; + int r; + struct kvm_fpu *fpu = NULL; + struct kvm_sregs *kvm_sregs = NULL; + + if (vcpu->kvm->process != IoGetCurrentProcess()) + return -EIO; + switch (ioctl) { - case KVM_RUN: + case GVM_RUN: r = -EINVAL; - if (arg) - goto out; - if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { - /* The thread running this VCPU changed. 
*/ - struct pid *oldpid = vcpu->pid; - struct pid *newpid = get_task_pid(current, PIDTYPE_PID); - - rcu_assign_pointer(vcpu->pid, newpid); - if (oldpid) - synchronize_rcu(); - put_pid(oldpid); + if (vcpu->thread != PsGetCurrentThread()) { + vcpu->thread = PsGetCurrentThread(); + KeInitializeApc(&vcpu->apc, vcpu->thread, + OriginalApcEnvironment, + gvmWaitSuspend, + NULL, + NULL, + KernelMode, + NULL); } r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); - trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; - case KVM_GET_REGS: { + case GVM_VCPU_MMAP: + r = -EINVAL; + size_t mmap_size = 2 * PAGE_SIZE; + size_t userva = __vm_mmap(NULL, 0, mmap_size, PROT_READ |PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, 0, (size_t)vcpu->run); + if (!userva) + break; + r = gvmUpdateReturnBuffer(pIrp, 0, &userva, sizeof(userva)); + if (r) { + __vm_munmap(userva, 2 * PAGE_SIZE, false); + break; + } + vcpu->run_userva = userva; + break; + case GVM_GET_REGS: { struct kvm_regs *kvm_regs; r = -ENOMEM; @@ -2567,15 +2082,15 @@ static long kvm_vcpu_ioctl(struct file *filp, r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); if (r) goto out_free1; - r = -EFAULT; - if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) + r = gvmUpdateReturnBuffer(pIrp, 0, kvm_regs, sizeof(struct kvm_regs)); + if (r) goto out_free1; r = 0; out_free1: kfree(kvm_regs); break; } - case KVM_SET_REGS: { + case GVM_SET_REGS: { struct kvm_regs *kvm_regs; r = -ENOMEM; @@ -2588,7 +2103,7 @@ out_free1: kfree(kvm_regs); break; } - case KVM_GET_SREGS: { + case GVM_GET_SREGS: { kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); r = -ENOMEM; if (!kvm_sregs) @@ -2596,13 +2111,13 @@ out_free1: r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); if (r) goto out; - r = -EFAULT; - if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) + r = gvmUpdateReturnBuffer(pIrp, 0, kvm_sregs, sizeof(struct kvm_sregs)); + if (r) goto out; r = 0; break; } - case KVM_SET_SREGS: { + case GVM_SET_SREGS: { kvm_sregs = 
memdup_user(argp, sizeof(*kvm_sregs)); if (IS_ERR(kvm_sregs)) { r = PTR_ERR(kvm_sregs); @@ -2612,19 +2127,16 @@ out_free1: r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); break; } - case KVM_GET_MP_STATE: { + case GVM_GET_MP_STATE: { struct kvm_mp_state mp_state; r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); if (r) goto out; - r = -EFAULT; - if (copy_to_user(argp, &mp_state, sizeof(mp_state))) - goto out; - r = 0; + r = gvmUpdateReturnBuffer(pIrp, 0, &mp_state, sizeof(mp_state)); break; } - case KVM_SET_MP_STATE: { + case GVM_SET_MP_STATE: { struct kvm_mp_state mp_state; r = -EFAULT; @@ -2633,7 +2145,7 @@ out_free1: r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); break; } - case KVM_TRANSLATE: { + case GVM_TRANSLATE: { struct kvm_translation tr; r = -EFAULT; @@ -2642,13 +2154,10 @@ out_free1: r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); if (r) goto out; - r = -EFAULT; - if (copy_to_user(argp, &tr, sizeof(tr))) - goto out; - r = 0; + r = gvmUpdateReturnBuffer(pIrp, 0, &tr, sizeof(tr)); break; } - case KVM_SET_GUEST_DEBUG: { + case GVM_SET_GUEST_DEBUG: { struct kvm_guest_debug dbg; r = -EFAULT; @@ -2657,30 +2166,7 @@ out_free1: r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); break; } - case KVM_SET_SIGNAL_MASK: { - struct kvm_signal_mask __user *sigmask_arg = argp; - struct kvm_signal_mask kvm_sigmask; - sigset_t sigset, *p; - - p = NULL; - if (argp) { - r = -EFAULT; - if (copy_from_user(&kvm_sigmask, argp, - sizeof(kvm_sigmask))) - goto out; - r = -EINVAL; - if (kvm_sigmask.len != sizeof(sigset)) - goto out; - r = -EFAULT; - if (copy_from_user(&sigset, sigmask_arg->sigset, - sizeof(sigset))) - goto out; - p = &sigset; - } - r = kvm_vcpu_ioctl_set_sigmask(vcpu, p); - break; - } - case KVM_GET_FPU: { + case GVM_GET_FPU: { fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); r = -ENOMEM; if (!fpu) @@ -2688,13 +2174,10 @@ out_free1: r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); if (r) goto out; - r = -EFAULT; - if (copy_to_user(argp, fpu, 
sizeof(struct kvm_fpu))) - goto out; - r = 0; + r = gvmUpdateReturnBuffer(pIrp, 0, fpu, sizeof(struct kvm_fpu)); break; } - case KVM_SET_FPU: { + case GVM_SET_FPU: { fpu = memdup_user(argp, sizeof(*fpu)); if (IS_ERR(fpu)) { r = PTR_ERR(fpu); @@ -2705,260 +2188,57 @@ out_free1: break; } default: - r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); + r = kvm_arch_vcpu_ioctl(devext, pIrp, ioctl); } out: - vcpu_put(vcpu); kfree(fpu); kfree(kvm_sregs); return r; } -#ifdef CONFIG_KVM_COMPAT -static long kvm_vcpu_compat_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - struct kvm_vcpu *vcpu = filp->private_data; - void __user *argp = compat_ptr(arg); - int r; - - if (vcpu->kvm->mm != current->mm) - return -EIO; - - switch (ioctl) { - case KVM_SET_SIGNAL_MASK: { - struct kvm_signal_mask __user *sigmask_arg = argp; - struct kvm_signal_mask kvm_sigmask; - compat_sigset_t csigset; - sigset_t sigset; - - if (argp) { - r = -EFAULT; - if (copy_from_user(&kvm_sigmask, argp, - sizeof(kvm_sigmask))) - goto out; - r = -EINVAL; - if (kvm_sigmask.len != sizeof(csigset)) - goto out; - r = -EFAULT; - if (copy_from_user(&csigset, sigmask_arg->sigset, - sizeof(csigset))) - goto out; - sigset_from_compat(&sigset, &csigset); - r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); - } else - r = kvm_vcpu_ioctl_set_sigmask(vcpu, NULL); - break; - } - default: - r = kvm_vcpu_ioctl(filp, ioctl, arg); - } - -out: - return r; -} -#endif - -static int kvm_device_ioctl_attr(struct kvm_device *dev, - int (*accessor)(struct kvm_device *dev, - struct kvm_device_attr *attr), - unsigned long arg) -{ - struct kvm_device_attr attr; - - if (!accessor) - return -EPERM; - - if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) - return -EFAULT; - - return accessor(dev, &attr); -} - -static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, - unsigned long arg) -{ - struct kvm_device *dev = filp->private_data; - - switch (ioctl) { - case KVM_SET_DEVICE_ATTR: - return 
kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); - case KVM_GET_DEVICE_ATTR: - return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); - case KVM_HAS_DEVICE_ATTR: - return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); - default: - if (dev->ops->ioctl) - return dev->ops->ioctl(dev, ioctl, arg); - - return -ENOTTY; - } -} - -static int kvm_device_release(struct inode *inode, struct file *filp) -{ - struct kvm_device *dev = filp->private_data; - struct kvm *kvm = dev->kvm; - - kvm_put_kvm(kvm); - return 0; -} - -static const struct file_operations kvm_device_fops = { - .unlocked_ioctl = kvm_device_ioctl, -#ifdef CONFIG_KVM_COMPAT - .compat_ioctl = kvm_device_ioctl, -#endif - .release = kvm_device_release, -}; - -struct kvm_device *kvm_device_from_filp(struct file *filp) -{ - if (filp->f_op != &kvm_device_fops) - return NULL; - - return filp->private_data; -} - -static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { -#ifdef CONFIG_KVM_MPIC - [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, - [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, -#endif - -#ifdef CONFIG_KVM_XICS - [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, -#endif -}; - -int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) -{ - if (type >= ARRAY_SIZE(kvm_device_ops_table)) - return -ENOSPC; - - if (kvm_device_ops_table[type] != NULL) - return -EEXIST; - - kvm_device_ops_table[type] = ops; - return 0; -} - -void kvm_unregister_device_ops(u32 type) -{ - if (kvm_device_ops_table[type] != NULL) - kvm_device_ops_table[type] = NULL; -} - -static int kvm_ioctl_create_device(struct kvm *kvm, - struct kvm_create_device *cd) -{ - struct kvm_device_ops *ops = NULL; - struct kvm_device *dev; - bool test = cd->flags & KVM_CREATE_DEVICE_TEST; - int ret; - - if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) - return -ENODEV; - - ops = kvm_device_ops_table[cd->type]; - if (ops == NULL) - return -ENODEV; - - if (test) - return 0; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return 
-ENOMEM; - - dev->ops = ops; - dev->kvm = kvm; - - mutex_lock(&kvm->lock); - ret = ops->create(dev, cd->type); - if (ret < 0) { - mutex_unlock(&kvm->lock); - kfree(dev); - return ret; - } - list_add(&dev->vm_node, &kvm->devices); - mutex_unlock(&kvm->lock); - - if (ops->init) - ops->init(dev); - - ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); - if (ret < 0) { - ops->destroy(dev); - mutex_lock(&kvm->lock); - list_del(&dev->vm_node); - mutex_unlock(&kvm->lock); - return ret; - } - - kvm_get_kvm(kvm); - cd->fd = ret; - return 0; -} - static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { - case KVM_CAP_USER_MEMORY: - case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: - case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: - case KVM_CAP_INTERNAL_ERROR_DATA: -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_CAP_SIGNAL_MSI: -#endif -#ifdef CONFIG_HAVE_KVM_IRQFD - case KVM_CAP_IRQFD: - case KVM_CAP_IRQFD_RESAMPLE: -#endif - case KVM_CAP_IOEVENTFD_ANY_LENGTH: - case KVM_CAP_CHECK_EXTENSION_VM: - return 1; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQ_ROUTING: - return KVM_MAX_IRQ_ROUTES; +#ifdef CONFIG_HAVE_GVM_MSI + case GVM_CAP_SIGNAL_MSI: #endif -#if KVM_ADDRESS_SPACE_NUM > 1 - case KVM_CAP_MULTI_ADDRESS_SPACE: - return KVM_ADDRESS_SPACE_NUM; + case GVM_CAP_IRQ_ROUTING: + return GVM_MAX_IRQ_ROUTES; +#if GVM_ADDRESS_SPACE_NUM > 1 + case GVM_CAP_MULTI_ADDRESS_SPACE: + return GVM_ADDRESS_SPACE_NUM; #endif - case KVM_CAP_MAX_VCPU_ID: - return KVM_MAX_VCPU_ID; + case GVM_CAP_MAX_VCPU_ID: + return GVM_MAX_VCPU_ID; default: break; } return kvm_vm_ioctl_check_extension(kvm, arg); } -static long kvm_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +NTSTATUS kvm_vm_ioctl(PDEVICE_OBJECT pDevObj, PIRP pIrp, + unsigned int ioctl) { - struct kvm *kvm = filp->private_data; - void __user *argp = (void __user *)arg; + struct gvm_device_extension *devext = pDevObj->DeviceExtension; + struct kvm *kvm = devext->PrivData; + 
void __user *argp = (void __user *)pIrp->AssociatedIrp.SystemBuffer; int r; - if (kvm->mm != current->mm) + if (kvm->process != IoGetCurrentProcess()) return -EIO; switch (ioctl) { - case KVM_CREATE_VCPU: - r = kvm_vm_ioctl_create_vcpu(kvm, arg); + case GVM_CREATE_VCPU: + r = kvm_vm_ioctl_create_vcpu(pDevObj, pIrp, argp); break; - case KVM_SET_USER_MEMORY_REGION: { + case GVM_SET_USER_MEMORY_REGION: { struct kvm_userspace_memory_region kvm_userspace_mem; r = -EFAULT; - if (copy_from_user(&kvm_userspace_mem, argp, - sizeof(kvm_userspace_mem))) - goto out; - + RtlCopyBytes(&kvm_userspace_mem, pIrp->AssociatedIrp.SystemBuffer, sizeof(kvm_userspace_mem)); r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); break; } - case KVM_GET_DIRTY_LOG: { + case GVM_GET_DIRTY_LOG: { struct kvm_dirty_log log; r = -EFAULT; @@ -2967,46 +2247,18 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_get_dirty_log(kvm, &log); break; } -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET - case KVM_REGISTER_COALESCED_MMIO: { - struct kvm_coalesced_mmio_zone zone; - - r = -EFAULT; - if (copy_from_user(&zone, argp, sizeof(zone))) - goto out; - r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); + case GVM_KICK_VCPU: + r = kvm_vm_ioctl_kick_vcpu(pDevObj, pIrp, argp); break; - } - case KVM_UNREGISTER_COALESCED_MMIO: { - struct kvm_coalesced_mmio_zone zone; + case GVM_RAM_PROTECT: + struct gvm_ram_protect rp; r = -EFAULT; - if (copy_from_user(&zone, argp, sizeof(zone))) - goto out; - r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); + RtlCopyBytes(&rp, pIrp->AssociatedIrp.SystemBuffer, sizeof(rp)); + r = kvm_vm_ioctl_ram_prot(kvm, &rp); break; - } -#endif - case KVM_IRQFD: { - struct kvm_irqfd data; - - r = -EFAULT; - if (copy_from_user(&data, argp, sizeof(data))) - goto out; - r = kvm_irqfd(kvm, &data); - break; - } - case KVM_IOEVENTFD: { - struct kvm_ioeventfd data; - - r = -EFAULT; - if (copy_from_user(&data, argp, sizeof(data))) - goto out; - r = kvm_ioeventfd(kvm, &data); - 
break; - } -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_SIGNAL_MSI: { +#ifdef CONFIG_HAVE_GVM_MSI + case GVM_SIGNAL_MSI: { struct kvm_msi msi; r = -EFAULT; @@ -3016,32 +2268,28 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif -#ifdef __KVM_HAVE_IRQ_LINE - case KVM_IRQ_LINE_STATUS: - case KVM_IRQ_LINE: { + case GVM_IRQ_LINE_STATUS: { struct kvm_irq_level irq_event; r = -EFAULT; if (copy_from_user(&irq_event, argp, sizeof(irq_event))) goto out; - r = kvm_vm_ioctl_irq_line(kvm, &irq_event, - ioctl == KVM_IRQ_LINE_STATUS); + r = kvm_vm_ioctl_irq_line(kvm, &irq_event, true); if (r) goto out; - r = -EFAULT; - if (ioctl == KVM_IRQ_LINE_STATUS) { - if (copy_to_user(argp, &irq_event, sizeof(irq_event))) + if (ioctl == GVM_IRQ_LINE_STATUS) { + r = gvmUpdateReturnBuffer(pIrp, 0, &irq_event, + sizeof(irq_event)); + if (r) goto out; } r = 0; break; } -#endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_SET_GSI_ROUTING: { + case GVM_SET_GSI_ROUTING: { struct kvm_irq_routing routing; struct kvm_irq_routing __user *urouting; struct kvm_irq_routing_entry *entries = NULL; @@ -3050,7 +2298,7 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&routing, argp, sizeof(routing))) goto out; r = -EINVAL; - if (routing.nr > KVM_MAX_IRQ_ROUTES) + if (routing.nr > GVM_MAX_IRQ_ROUTES) goto out; if (routing.flags) goto out; @@ -3071,178 +2319,66 @@ out_free_irq_routing: vfree(entries); break; } -#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ - case KVM_CREATE_DEVICE: { - struct kvm_create_device cd; - - r = -EFAULT; - if (copy_from_user(&cd, argp, sizeof(cd))) - goto out; - - r = kvm_ioctl_create_device(kvm, &cd); - if (r) - goto out; - - r = -EFAULT; - if (copy_to_user(argp, &cd, sizeof(cd))) - goto out; - - r = 0; - break; - } - case KVM_CHECK_EXTENSION: - r = kvm_vm_ioctl_check_extension_generic(kvm, arg); + case GVM_CHECK_EXTENSION: + r = kvm_vm_ioctl_check_extension_generic(kvm, *(long *)argp); + gvmUpdateReturnBuffer(pIrp, 0, &r, sizeof(r)); + r = STATUS_SUCCESS; break; 
default: - r = kvm_arch_vm_ioctl(filp, ioctl, arg); + r = kvm_arch_vm_ioctl(devext, pIrp, ioctl); } out: return r; } -#ifdef CONFIG_KVM_COMPAT -struct compat_kvm_dirty_log { - __u32 slot; - __u32 padding1; - union { - compat_uptr_t dirty_bitmap; /* one bit per page */ - __u64 padding2; - }; -}; - -static long kvm_vm_compat_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +static int kvm_dev_ioctl_create_vm(PDEVICE_OBJECT pDevObj, PIRP pIrp, unsigned long arg) { - struct kvm *kvm = filp->private_data; - int r; - - if (kvm->mm != current->mm) - return -EIO; - switch (ioctl) { - case KVM_GET_DIRTY_LOG: { - struct compat_kvm_dirty_log compat_log; - struct kvm_dirty_log log; - - r = -EFAULT; - if (copy_from_user(&compat_log, (void __user *)arg, - sizeof(compat_log))) - goto out; - log.slot = compat_log.slot; - log.padding1 = compat_log.padding1; - log.padding2 = compat_log.padding2; - log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); - - r = kvm_vm_ioctl_get_dirty_log(kvm, &log); - break; - } - default: - r = kvm_vm_ioctl(filp, ioctl, arg); - } - -out: - return r; -} -#endif - -static struct file_operations kvm_vm_fops = { - .release = kvm_vm_release, - .unlocked_ioctl = kvm_vm_ioctl, -#ifdef CONFIG_KVM_COMPAT - .compat_ioctl = kvm_vm_compat_ioctl, -#endif - .llseek = noop_llseek, -}; - -static int kvm_dev_ioctl_create_vm(unsigned long type) -{ - int r; struct kvm *kvm; - struct file *file; + NTSTATUS rc; + HANDLE handle; + unsigned int type = arg; kvm = kvm_create_vm(type); if (IS_ERR(kvm)) return PTR_ERR(kvm); -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET - r = kvm_coalesced_mmio_init(kvm); - if (r < 0) { - kvm_put_kvm(kvm); - return r; - } -#endif - r = get_unused_fd_flags(O_CLOEXEC); - if (r < 0) { - kvm_put_kvm(kvm); - return r; - } - file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); - if (IS_ERR(file)) { - put_unused_fd(r); - kvm_put_kvm(kvm); - return PTR_ERR(file); - } - if (kvm_create_vm_debugfs(kvm, r) < 0) { - put_unused_fd(r); - 
fput(file); - return -ENOMEM; - } - - fd_install(r, file); - return r; + rc = gvmCreateVMDevice(&handle, kvm->vm_id, -1, kvm); + if (NT_SUCCESS(rc)) + gvmUpdateReturnBuffer(pIrp, 0, &handle, sizeof(handle)); + return rc; } -static long kvm_dev_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +NTSTATUS kvm_dev_ioctl(PDEVICE_OBJECT pDevObj, PIRP pIrp, + unsigned int ioctl) { long r = -EINVAL; + struct gvm_device_extension *devext = pDevObj->DeviceExtension; + void* pin = pIrp->AssociatedIrp.SystemBuffer; switch (ioctl) { - case KVM_GET_API_VERSION: - if (arg) - goto out; - r = KVM_API_VERSION; + case GVM_GET_API_VERSION: + r = GVM_VERSION; + gvmUpdateReturnBuffer(pIrp, 0, &r, sizeof(r)); + r = STATUS_SUCCESS; break; - case KVM_CREATE_VM: - r = kvm_dev_ioctl_create_vm(arg); + case GVM_CREATE_VM: + r = kvm_dev_ioctl_create_vm(pDevObj, pIrp, 0); break; - case KVM_CHECK_EXTENSION: - r = kvm_vm_ioctl_check_extension_generic(NULL, arg); + case GVM_CHECK_EXTENSION: + r = kvm_vm_ioctl_check_extension_generic(NULL, *(long *)pin); + gvmUpdateReturnBuffer(pIrp, 0, &r, sizeof(r)); + r = STATUS_SUCCESS; break; - case KVM_GET_VCPU_MMAP_SIZE: - if (arg) - goto out; - r = PAGE_SIZE; /* struct kvm_run */ -#ifdef CONFIG_X86 - r += PAGE_SIZE; /* pio data page */ -#endif -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET - r += PAGE_SIZE; /* coalesced mmio ring page */ -#endif - break; - case KVM_TRACE_ENABLE: - case KVM_TRACE_PAUSE: - case KVM_TRACE_DISABLE: - r = -EOPNOTSUPP; + case GVM_GET_VCPU_MMAP_SIZE: + long mmap_size = 2 * PAGE_SIZE; + r = gvmUpdateReturnBuffer(pIrp, 0, &mmap_size, sizeof(mmap_size)); break; default: - return kvm_arch_dev_ioctl(filp, ioctl, arg); + return kvm_arch_dev_ioctl(devext, pIrp, ioctl); } -out: return r; } -static struct file_operations kvm_chardev_ops = { - .unlocked_ioctl = kvm_dev_ioctl, - .compat_ioctl = kvm_dev_ioctl, - .llseek = noop_llseek, -}; - -static struct miscdevice kvm_dev = { - KVM_MINOR, - "kvm", - &kvm_chardev_ops, -}; - static 
void hardware_enable_nolock(void *junk) { int cpu = raw_smp_processor_id(); @@ -3260,6 +2396,8 @@ static void hardware_enable_nolock(void *junk) atomic_inc(&hardware_enable_failed); pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu); } + + return; } static int kvm_starting_cpu(unsigned int cpu) @@ -3296,7 +2434,7 @@ static void hardware_disable_all_nolock(void) kvm_usage_count--; if (!kvm_usage_count) - on_each_cpu(hardware_disable_nolock, NULL, 1); + smp_call_function_many(cpu_online_mask, hardware_disable_nolock, NULL, 1); } static void hardware_disable_all(void) @@ -3315,8 +2453,7 @@ static int hardware_enable_all(void) kvm_usage_count++; if (kvm_usage_count == 1) { atomic_set(&hardware_enable_failed, 0); - on_each_cpu(hardware_enable_nolock, NULL, 1); - + smp_call_function_many(cpu_online_mask, hardware_enable_nolock, NULL, 1); if (atomic_read(&hardware_enable_failed)) { hardware_disable_all_nolock(); r = -EBUSY; @@ -3328,26 +2465,6 @@ static int hardware_enable_all(void) return r; } -static int kvm_reboot(struct notifier_block *notifier, unsigned long val, - void *v) -{ - /* - * Some (well, at least mine) BIOSes hang on reboot if - * in vmx root mode. - * - * And Intel TXT required VMX off for all cpu when system shutdown. - */ - pr_info("kvm: exiting hardware virtualization\n"); - kvm_rebooting = true; - on_each_cpu(hardware_disable_nolock, NULL, 1); - return NOTIFY_OK; -} - -static struct notifier_block kvm_reboot_notifier = { - .notifier_call = kvm_reboot, - .priority = 0, -}; - static void kvm_io_bus_destroy(struct kvm_io_bus *bus) { int i; @@ -3463,6 +2580,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + bus = vcpu->kvm->buses[bus_idx]; r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? 
r : 0; } @@ -3480,6 +2598,7 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + bus = vcpu->kvm->buses[bus_idx]; /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && @@ -3514,7 +2633,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, return -EOPNOTSUPP; } -EXPORT_SYMBOL_GPL(kvm_io_bus_write); /* kvm_io_bus_read - called under kvm->slots_lock */ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, @@ -3530,6 +2648,7 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + bus = vcpu->kvm->buses[bus_idx]; r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3542,8 +2661,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - /* exclude ioeventfd which is limited by maximum fd */ - if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) + if (bus->dev_count > NR_IOBUS_DEVS - 1) return -ENOSPC; new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * @@ -3604,6 +2722,7 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, srcu_idx = srcu_read_lock(&kvm->srcu); bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + bus = kvm->buses[bus_idx]; dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); if (dev_idx < 0) @@ -3616,241 +2735,35 @@ out_unlock: return iodev; } -EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev); - -static int kvm_debugfs_open(struct inode *inode, struct file *file, - int (*get)(void *, u64 *), int (*set)(void *, u64), - const char *fmt) -{ - struct kvm_stat_data *stat_data = (struct kvm_stat_data *) - inode->i_private; - - /* The debugfs files are a reference to the kvm struct which - * is still valid when kvm_destroy_vm is 
called. - * To avoid the race between open and the removal of the debugfs - * directory we test against the users count. - */ - if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0)) - return -ENOENT; - - if (simple_attr_open(inode, file, get, set, fmt)) { - kvm_put_kvm(stat_data->kvm); - return -ENOMEM; - } - - return 0; -} - -static int kvm_debugfs_release(struct inode *inode, struct file *file) -{ - struct kvm_stat_data *stat_data = (struct kvm_stat_data *) - inode->i_private; - - simple_attr_release(inode, file); - kvm_put_kvm(stat_data->kvm); - - return 0; -} - -static int vm_stat_get_per_vm(void *data, u64 *val) -{ - struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; - - *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset); - - return 0; -} - -static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file) -{ - __simple_attr_check_format("%llu\n", 0ull); - return kvm_debugfs_open(inode, file, vm_stat_get_per_vm, - NULL, "%llu\n"); -} - -static const struct file_operations vm_stat_get_per_vm_fops = { - .owner = THIS_MODULE, - .open = vm_stat_get_per_vm_open, - .release = kvm_debugfs_release, - .read = simple_attr_read, - .write = simple_attr_write, - .llseek = generic_file_llseek, -}; - -static int vcpu_stat_get_per_vm(void *data, u64 *val) -{ - int i; - struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; - struct kvm_vcpu *vcpu; - - *val = 0; - kvm_for_each_vcpu(i, vcpu, stat_data->kvm) - *val += *(u64 *)((void *)vcpu + stat_data->offset); - - return 0; -} - -static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file) -{ - __simple_attr_check_format("%llu\n", 0ull); - return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm, - NULL, "%llu\n"); -} - -static const struct file_operations vcpu_stat_get_per_vm_fops = { - .owner = THIS_MODULE, - .open = vcpu_stat_get_per_vm_open, - .release = kvm_debugfs_release, - .read = simple_attr_read, - .write = simple_attr_write, - .llseek = 
generic_file_llseek, -}; - -static const struct file_operations *stat_fops_per_vm[] = { - [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops, - [KVM_STAT_VM] = &vm_stat_get_per_vm_fops, -}; - -static int vm_stat_get(void *_offset, u64 *val) -{ - unsigned offset = (long)_offset; - struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; - u64 tmp_val; - - *val = 0; - spin_lock(&kvm_lock); - list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); - *val += tmp_val; - } - spin_unlock(&kvm_lock); - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); - -static int vcpu_stat_get(void *_offset, u64 *val) -{ - unsigned offset = (long)_offset; - struct kvm *kvm; - struct kvm_stat_data stat_tmp = {.offset = offset}; - u64 tmp_val; - - *val = 0; - spin_lock(&kvm_lock); - list_for_each_entry(kvm, &vm_list, vm_list) { - stat_tmp.kvm = kvm; - vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); - *val += tmp_val; - } - spin_unlock(&kvm_lock); - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); - -static const struct file_operations *stat_fops[] = { - [KVM_STAT_VCPU] = &vcpu_stat_fops, - [KVM_STAT_VM] = &vm_stat_fops, -}; - -static int kvm_init_debug(void) -{ - int r = -EEXIST; - struct kvm_stats_debugfs_item *p; - - kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); - if (kvm_debugfs_dir == NULL) - goto out; - - kvm_debugfs_num_entries = 0; - for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir, - (void *)(long)p->offset, - stat_fops[p->kind])) - goto out_dir; - } - - return 0; - -out_dir: - debugfs_remove_recursive(kvm_debugfs_dir); -out: - return r; -} - -static int kvm_suspend(void) +/* + * The following two functions are kept here so that they + * could be used once hooking driver with Windows Power State + * chage. 
+ */ +int kvm_suspend(void) { if (kvm_usage_count) - hardware_disable_nolock(NULL); + smp_call_function_many(cpu_online_mask, + hardware_disable_nolock, NULL, 1); return 0; } -static void kvm_resume(void) -{ - if (kvm_usage_count) { - WARN_ON(raw_spin_is_locked(&kvm_count_lock)); - hardware_enable_nolock(NULL); - } -} - -static struct syscore_ops kvm_syscore_ops = { - .suspend = kvm_suspend, - .resume = kvm_resume, -}; - -static inline -struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) -{ - return container_of(pn, struct kvm_vcpu, preempt_notifier); -} - -static void kvm_sched_in(struct preempt_notifier *pn, int cpu) +void kvm_resume(void) { - struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); - - if (vcpu->preempted) - vcpu->preempted = false; - - kvm_arch_sched_in(vcpu, cpu); - - kvm_arch_vcpu_load(vcpu, cpu); -} - -static void kvm_sched_out(struct preempt_notifier *pn, - struct task_struct *next) -{ - struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); - - if (current->state == TASK_RUNNING) - vcpu->preempted = true; - kvm_arch_vcpu_put(vcpu); + if (kvm_usage_count) + smp_call_function_many(cpu_online_mask, + hardware_enable_nolock, NULL, 1); } -int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, - struct module *module) +int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align) { int r; - int cpu; r = kvm_arch_init(opaque); if (r) goto out_fail; - /* - * kvm_arch_init makes sure there's at most one caller - * for architectures that support multiple implementations, - * like intel and amd on x86. - * kvm_arch_init must be called before kvm_irqfd_init to avoid creating - * conflicts in case kvm is already setup for another implementation. 
- */ - r = kvm_irqfd_init(); - if (r) - goto out_irqfd; - if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { r = -ENOMEM; goto out_free_0; @@ -3860,98 +2773,27 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, if (r < 0) goto out_free_0a; - for_each_online_cpu(cpu) { - smp_call_function_single(cpu, - kvm_arch_check_processor_compat, - &r, 1); - if (r < 0) - goto out_free_1; - } - - r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING", - kvm_starting_cpu, kvm_dying_cpu); - if (r) - goto out_free_2; - register_reboot_notifier(&kvm_reboot_notifier); - - /* A kmem cache lets us meet the alignment requirements of fx_save. */ - if (!vcpu_align) - vcpu_align = __alignof__(struct kvm_vcpu); - kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, - 0, NULL); - if (!kvm_vcpu_cache) { - r = -ENOMEM; - goto out_free_3; - } - - r = kvm_async_pf_init(); - if (r) - goto out_free; - - kvm_chardev_ops.owner = module; - kvm_vm_fops.owner = module; - kvm_vcpu_fops.owner = module; - - r = misc_register(&kvm_dev); - if (r) { - pr_err("kvm: misc device register failed\n"); - goto out_unreg; - } - - register_syscore_ops(&kvm_syscore_ops); - - kvm_preempt_ops.sched_in = kvm_sched_in; - kvm_preempt_ops.sched_out = kvm_sched_out; - - r = kvm_init_debug(); - if (r) { - pr_err("kvm: create debugfs files failed\n"); - goto out_undebugfs; - } - - r = kvm_vfio_ops_init(); - WARN_ON(r); + kvm_arch_check_processor_compat(&r); + if (r < 0) + goto out_free_1; return 0; -out_undebugfs: - unregister_syscore_ops(&kvm_syscore_ops); - misc_deregister(&kvm_dev); -out_unreg: - kvm_async_pf_deinit(); -out_free: - kmem_cache_destroy(kvm_vcpu_cache); -out_free_3: - unregister_reboot_notifier(&kvm_reboot_notifier); - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); -out_free_2: out_free_1: kvm_arch_hardware_unsetup(); out_free_0a: free_cpumask_var(cpus_hardware_enabled); out_free_0: - kvm_irqfd_exit(); -out_irqfd: kvm_arch_exit(); out_fail: 
return r; } -EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { - debugfs_remove_recursive(kvm_debugfs_dir); - misc_deregister(&kvm_dev); - kmem_cache_destroy(kvm_vcpu_cache); - kvm_async_pf_deinit(); - unregister_syscore_ops(&kvm_syscore_ops); - unregister_reboot_notifier(&kvm_reboot_notifier); - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); - on_each_cpu(hardware_disable_nolock, NULL, 1); + smp_call_function_many(cpu_online_mask, + hardware_disable_nolock, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); - kvm_irqfd_exit(); free_cpumask_var(cpus_hardware_enabled); - kvm_vfio_ops_exit(); } -EXPORT_SYMBOL_GPL(kvm_exit); diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c deleted file mode 100644 index 1dd087d..0000000 --- a/virt/kvm/vfio.c +++ /dev/null @@ -1,295 +0,0 @@ -/* - * VFIO-KVM bridge pseudo device - * - * Copyright (C) 2013 Red Hat, Inc. All rights reserved. - * Author: Alex Williamson <alex.williamson@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include <linux/errno.h> -#include <linux/file.h> -#include <linux/kvm_host.h> -#include <linux/list.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <linux/vfio.h> -#include "vfio.h" - -struct kvm_vfio_group { - struct list_head node; - struct vfio_group *vfio_group; -}; - -struct kvm_vfio { - struct list_head group_list; - struct mutex lock; - bool noncoherent; -}; - -static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) -{ - struct vfio_group *vfio_group; - struct vfio_group *(*fn)(struct file *); - - fn = symbol_get(vfio_group_get_external_user); - if (!fn) - return ERR_PTR(-EINVAL); - - vfio_group = fn(filep); - - symbol_put(vfio_group_get_external_user); - - return vfio_group; -} - -static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) -{ - void (*fn)(struct vfio_group *); - - fn = symbol_get(vfio_group_put_external_user); - if (!fn) - return; - - fn(vfio_group); - - symbol_put(vfio_group_put_external_user); -} - -static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group) -{ - long (*fn)(struct vfio_group *, unsigned long); - long ret; - - fn = symbol_get(vfio_external_check_extension); - if (!fn) - return false; - - ret = fn(vfio_group, VFIO_DMA_CC_IOMMU); - - symbol_put(vfio_external_check_extension); - - return ret > 0; -} - -/* - * Groups can use the same or different IOMMU domains. If the same then - * adding a new group may change the coherency of groups we've previously - * been told about. We don't want to care about any of that so we retest - * each group and bail as soon as we find one that's noncoherent. This - * means we only ever [un]register_noncoherent_dma once for the whole device. 
- */ -static void kvm_vfio_update_coherency(struct kvm_device *dev) -{ - struct kvm_vfio *kv = dev->private; - bool noncoherent = false; - struct kvm_vfio_group *kvg; - - mutex_lock(&kv->lock); - - list_for_each_entry(kvg, &kv->group_list, node) { - if (!kvm_vfio_group_is_coherent(kvg->vfio_group)) { - noncoherent = true; - break; - } - } - - if (noncoherent != kv->noncoherent) { - kv->noncoherent = noncoherent; - - if (kv->noncoherent) - kvm_arch_register_noncoherent_dma(dev->kvm); - else - kvm_arch_unregister_noncoherent_dma(dev->kvm); - } - - mutex_unlock(&kv->lock); -} - -static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) -{ - struct kvm_vfio *kv = dev->private; - struct vfio_group *vfio_group; - struct kvm_vfio_group *kvg; - int32_t __user *argp = (int32_t __user *)(unsigned long)arg; - struct fd f; - int32_t fd; - int ret; - - switch (attr) { - case KVM_DEV_VFIO_GROUP_ADD: - if (get_user(fd, argp)) - return -EFAULT; - - f = fdget(fd); - if (!f.file) - return -EBADF; - - vfio_group = kvm_vfio_group_get_external_user(f.file); - fdput(f); - - if (IS_ERR(vfio_group)) - return PTR_ERR(vfio_group); - - mutex_lock(&kv->lock); - - list_for_each_entry(kvg, &kv->group_list, node) { - if (kvg->vfio_group == vfio_group) { - mutex_unlock(&kv->lock); - kvm_vfio_group_put_external_user(vfio_group); - return -EEXIST; - } - } - - kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); - if (!kvg) { - mutex_unlock(&kv->lock); - kvm_vfio_group_put_external_user(vfio_group); - return -ENOMEM; - } - - list_add_tail(&kvg->node, &kv->group_list); - kvg->vfio_group = vfio_group; - - kvm_arch_start_assignment(dev->kvm); - - mutex_unlock(&kv->lock); - - kvm_vfio_update_coherency(dev); - - return 0; - - case KVM_DEV_VFIO_GROUP_DEL: - if (get_user(fd, argp)) - return -EFAULT; - - f = fdget(fd); - if (!f.file) - return -EBADF; - - vfio_group = kvm_vfio_group_get_external_user(f.file); - fdput(f); - - if (IS_ERR(vfio_group)) - return PTR_ERR(vfio_group); - - ret = -ENOENT; - - 
mutex_lock(&kv->lock); - - list_for_each_entry(kvg, &kv->group_list, node) { - if (kvg->vfio_group != vfio_group) - continue; - - list_del(&kvg->node); - kvm_vfio_group_put_external_user(kvg->vfio_group); - kfree(kvg); - ret = 0; - break; - } - - kvm_arch_end_assignment(dev->kvm); - - mutex_unlock(&kv->lock); - - kvm_vfio_group_put_external_user(vfio_group); - - kvm_vfio_update_coherency(dev); - - return ret; - } - - return -ENXIO; -} - -static int kvm_vfio_set_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - switch (attr->group) { - case KVM_DEV_VFIO_GROUP: - return kvm_vfio_set_group(dev, attr->attr, attr->addr); - } - - return -ENXIO; -} - -static int kvm_vfio_has_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - switch (attr->group) { - case KVM_DEV_VFIO_GROUP: - switch (attr->attr) { - case KVM_DEV_VFIO_GROUP_ADD: - case KVM_DEV_VFIO_GROUP_DEL: - return 0; - } - - break; - } - - return -ENXIO; -} - -static void kvm_vfio_destroy(struct kvm_device *dev) -{ - struct kvm_vfio *kv = dev->private; - struct kvm_vfio_group *kvg, *tmp; - - list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { - kvm_vfio_group_put_external_user(kvg->vfio_group); - list_del(&kvg->node); - kfree(kvg); - kvm_arch_end_assignment(dev->kvm); - } - - kvm_vfio_update_coherency(dev); - - kfree(kv); - kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ -} - -static int kvm_vfio_create(struct kvm_device *dev, u32 type); - -static struct kvm_device_ops kvm_vfio_ops = { - .name = "kvm-vfio", - .create = kvm_vfio_create, - .destroy = kvm_vfio_destroy, - .set_attr = kvm_vfio_set_attr, - .has_attr = kvm_vfio_has_attr, -}; - -static int kvm_vfio_create(struct kvm_device *dev, u32 type) -{ - struct kvm_device *tmp; - struct kvm_vfio *kv; - - /* Only one VFIO "device" per VM */ - list_for_each_entry(tmp, &dev->kvm->devices, vm_node) - if (tmp->ops == &kvm_vfio_ops) - return -EBUSY; - - kv = kzalloc(sizeof(*kv), GFP_KERNEL); - if (!kv) - return 
-ENOMEM; - - INIT_LIST_HEAD(&kv->group_list); - mutex_init(&kv->lock); - - dev->private = kv; - - return 0; -} - -int kvm_vfio_ops_init(void) -{ - return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO); -} - -void kvm_vfio_ops_exit(void) -{ - kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO); -} diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h deleted file mode 100644 index ab88c7d..0000000 --- a/virt/kvm/vfio.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef __KVM_VFIO_H -#define __KVM_VFIO_H - -#ifdef CONFIG_KVM_VFIO -int kvm_vfio_ops_init(void); -void kvm_vfio_ops_exit(void); -#else -static inline int kvm_vfio_ops_init(void) -{ - return 0; -} -static inline void kvm_vfio_ops_exit(void) -{ -} -#endif - -#endif |