Diffstat (limited to 'testcases/kernel/kvm')
-rw-r--r--   testcases/kernel/kvm/.gitignore            |   3
-rw-r--r--   testcases/kernel/kvm/Makefile              |   7
-rw-r--r--   testcases/kernel/kvm/bootstrap_x86.S       | 103
-rw-r--r--   testcases/kernel/kvm/bootstrap_x86_64.S    | 110
-rw-r--r--   testcases/kernel/kvm/include/kvm_common.h  |   8
-rw-r--r--   testcases/kernel/kvm/include/kvm_guest.h   |  14
-rw-r--r--   testcases/kernel/kvm/include/kvm_host.h    |  20
-rw-r--r--   testcases/kernel/kvm/include/kvm_x86.h     |  72
-rw-r--r--   testcases/kernel/kvm/include/kvm_x86_svm.h | 166
-rw-r--r--   testcases/kernel/kvm/kvm_svm01.c           | 123
-rw-r--r--   testcases/kernel/kvm/kvm_svm02.c           | 152
-rw-r--r--   testcases/kernel/kvm/kvm_svm03.c           | 169
-rw-r--r--   testcases/kernel/kvm/lib_guest.c           |  18
-rw-r--r--   testcases/kernel/kvm/lib_host.c            |  55
-rw-r--r--   testcases/kernel/kvm/lib_x86.c             | 247
-rw-r--r--   testcases/kernel/kvm/linker/x86.lds        |   5
-rw-r--r--   testcases/kernel/kvm/linker/x86_64.lds     |   5
17 files changed, 1260 insertions, 17 deletions
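
For orientation, each of the new kvm_svm0*.c files in the diff below is compiled twice by the Makefile: once with -DCOMPILE_PAYLOAD as the guest payload (linked against lib_guest.o and the bootstrap code) and once as the ordinary host-side LTP test that loads and runs that payload. A minimal sketch of that source layout, with the guest body reduced to a placeholder:

    #include "kvm_test.h"

    #ifdef COMPILE_PAYLOAD
    /* Guest side: runs inside the VM and reports through the shared result page */
    void main(void)
    {
    	tst_res(TPASS, "Hello from the guest");
    }
    #else
    /* Host side: creates the VM, runs the payload and prints its results */
    static struct tst_test test = {
    	.test_all = tst_kvm_run,
    	.setup = tst_kvm_setup,
    	.cleanup = tst_kvm_cleanup,
    };
    #endif /* COMPILE_PAYLOAD */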
diff --git a/testcases/kernel/kvm/.gitignore b/testcases/kernel/kvm/.gitignore index 349260359..9638a6fc7 100644 --- a/testcases/kernel/kvm/.gitignore +++ b/testcases/kernel/kvm/.gitignore @@ -1 +1,4 @@ /kvm_pagefault01 +/kvm_svm01 +/kvm_svm02 +/kvm_svm03 diff --git a/testcases/kernel/kvm/Makefile b/testcases/kernel/kvm/Makefile index 6986844be..ce4a5ede2 100644 --- a/testcases/kernel/kvm/Makefile +++ b/testcases/kernel/kvm/Makefile @@ -8,8 +8,8 @@ include $(top_srcdir)/include/mk/testcases.mk ASFLAGS = CPPFLAGS += -I$(abs_srcdir)/include GUEST_CPPFLAGS = $(CPPFLAGS) -DCOMPILE_PAYLOAD -GUEST_CFLAGS = -ffreestanding -O2 -Wall -fno-asynchronous-unwind-tables -mno-mmx -mno-sse -GUEST_LDFLAGS = -nostdlib -Wl,--build-id=none -fno-stack-protector +GUEST_CFLAGS = -ffreestanding -O2 -Wall -fno-asynchronous-unwind-tables -fno-stack-protector -mno-mmx -mno-sse +GUEST_LDFLAGS = -nostdlib -Wl,--build-id=none -z noexecstack GUEST_LDLIBS = KVM_LD ?= $(LD) @@ -48,6 +48,9 @@ endif lib_guest.o $(ARCH_OBJ): CPPFLAGS := $(GUEST_CPPFLAGS) lib_guest.o $(ARCH_OBJ): CFLAGS := $(GUEST_CFLAGS) +kvm_svm03: CFLAGS += -pthread +kvm_svm03: LDLIBS += -pthread + include $(top_srcdir)/include/mk/generic_leaf_target.mk %-payload.o: %.c lib_guest.o $(ARCH_OBJ) diff --git a/testcases/kernel/kvm/bootstrap_x86.S b/testcases/kernel/kvm/bootstrap_x86.S index 5ec4c0b7e..a39c6bea7 100644 --- a/testcases/kernel/kvm/bootstrap_x86.S +++ b/testcases/kernel/kvm/bootstrap_x86.S @@ -7,6 +7,9 @@ .set KVM_TEXIT, 0xff .set RESULT_ADDRESS, 0xfffff000 +.set KVM_GDT_SIZE, 32 + +.set MSR_VM_HSAVE_PA, 0xc0010117 /* * This section will be allocated at address 0x1000 and @@ -31,7 +34,7 @@ protected_mode_entry: mov %eax, %es jmp init_memlayout -.section .data.gdt32, "a", @progbits +.section .init.gdt32, "a", @progbits .macro gdt32_entry type:req l=0 d=0 dpl=0 limit=0xfffff g=1 p=1 .4byte \limit & 0xffff @@ -44,7 +47,7 @@ kvm_gdt: .8byte 0 gdt32_entry type=0x1a l=0 d=1 /* Code segment protected_mode, 32bits */ gdt32_entry type=0x12 /* Data segment, writable */ - .skip 16 /* Stack and TSS segment descriptors */ + .skip (KVM_GDT_SIZE-3)*8 /* Stack, TSS and other segment descriptors */ .Lgdt_end: .global kvm_gdt_desc @@ -196,6 +199,25 @@ kvm_read_cregs: pop %edi ret +.global kvm_read_sregs +kvm_read_sregs: + push %edi + mov 8(%esp), %edi + mov %cs, %ax + movw %ax, (%edi) + mov %ds, %ax + movw %ax, 2(%edi) + mov %es, %ax + movw %ax, 4(%edi) + mov %fs, %ax + movw %ax, 6(%edi) + mov %gs, %ax + movw %ax, 8(%edi) + mov %ss, %ax + movw %ax, 10(%edi) + pop %edi + ret + handle_interrupt: /* save CPU state */ push %ebp @@ -331,6 +353,83 @@ kvm_yield: hlt ret +.global kvm_svm_guest_entry +kvm_svm_guest_entry: + call *%eax +1: hlt + jmp 1b + +.global kvm_svm_vmrun +kvm_svm_vmrun: + push %edi + mov 8(%esp), %edi + push %ebx + push %esi + push %ebp + + clgi + + /* Save full host state */ + movl $MSR_VM_HSAVE_PA, %ecx + rdmsr + vmsave + push %eax + + /* Load guest registers */ + push %edi + movl (%edi), %eax + /* %eax is loaded by vmrun from VMCB */ + movl 0x0c(%edi), %ebx + movl 0x14(%edi), %ecx + movl 0x1c(%edi), %edx + movl 0x2c(%edi), %esi + movl 0x34(%edi), %ebp + /* %esp is loaded by vmrun from VMCB */ + movl 0x24(%edi), %edi + + vmload + vmrun + vmsave + + /* Clear guest register buffer */ + push %edi + push %ecx + movl 8(%esp), %edi + addl $4, %edi + xorl %eax, %eax + mov $32, %ecx + pushfl + cld + rep stosl + popfl + + /* Save guest registers */ + pop %ecx + pop %eax + pop %edi + movl %ebx, 0x0c(%edi) + movl %ecx, 0x14(%edi) + movl %edx, 0x1c(%edi) + movl 
%eax, 0x24(%edi) + movl %esi, 0x2c(%edi) + movl %ebp, 0x34(%edi) + /* Copy %eax and %esp from VMCB */ + movl (%edi), %esi + movl 0x5f8(%esi), %eax + movl %eax, 0x04(%edi) + movl 0x5d8(%esi), %eax + movl %eax, 0x3c(%edi) + + pop %eax + vmload + stgi + + pop %ebp + pop %esi + pop %ebx + pop %edi + ret + .section .bss.pgtables, "aw", @nobits .global kvm_pagetable diff --git a/testcases/kernel/kvm/bootstrap_x86_64.S b/testcases/kernel/kvm/bootstrap_x86_64.S index 9ddbd17ed..b02dd4d92 100644 --- a/testcases/kernel/kvm/bootstrap_x86_64.S +++ b/testcases/kernel/kvm/bootstrap_x86_64.S @@ -8,6 +8,9 @@ .set KVM_TCONF, 32 .set KVM_TEXIT, 0xff .set RESULT_ADDRESS, 0xfffff000 +.set KVM_GDT_SIZE, 32 + +.set MSR_VM_HSAVE_PA, 0xc0010117 /* * This section will be allocated at address 0x1000 and @@ -32,7 +35,7 @@ protected_mode_entry: mov %eax, %es jmp init_memlayout -.section .data.gdt32, "a", @progbits +.section .init.gdt32, "a", @progbits .macro gdt32_entry type:req l=0 d=0 dpl=0 limit=0xfffff g=1 p=1 .4byte \limit & 0xffff @@ -302,6 +305,22 @@ kvm_read_cregs: mov %rax, 24(%rdi) retq +.global kvm_read_sregs +kvm_read_sregs: + mov %cs, %ax + movw %ax, (%rdi) + mov %ds, %ax + movw %ax, 2(%rdi) + mov %es, %ax + movw %ax, 4(%rdi) + mov %fs, %ax + movw %ax, 6(%rdi) + mov %gs, %ax + movw %ax, 8(%rdi) + mov %ss, %ax + movw %ax, 10(%rdi) + retq + handle_interrupt: /* push CPU state */ push %rbp @@ -457,6 +476,93 @@ kvm_yield: hlt ret +.global kvm_svm_guest_entry +kvm_svm_guest_entry: + call *%rax +1: hlt + jmp 1b + +.global kvm_svm_vmrun +kvm_svm_vmrun: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + clgi + + /* Save full host state */ + movq $MSR_VM_HSAVE_PA, %rcx + rdmsr + shlq $32, %rdx + orq %rdx, %rax + vmsave + pushq %rax + + /* Load guest registers */ + pushq %rdi + movq (%rdi), %rax + /* %rax is loaded by vmrun from VMCB */ + movq 0x10(%rdi), %rbx + movq 0x18(%rdi), %rcx + movq 0x20(%rdi), %rdx + movq 0x30(%rdi), %rsi + movq 0x38(%rdi), %rbp + /* %rsp is loaded by vmrun from VMCB */ + movq 0x48(%rdi), %r8 + movq 0x50(%rdi), %r9 + movq 0x58(%rdi), %r10 + movq 0x60(%rdi), %r11 + movq 0x68(%rdi), %r12 + movq 0x70(%rdi), %r13 + movq 0x78(%rdi), %r14 + movq 0x80(%rdi), %r15 + movq 0x28(%rdi), %rdi + + vmload + vmrun + vmsave + + /* Save guest registers */ + movq %rdi, %rax + popq %rdi + movq %rbx, 0x10(%rdi) + movq %rcx, 0x18(%rdi) + movq %rdx, 0x20(%rdi) + /* %rax contains guest %rdi */ + movq %rax, 0x28(%rdi) + movq %rsi, 0x30(%rdi) + movq %rbp, 0x38(%rdi) + movq %r8, 0x48(%rdi) + movq %r9, 0x50(%rdi) + movq %r10, 0x58(%rdi) + movq %r11, 0x60(%rdi) + movq %r12, 0x68(%rdi) + movq %r13, 0x70(%rdi) + movq %r14, 0x78(%rdi) + movq %r15, 0x80(%rdi) + /* copy guest %rax and %rsp from VMCB*/ + movq (%rdi), %rsi + movq 0x5f8(%rsi), %rax + movq %rax, 0x08(%rdi) + movq 0x5d8(%rsi), %rax + movq %rax, 0x40(%rdi) + + /* Reload host state */ + popq %rax + vmload + + stgi + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + retq .section .bss.pgtables, "aw", @nobits .global kvm_pagetable @@ -478,7 +584,7 @@ kvm_pgtable_l4: kvm_gdt: .8byte 0 gdt32_entry type=0x1a l=1 limit=0 g=0 /* Code segment long mode */ - .skip 16 /* TSS segment descriptor */ + .skip (KVM_GDT_SIZE-2)*8 /* TSS and other segment descriptors */ .Lgdt_end: .global kvm_gdt_desc diff --git a/testcases/kernel/kvm/include/kvm_common.h b/testcases/kernel/kvm/include/kvm_common.h index 4e81d8302..377e3f6aa 100644 --- a/testcases/kernel/kvm/include/kvm_common.h +++ b/testcases/kernel/kvm/include/kvm_common.h 
@@ -11,6 +11,14 @@ #define KVM_TNONE -1 /* "No result" status value */ /* + * Result value for asynchronous notifications between guest and host. + * Do not use this value directly. Call tst_signal_host() or tst_wait_host() + * in guest code. The notification must be handled by another host thread + * and then the result value must be reset to KVM_TNONE. + */ +#define KVM_TSYNC 0xfe + +/* * Result value indicating end of test. If the test program exits using * the HLT instruction with any valid result value other than KVM_TEXIT or * TBROK, KVM runner will automatically resume VM execution after printing diff --git a/testcases/kernel/kvm/include/kvm_guest.h b/testcases/kernel/kvm/include/kvm_guest.h index ec13c5845..96f246155 100644 --- a/testcases/kernel/kvm/include/kvm_guest.h +++ b/testcases/kernel/kvm/include/kvm_guest.h @@ -64,6 +64,20 @@ void tst_brk_(const char *file, const int lineno, int result, const char *message) __attribute__((noreturn)); #define tst_brk(result, msg) tst_brk_(__FILE__, __LINE__, (result), (msg)) +/* + * Send asynchronous notification to host without stopping VM execution and + * return immediately. The notification must be handled by another host thread. + * The data argument will be passed to host in test_result->file_addr and + * can be used to send additional data both ways. + */ +void tst_signal_host(void *data); + +/* + * Call tst_signal_host(data) and wait for host to call + * tst_kvm_clear_guest_signal(). + */ +void tst_wait_host(void *data); + void *tst_heap_alloc_aligned(size_t size, size_t align); void *tst_heap_alloc(size_t size); diff --git a/testcases/kernel/kvm/include/kvm_host.h b/testcases/kernel/kvm/include/kvm_host.h index 2359944fd..06bcb5d45 100644 --- a/testcases/kernel/kvm/include/kvm_host.h +++ b/testcases/kernel/kvm/include/kvm_host.h @@ -125,13 +125,29 @@ struct kvm_cpuid2 *tst_kvm_get_cpuid(int sysfd); void tst_kvm_create_instance(struct tst_kvm_instance *inst, size_t ram_size); /* - * Execute the given KVM instance and print results. + * Execute the given KVM instance and print results. If ioctl(KVM_RUN) is + * expected to fail, pass the expected error code in exp_errno, otherwise + * set it to zero. Returns last value returned by ioctl(KVM_RUN). */ -void tst_kvm_run_instance(struct tst_kvm_instance *inst); +int tst_kvm_run_instance(struct tst_kvm_instance *inst, int exp_errno); /* * Close the given KVM instance. */ void tst_kvm_destroy_instance(struct tst_kvm_instance *inst); +/* + * Wait for given VM to call tst_signal_host() or tst_wait_host(). Timeout + * value is in milliseconds. Zero means no wait, negative value means wait + * forever. Returns 0 if signal was received, KVM_TEXIT if the VM exited + * without sending a signal, or -1 if timeout was reached. + */ +int tst_kvm_wait_guest(struct tst_kvm_instance *inst, int timeout_ms); + +/* + * Clear VM signal sent by tst_signal_host(). If the VM is waiting + * in tst_wait_host(), this function will signal the VM to resume execution. 
+ */ +void tst_kvm_clear_guest_signal(struct tst_kvm_instance *inst); + #endif /* KVM_HOST_H_ */ diff --git a/testcases/kernel/kvm/include/kvm_x86.h b/testcases/kernel/kvm/include/kvm_x86.h index 4f3671135..bc36c0e0f 100644 --- a/testcases/kernel/kvm/include/kvm_x86.h +++ b/testcases/kernel/kvm/include/kvm_x86.h @@ -10,6 +10,9 @@ #include "kvm_test.h" +#define PAGESIZE 0x1000 +#define KVM_GDT_SIZE 32 + /* Interrupts */ #define X86_INTR_COUNT 256 @@ -38,19 +41,48 @@ #define INTR_SECURITY_ERROR 30 +/* Segment descriptor flags */ +#define SEGTYPE_LDT 0x02 +#define SEGTYPE_TSS 0x09 +#define SEGTYPE_TSS_BUSY 0x0b +#define SEGTYPE_CALL_GATE 0x0c +#define SEGTYPE_INTR_GATE 0x0e +#define SEGTYPE_TRAP_GATE 0x0f +#define SEGTYPE_RODATA 0x10 +#define SEGTYPE_RWDATA 0x12 +#define SEGTYPE_STACK 0x16 +#define SEGTYPE_CODE 0x1a +#define SEGTYPE_MASK 0x1f + +#define SEGFLAG_NSYSTEM 0x10 +#define SEGFLAG_PRESENT 0x80 +#define SEGFLAG_CODE64 0x200 +#define SEGFLAG_32BIT 0x400 +#define SEGFLAG_PAGE_LIMIT 0x800 + + /* CPUID constants */ #define CPUID_GET_INPUT_RANGE 0x80000000 #define CPUID_GET_EXT_FEATURES 0x80000001 +#define CPUID_GET_SVM_FEATURES 0x8000000a /* Model-specific CPU register constants */ #define MSR_EFER 0xc0000080 +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_HSAVE_PA 0xc0010117 #define EFER_SCE (1 << 0) /* SYSCALL/SYSRET instructions enabled */ #define EFER_LME (1 << 8) /* CPU is running in 64bit mode */ #define EFER_LMA (1 << 10) /* CPU uses 64bit memory paging (read-only) */ #define EFER_NXE (1 << 11) /* Execute disable bit active */ +#define EFER_SVME (1 << 12) /* AMD SVM instructions enabled */ +#define VM_CR_DPD (1 << 0) +#define VM_CR_R_INIT (1 << 1) +#define VM_CR_DIS_A20M (1 << 2) +#define VM_CR_LOCK (1 << 3) +#define VM_CR_SVMDIS (1 << 4) /* Control register constants */ #define CR4_VME (1 << 0) @@ -91,6 +123,25 @@ struct intr_descriptor { #endif /* defined(__x86_64__) */ } __attribute__((__packed__)); +struct segment_descriptor { + unsigned int limit_lo : 16; + unsigned int baseaddr_lo : 24; + unsigned int flags_lo : 8; + unsigned int limit_hi : 4; + unsigned int flags_hi : 4; + unsigned int baseaddr_hi : 8; +} __attribute__((__packed__)); + +struct segment_descriptor64 { + unsigned int limit_lo : 16; + unsigned int baseaddr_lo : 24; + unsigned int flags_lo : 8; + unsigned int limit_hi : 4; + unsigned int flags_hi : 4; + uint64_t baseaddr_hi : 40; + uint32_t reserved; +} __attribute__((__packed__)); + struct page_table_entry_pae { unsigned int present: 1; unsigned int writable: 1; @@ -116,15 +167,36 @@ struct kvm_cregs { unsigned long cr0, cr2, cr3, cr4; }; +struct kvm_sregs { + uint16_t cs, ds, es, fs, gs, ss; +}; + +struct kvm_regs64 { + uint64_t rax, rbx, rcx, rdx, rdi, rsi, rbp, rsp; + uint64_t r8, r9, r10, r11, r12, r13, r14, r15; +}; + extern struct page_table_entry_pae kvm_pagetable[]; extern struct intr_descriptor kvm_idt[X86_INTR_COUNT]; +extern struct segment_descriptor kvm_gdt[KVM_GDT_SIZE]; /* Page table helper functions */ uintptr_t kvm_get_page_address_pae(const struct page_table_entry_pae *entry); +/* Segment descriptor table functions */ +void kvm_set_segment_descriptor(struct segment_descriptor *dst, + uint64_t baseaddr, uint32_t limit, unsigned int flags); +void kvm_parse_segment_descriptor(struct segment_descriptor *src, + uint64_t *baseaddr, uint32_t *limit, unsigned int *flags); +int kvm_find_free_descriptor(const struct segment_descriptor *table, + size_t size); +unsigned int kvm_create_stack_descriptor(struct segment_descriptor *table, + size_t tabsize, void 
*stack_base); + /* Functions for querying CPU info and status */ void kvm_get_cpuid(unsigned int eax, unsigned int ecx, struct kvm_cpuid *buf); void kvm_read_cregs(struct kvm_cregs *buf); +void kvm_read_sregs(struct kvm_sregs *buf); uint64_t kvm_rdmsr(unsigned int msr); void kvm_wrmsr(unsigned int msr, uint64_t value); diff --git a/testcases/kernel/kvm/include/kvm_x86_svm.h b/testcases/kernel/kvm/include/kvm_x86_svm.h new file mode 100644 index 000000000..b4b1b80e2 --- /dev/null +++ b/testcases/kernel/kvm/include/kvm_x86_svm.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 SUSE LLC <mdoucha@suse.cz> + * + * x86-specific KVM helper functions and structures for AMD SVM + */ + +#ifndef KVM_X86_SVM_H_ +#define KVM_X86_SVM_H_ + +#include "kvm_x86.h" + +/* CPUID_GET_SVM_FEATURES flags returned in EDX */ +#define SVM_CPUID_NESTED_PAGING (1 << 0) +#define SVM_CPUID_LBR_VIRT (1 << 1) +#define SVM_CPUID_LOCK (1 << 2) +#define SVM_CPUID_NRIP_SAVE (1 << 3) +#define SVM_CPUID_TSC_RATE_MSR (1 << 4) +#define SVM_CPUID_VMCB_CLEAN (1 << 5) +#define SVM_CPUID_FLUSH_ASID (1 << 6) +#define SVM_CPUID_DECODE_ASSIST (1 << 7) +#define SVM_CPUID_PAUSE_FILTER (1 << 10) +#define SVM_CPUID_PAUSE_THRESHOLD (1 << 12) +#define SVM_CPUID_AVIC (1 << 13) +#define SVM_CPUID_VMSAVE_VIRT (1 << 15) +#define SVM_CPUID_VGIF (1 << 16) +#define SVM_CPUID_GMET (1 << 17) +#define SVM_CPUID_X2AVIC (1 << 18) +#define SVM_CPUID_SSSCHECK (1 << 19) +#define SVM_CPUID_SPEC_CTRL (1 << 20) +#define SVM_CPUID_ROGPT (1 << 21) +#define SVM_CPUID_HOST_MCE_OVERRIDE (1 << 23) +#define SVM_CPUID_TLBI_CTL (1 << 24) +#define SVM_CPUID_NMI_VIRT (1 << 25) +#define SVM_CPUID_IBS_VIRT (1 << 26) + +/* SVM event intercept IDs */ +#define SVM_INTERCEPT_HLT 0x78 +#define SVM_INTERCEPT_VMRUN 0x80 +#define SVM_INTERCEPT_VMLOAD 0x82 +#define SVM_INTERCEPT_VMSAVE 0x83 +#define SVM_INTERCEPT_STGI 0x84 +#define SVM_INTERCEPT_CLGI 0x85 +#define SVM_INTERCEPT_MAX 0x95 + +/* SVM vmrun exit codes */ +#define SVM_EXIT_HLT 0x78 +#define SVM_EXIT_VMRUN 0x80 +#define SVM_EXIT_VMLOAD 0x82 +#define SVM_EXIT_VMSAVE 0x83 +#define SVM_EXIT_STGI 0x84 +#define SVM_EXIT_CLGI 0x85 +#define SVM_EXIT_AVIC_NOACCEL 0x402 +#define SVM_EXIT_INVALID ((uint64_t)-1) + +/* SVM VMCB flags */ +#define SVM_INTR_AVIC (1 << 7) + +struct kvm_vmcb_descriptor { + uint16_t selector; + uint16_t attrib; + uint32_t limit; + uint64_t base; +}; + +struct kvm_vmcb { + /* VMCB control area */ + uint8_t intercepts[20]; + uint8_t reserved1[44]; + uint64_t iopm_base_addr; + uint64_t msrpm_base_addr; + uint64_t tsc_offset; + uint32_t guest_asid; + uint32_t tlb_control; + uint8_t virtual_tpr; + uint8_t virtual_irq; + unsigned char virt_intr_prio: 4; + unsigned char virt_ignore_tpr: 4; + uint8_t virt_intr_ctl; + uint8_t virt_intr_vector; + uint8_t reserved2[3]; + uint64_t interrupt_shadow; + uint64_t exitcode; + uint64_t exitinfo1; + uint64_t exitinfo2; + uint64_t exit_int_info; + uint64_t enable_nested_paging; + uint64_t avic_bar; + uint64_t ghcb_gpa; + uint64_t event_injection; + uint64_t nested_cr3; + uint64_t virt_ext; + uint32_t vmcb_clean; + uint8_t reserved3[4]; + uint64_t next_rip; + uint8_t instr_len; + uint8_t instr_bytes[15]; + uint64_t avic_backing_page; + uint8_t reserved4[8]; + uint64_t avic_logical_ptr; + uint64_t avic_physical_ptr; + uint8_t reserved5[8]; + uint64_t vmsa_pa; + uint64_t vmgexit_rax; + uint8_t vmgexit_cpl; + uint8_t reserved6[0x2e7]; + + /* VMCB state save area */ + struct kvm_vmcb_descriptor es, cs, ss, ds, fs, gs; + struct 
kvm_vmcb_descriptor gdtr, ldtr, idtr, tr; + uint8_t reserved7[43]; + uint8_t cpl; + uint8_t reserved8[4]; + uint64_t efer; + uint8_t reserved9[112]; + uint64_t cr4; + uint64_t cr3; + uint64_t cr0; + uint64_t dr7; + uint64_t dr6; + uint64_t rflags; + uint64_t rip; + uint8_t reserved10[88]; + uint64_t rsp; + uint64_t s_cet; + uint64_t ssp; + uint64_t isst_addr; + uint64_t rax; + uint64_t star; + uint64_t lstar; + uint64_t cstar; + uint64_t sfmask; + uint64_t kernel_gs_base; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + uint64_t cr2; + uint8_t reserved11[32]; + uint64_t guest_pat; + uint8_t padding[0x990]; +}; + +struct kvm_svm_vcpu { + struct kvm_vmcb *vmcb; + struct kvm_regs64 regs; +}; + +/* AMD SVM virtualization helper functions */ +int kvm_is_svm_supported(void); +int kvm_get_svm_state(void); +void kvm_set_svm_state(int enabled); + +void kvm_init_svm(void); /* Fully initialize host SVM environment */ +struct kvm_vmcb *kvm_alloc_vmcb(void); +void kvm_vmcb_copy_gdt_descriptor(struct kvm_vmcb_descriptor *dst, + unsigned int gdt_id); +void kvm_vmcb_set_intercept(struct kvm_vmcb *vmcb, unsigned int id, + unsigned int state); +void kvm_init_guest_vmcb(struct kvm_vmcb *vmcb, uint32_t asid, uint16_t ss, + void *rsp, int (*guest_main)(void)); +struct kvm_svm_vcpu *kvm_create_svm_vcpu(int (*guest_main)(void), + int alloc_stack); + +void kvm_svm_vmrun(struct kvm_svm_vcpu *cpu); + +#endif /* KVM_X86_SVM_H_ */ diff --git a/testcases/kernel/kvm/kvm_svm01.c b/testcases/kernel/kvm/kvm_svm01.c new file mode 100644 index 000000000..32d15526b --- /dev/null +++ b/testcases/kernel/kvm/kvm_svm01.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 SUSE LLC + * Author: Nicolai Stange <nstange@suse.de> + * LTP port: Martin Doucha <mdoucha@suse.cz> + */ + +/*\ + * CVE 2021-3653 + * + * Check that KVM either blocks enabling virtual interrupt controller (AVIC) + * in nested VMs or correctly sets up the required memory address translation. + * If AVIC is enabled without address translation in the host kernel, + * the nested VM will be able to read and write an arbitraty physical memory + * page specified by the parent VM. 
Unauthorized memory access fixed in: + * + * commit 0f923e07124df069ba68d8bb12324398f4b6b709 + * Author: Maxim Levitsky <mlevitsk@redhat.com> + * Date: Thu Jul 15 01:56:24 2021 +0300 + * + * KVM: nSVM: avoid picking up unsupported bits from L2 in int_ctl (CVE-2021-3653) + */ + +#include "kvm_test.h" + +#ifdef COMPILE_PAYLOAD +#if defined(__i386__) || defined(__x86_64__) + +#include "kvm_x86_svm.h" + +#define AVIC_REG_ADDR 0x280 +#define AVIC_TEST_VAL 0xec +#define AVIC_READ_FAIL 0x12ead + +#define AVIC_INFO_MASK ((1ULL << 32) | 0xff0) +#define AVIC_INFO_EXP ((1ULL << 32) | AVIC_REG_ADDR) + +static uint32_t * const avic_ptr = (uint32_t *)AVIC_REG_ADDR; + +static int guest_main(void) +{ + if (*avic_ptr != 0xaaaaaaaa) + return AVIC_READ_FAIL; + + *avic_ptr = AVIC_TEST_VAL; + return 0; +} + +void main(void) +{ + struct kvm_svm_vcpu *vcpu; + + kvm_init_svm(); + vcpu = kvm_create_svm_vcpu(guest_main, 1); + + /* + * Enable AVIC and set both the AVIC base address (where the nested VM + * will write) and backing page address (where the parent VM expects + * to see the changes) to 0 + */ + vcpu->vmcb->virt_intr_ctl |= SVM_INTR_AVIC; + vcpu->vmcb->avic_backing_page = 0; + vcpu->vmcb->avic_bar = 0; + memset((void *)8, 0xaa, PAGESIZE - 8); + + /* Write into AVIC backing page in the nested VM */ + kvm_svm_vmrun(vcpu); + + switch (vcpu->vmcb->exitcode) { + case SVM_EXIT_HLT: + if (vcpu->vmcb->rax == AVIC_READ_FAIL) { + tst_res(TFAIL, "Nested VM can read host memory"); + return; + } + + if (vcpu->vmcb->rax) + tst_brk(TBROK, "Unexpected guest_main() return value"); + + break; + + case SVM_EXIT_AVIC_NOACCEL: + if ((vcpu->vmcb->exitinfo1 & AVIC_INFO_MASK) == AVIC_INFO_EXP) { + tst_res(TPASS, "AVIC register write caused VMEXIT"); + break; + } + + /* unexpected exit, fall through */ + + default: + tst_brk(TBROK, "Nested VM exited unexpectedly"); + } + + if (*avic_ptr != AVIC_TEST_VAL) { + tst_res(TFAIL, "Write into AVIC ESR redirected to host memory"); + return; + } + + tst_res(TPASS, "Writes into AVIC backing page were not redirected"); +} + +#else /* defined(__i386__) || defined(__x86_64__) */ +TST_TEST_TCONF("Test supported only on x86"); +#endif /* defined(__i386__) || defined(__x86_64__) */ + +#else /* COMPILE_PAYLOAD */ + +static struct tst_test test = { + .test_all = tst_kvm_run, + .setup = tst_kvm_setup, + .cleanup = tst_kvm_cleanup, + .supported_archs = (const char *const []) { + "x86_64", + "x86", + NULL + }, + .tags = (struct tst_tag[]){ + {"linux-git", "0f923e07124d"}, + {"CVE", "2021-3653"}, + {} + } +}; + +#endif /* COMPILE_PAYLOAD */ diff --git a/testcases/kernel/kvm/kvm_svm02.c b/testcases/kernel/kvm/kvm_svm02.c new file mode 100644 index 000000000..5d2e2ce37 --- /dev/null +++ b/testcases/kernel/kvm/kvm_svm02.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 SUSE LLC + * Author: Nicolai Stange <nstange@suse.de> + * LTP port: Martin Doucha <mdoucha@suse.cz> + */ + +/*\ + * CVE 2021-3656 + * + * Check that KVM correctly intercepts VMSAVE and VMLOAD instructions + * in a nested virtual machine even when the parent guest disables + * intercepting either instruction. If KVM does not override the disabled + * intercepts, it'll give the nested VM read/write access to a few bytes + * of an arbitrary physical memory page. 
Unauthorized memory access fixed in: + * + * commit c7dfa4009965a9b2d7b329ee970eb8da0d32f0bc + * Author: Maxim Levitsky <mlevitsk@redhat.com> + * Date: Mon Jul 19 16:05:00 2021 +0300 + * + * KVM: nSVM: always intercept VMLOAD/VMSAVE when nested (CVE-2021-3656) + */ + +#include "kvm_test.h" + +#ifdef COMPILE_PAYLOAD +#if defined(__i386__) || defined(__x86_64__) + +#include "kvm_x86_svm.h" + +static void *vmsave_buf; + +/* Load FS, GS, TR and LDTR state from vmsave_buf */ +static int guest_vmload(void) +{ + asm ( + "vmload %0\n" + : + : "a" (vmsave_buf) + ); + return 0; +} + +/* Save current FS, GS, TR and LDTR state to vmsave_buf */ +static int guest_vmsave(void) +{ + asm ( + "vmsave %0\n" + : + : "a" (vmsave_buf) + ); + return 0; +} + +static int cmp_descriptor(const struct kvm_vmcb_descriptor *a, + const struct kvm_vmcb_descriptor *b) +{ + int ret; + + ret = a->selector != b->selector; + ret = ret || a->attrib != b->attrib; + ret = ret || a->limit != b->limit; + ret = ret || a->base != b->base; + return ret; +} + +/* Return non-zero if the VMCB fields touched by vmsave/vmload differ */ +static int cmp_vmcb(const struct kvm_vmcb *a, const struct kvm_vmcb *b) +{ + int ret; + + ret = cmp_descriptor(&a->fs, &b->fs); + ret = ret || cmp_descriptor(&a->gs, &b->gs); + ret = ret || cmp_descriptor(&a->tr, &b->tr); + ret = ret || cmp_descriptor(&a->ldtr, &b->ldtr); + ret = ret || a->kernel_gs_base != b->kernel_gs_base; + ret = ret || a->star != b->star; + ret = ret || a->lstar != b->lstar; + ret = ret || a->cstar != b->cstar; + ret = ret || a->sfmask != b->sfmask; + ret = ret || a->sysenter_cs != b->sysenter_cs; + ret = ret || a->sysenter_esp != b->sysenter_esp; + ret = ret || a->sysenter_eip != b->sysenter_eip; + return ret; +} + +void main(void) +{ + uint16_t ss; + uint64_t rsp; + struct kvm_svm_vcpu *vcpu; + + kvm_init_svm(); + vcpu = kvm_create_svm_vcpu(guest_vmload, 1); + kvm_vmcb_set_intercept(vcpu->vmcb, SVM_INTERCEPT_VMLOAD, 0); + vmsave_buf = kvm_alloc_vmcb(); + + /* Save allocated stack for later VM reinit */ + ss = vcpu->vmcb->ss.selector; + rsp = vcpu->vmcb->rsp; + + /* Load partial state from vmsave_buf and save it to vcpu->vmcb */ + kvm_svm_vmrun(vcpu); + + if (vcpu->vmcb->exitcode != SVM_EXIT_HLT) + tst_brk(TBROK, "Nested VM exited unexpectedly"); + + if (cmp_vmcb(vcpu->vmcb, vmsave_buf)) { + tst_res(TFAIL, "Nested VM can read host memory"); + return; + } + + /* Load state from vcpu->vmcb and save it to vmsave_buf */ + memset(vmsave_buf, 0xaa, sizeof(struct kvm_vmcb)); + kvm_init_guest_vmcb(vcpu->vmcb, 1, ss, (void *)rsp, guest_vmsave); + kvm_vmcb_set_intercept(vcpu->vmcb, SVM_INTERCEPT_VMSAVE, 0); + kvm_svm_vmrun(vcpu); + + if (vcpu->vmcb->exitcode != SVM_EXIT_HLT) + tst_brk(TBROK, "Nested VM exited unexpectedly"); + + if (cmp_vmcb(vcpu->vmcb, vmsave_buf)) { + tst_res(TFAIL, "Nested VM can overwrite host memory"); + return; + } + + tst_res(TPASS, "VMLOAD and VMSAVE were intercepted by kernel"); +} + +#else /* defined(__i386__) || defined(__x86_64__) */ +TST_TEST_TCONF("Test supported only on x86"); +#endif /* defined(__i386__) || defined(__x86_64__) */ + +#else /* COMPILE_PAYLOAD */ + +static struct tst_test test = { + .test_all = tst_kvm_run, + .setup = tst_kvm_setup, + .cleanup = tst_kvm_cleanup, + .supported_archs = (const char *const []) { + "x86_64", + "x86", + NULL + }, + .tags = (struct tst_tag[]){ + {"linux-git", "c7dfa4009965"}, + {"CVE", "2021-3656"}, + {} + } +}; + +#endif /* COMPILE_PAYLOAD */ diff --git a/testcases/kernel/kvm/kvm_svm03.c 
b/testcases/kernel/kvm/kvm_svm03.c new file mode 100644 index 000000000..87164d013 --- /dev/null +++ b/testcases/kernel/kvm/kvm_svm03.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 SUSE LLC + * Author: Nicolai Stange <nstange@suse.de> + * LTP port: Martin Doucha <mdoucha@suse.cz> + */ + +/*\ + * Check that KVM correctly intercepts the CLGI instruction in a nested + * virtual machine even when the parent guest disables intercept. + * If KVM does not override the disabled intercept, it'll allow the nested VM + * to hold the physical CPU indefinitely and potentially perform a denial + * of service attack against the host kernel. CPU lockup fixed in: + * + * commit 91b7130cb6606d8c6b3b77e54426b3f3a83f48b1 + * Author: Paolo Bonzini <pbonzini@redhat.com> + * Date: Fri May 22 12:28:52 2020 -0400 + * + * KVM: SVM: preserve VGIF across VMCB switch + */ + +#include "kvm_test.h" + +#ifdef COMPILE_PAYLOAD +#if defined(__i386__) || defined(__x86_64__) + +#include "kvm_x86_svm.h" + +/* Disable global interrupts */ +static int guest_clgi(void) +{ + int ret, *result = (int *)KVM_RESULT_BASEADDR; + + /* + * Make sure that result page is present in memory. CLGI may disable + * page fault handling on the current CPU. The actual value + * at that address is irrelevant. + */ + ret = *result; + + /* Disable global interrupts */ + asm ("clgi"); + + /* Signal host to kill the VM and wait */ + tst_wait_host(NULL); + return ret; +} + +void main(void) +{ + struct kvm_svm_vcpu *vcpu; + + kvm_init_svm(); + vcpu = kvm_create_svm_vcpu(guest_clgi, 1); + kvm_vmcb_set_intercept(vcpu->vmcb, SVM_INTERCEPT_CLGI, 0); + kvm_svm_vmrun(vcpu); + + if (vcpu->vmcb->exitcode != SVM_EXIT_HLT) + tst_brk(TBROK, "Nested VM exited unexpectedly"); +} + +#else /* defined(__i386__) || defined(__x86_64__) */ +TST_TEST_TCONF("Test supported only on x86"); +#endif /* defined(__i386__) || defined(__x86_64__) */ + +#else /* COMPILE_PAYLOAD */ + +#include <pthread.h> +#include "tst_safe_pthread.h" +#include "tst_safe_clocks.h" + +static struct tst_kvm_instance test_vm = { .vm_fd = -1 }; +static pthread_mutex_t mutex; +static int mutex_init; + +static void sighandler(int sig LTP_ATTRIBUTE_UNUSED) +{ + +} + +static void *vm_thread(void *arg) +{ + SAFE_PTHREAD_MUTEX_LOCK(&mutex); + tst_kvm_run_instance(&test_vm, EINTR); + SAFE_PTHREAD_MUTEX_UNLOCK(&mutex); + return arg; +} + +static void setup(void) +{ + struct sigaction sa = { .sa_handler = sighandler }; + pthread_mutexattr_t attr; + + SAFE_PTHREAD_MUTEXATTR_INIT(&attr); + SAFE_PTHREAD_MUTEXATTR_SETTYPE(&attr, PTHREAD_MUTEX_NORMAL); + SAFE_PTHREAD_MUTEX_INIT(&mutex, &attr); + mutex_init = 1; + SAFE_PTHREAD_MUTEXATTR_DESTROY(&attr); + SAFE_SIGACTION(SIGUSR1, &sa, NULL); +} + +static void run(void) +{ + struct timespec timeout; + pthread_t tid; + int ret; + + tst_kvm_create_instance(&test_vm, DEFAULT_RAM_SIZE); + + SAFE_PTHREAD_CREATE(&tid, NULL, vm_thread, NULL); + ret = tst_kvm_wait_guest(&test_vm, 2000); + + if (ret == KVM_TEXIT) { + SAFE_PTHREAD_JOIN(tid, NULL); + tst_brk(TCONF, "Guest exited early"); + } + + if (ret) + tst_brk(TBROK, "Wait for guest initialization timed out"); + + SAFE_PTHREAD_KILL(tid, SIGUSR1); + SAFE_CLOCK_GETTIME(CLOCK_REALTIME, &timeout); + timeout.tv_sec += 2; + + if (SAFE_PTHREAD_MUTEX_TIMEDLOCK(&mutex, &timeout)) { + tst_kvm_clear_guest_signal(&test_vm); + tst_res(TFAIL, "VM thread does not respond to signals"); + } else { + SAFE_PTHREAD_MUTEX_UNLOCK(&mutex); + tst_res(TPASS, "VM thread was interrupted by signal"); + } + + 
SAFE_PTHREAD_JOIN(tid, NULL); + tst_kvm_destroy_instance(&test_vm); + tst_free_all(); +} + +static void cleanup(void) +{ + /* + * If the mutex is locked, the VM is likely still running, cannot + * clean up anything + */ + if (!mutex_init || SAFE_PTHREAD_MUTEX_TRYLOCK(&mutex)) + return; + + if (!SAFE_PTHREAD_MUTEX_UNLOCK(&mutex)) + SAFE_PTHREAD_MUTEX_DESTROY(&mutex); + + tst_kvm_destroy_instance(&test_vm); +} + +static struct tst_test test = { + .test_all = run, + .setup = setup, + .cleanup = cleanup, + .min_cpus = 2, + .supported_archs = (const char *const []) { + "x86_64", + "x86", + NULL + }, + .tags = (struct tst_tag[]){ + {"linux-git", "91b7130cb660"}, + {} + } +}; + +#endif /* COMPILE_PAYLOAD */ diff --git a/testcases/kernel/kvm/lib_guest.c b/testcases/kernel/kvm/lib_guest.c index d237293fc..f3e21d3d6 100644 --- a/testcases/kernel/kvm/lib_guest.c +++ b/testcases/kernel/kvm/lib_guest.c @@ -82,7 +82,7 @@ char *ptr2hex(char *dest, uintptr_t val) uintptr_t tmp; char *ret = dest; - for (i = 4; val >> i; i += 4) + for (i = 4, tmp = val >> 4; tmp; i += 4, tmp >>= 4) ; do { @@ -155,6 +155,22 @@ void tst_brk_(const char *file, const int lineno, int result, kvm_exit(); } +void tst_signal_host(void *data) +{ + test_result->file_addr = (uintptr_t)data; + test_result->result = KVM_TSYNC; +} + +void tst_wait_host(void *data) +{ + volatile int32_t *vres = &test_result->result; + + tst_signal_host(data); + + while (*vres != KVM_TNONE) + ; +} + void tst_handle_interrupt(struct kvm_interrupt_frame *ifrm, long vector, unsigned long errcode) { diff --git a/testcases/kernel/kvm/lib_host.c b/testcases/kernel/kvm/lib_host.c index 2782e68b0..8e3d6094e 100644 --- a/testcases/kernel/kvm/lib_host.c +++ b/testcases/kernel/kvm/lib_host.c @@ -10,6 +10,8 @@ #define TST_NO_DEFAULT_MAIN #include "tst_test.h" +#include "tst_clocks.h" +#include "tst_timer.h" #include "kvm_host.h" static struct tst_kvm_instance test_vm = { .vm_fd = -1 }; @@ -234,14 +236,28 @@ void tst_kvm_create_instance(struct tst_kvm_instance *inst, size_t ram_size) inst->result->message[0] = '\0'; } -void tst_kvm_run_instance(struct tst_kvm_instance *inst) +int tst_kvm_run_instance(struct tst_kvm_instance *inst, int exp_errno) { struct kvm_regs regs; + int ret; while (1) { inst->result->result = KVM_TNONE; inst->result->message[0] = '\0'; - SAFE_IOCTL(inst->vcpu_fd, KVM_RUN, 0); + errno = 0; + ret = ioctl(inst->vcpu_fd, KVM_RUN, 0); + + if (ret == -1) { + if (errno == exp_errno) + return ret; + + tst_brk(TBROK | TERRNO, "ioctl(KVM_RUN) failed"); + } + + if (ret < 0) { + tst_brk(TBROK | TERRNO, + "Invalid ioctl(KVM_RUN) return value %d", ret); + } if (inst->vcpu_info->exit_reason != KVM_EXIT_HLT) { SAFE_IOCTL(inst->vcpu_fd, KVM_GET_REGS, ®s); @@ -255,6 +271,8 @@ void tst_kvm_run_instance(struct tst_kvm_instance *inst) tst_kvm_print_result(inst); } + + return ret; } void tst_kvm_destroy_instance(struct tst_kvm_instance *inst) @@ -272,6 +290,37 @@ void tst_kvm_destroy_instance(struct tst_kvm_instance *inst) memset(inst->ram, 0, sizeof(inst->ram)); } +int tst_kvm_wait_guest(struct tst_kvm_instance *inst, int timeout_ms) +{ + volatile struct tst_kvm_result *result = inst->result; + int32_t res; + struct timespec start, now; + + if (timeout_ms >= 0) + tst_clock_gettime(CLOCK_MONOTONIC, &start); + + while ((res = result->result) != KVM_TSYNC) { + if (res == KVM_TEXIT) + return res; + + if (timeout_ms >= 0) { + tst_clock_gettime(CLOCK_MONOTONIC, &now); + + if (tst_timespec_diff_ms(now, start) >= timeout_ms) + return -1; + } + + usleep(1000); + } + + return 
0; +} + +void tst_kvm_clear_guest_signal(struct tst_kvm_instance *inst) +{ + inst->result->result = KVM_TNONE; +} + void tst_kvm_setup(void) { @@ -280,7 +329,7 @@ void tst_kvm_setup(void) void tst_kvm_run(void) { tst_kvm_create_instance(&test_vm, DEFAULT_RAM_SIZE); - tst_kvm_run_instance(&test_vm); + tst_kvm_run_instance(&test_vm, 0); tst_kvm_destroy_instance(&test_vm); tst_free_all(); } diff --git a/testcases/kernel/kvm/lib_x86.c b/testcases/kernel/kvm/lib_x86.c index dc2354b10..3e6656f11 100644 --- a/testcases/kernel/kvm/lib_x86.c +++ b/testcases/kernel/kvm/lib_x86.c @@ -5,7 +5,9 @@ * x86-specific KVM helper functions */ -#include "kvm_x86.h" +#include "kvm_x86_svm.h" + +void kvm_svm_guest_entry(void); struct kvm_interrupt_frame { uintptr_t eip, cs, eflags, esp, ss; @@ -110,6 +112,98 @@ uintptr_t kvm_get_page_address_pae(const struct page_table_entry_pae *entry) return entry->address << 12; } +#ifdef __x86_64__ +static void kvm_set_segment_descriptor64(struct segment_descriptor64 *dst, + uint64_t baseaddr, uint32_t limit, unsigned int flags) +{ + + dst->baseaddr_lo = baseaddr & 0xffffff; + dst->baseaddr_hi = baseaddr >> 24; + dst->limit_lo = limit & 0xffff; + dst->limit_hi = limit >> 16; + dst->flags_lo = flags & 0xff; + dst->flags_hi = (flags >> 8) & 0xf; + dst->reserved = 0; +} +#endif + +void kvm_set_segment_descriptor(struct segment_descriptor *dst, + uint64_t baseaddr, uint32_t limit, unsigned int flags) +{ + if (limit >> 20) + tst_brk(TBROK, "Segment limit out of range"); + +#ifdef __x86_64__ + /* System descriptors have double size in 64bit mode */ + if (!(flags & SEGFLAG_NSYSTEM)) { + kvm_set_segment_descriptor64((struct segment_descriptor64 *)dst, + baseaddr, limit, flags); + return; + } +#endif + + if (baseaddr >> 32) + tst_brk(TBROK, "Segment base address out of range"); + + dst->baseaddr_lo = baseaddr & 0xffffff; + dst->baseaddr_hi = baseaddr >> 24; + dst->limit_lo = limit & 0xffff; + dst->limit_hi = limit >> 16; + dst->flags_lo = flags & 0xff; + dst->flags_hi = (flags >> 8) & 0xf; +} + +void kvm_parse_segment_descriptor(struct segment_descriptor *src, + uint64_t *baseaddr, uint32_t *limit, unsigned int *flags) +{ + if (baseaddr) { + *baseaddr = (((uint64_t)src->baseaddr_hi) << 24) | + src->baseaddr_lo; + } + + if (limit) + *limit = (((uint32_t)src->limit_hi) << 16) | src->limit_lo; + + if (flags) + *flags = (((uint32_t)src->flags_hi) << 8) | src->flags_lo; +} + +int kvm_find_free_descriptor(const struct segment_descriptor *table, + size_t size) +{ + const struct segment_descriptor *ptr; + size_t i; + + for (i = 0, ptr = table; i < size; i++, ptr++) { + if (!(ptr->flags_lo & SEGFLAG_PRESENT)) + return i; + +#ifdef __x86_64__ + /* System descriptors have double size in 64bit mode */ + if (!(ptr->flags_lo & SEGFLAG_NSYSTEM)) { + ptr++; + i++; + } +#endif + } + + return -1; +} + +unsigned int kvm_create_stack_descriptor(struct segment_descriptor *table, + size_t tabsize, void *stack_base) +{ + int ret = kvm_find_free_descriptor(table, tabsize); + + if (ret < 0) + tst_brk(TBROK, "Descriptor table is full"); + + kvm_set_segment_descriptor(table + ret, 0, + (((uintptr_t)stack_base) - 1) >> 12, SEGTYPE_STACK | + SEGFLAG_PRESENT | SEGFLAG_32BIT | SEGFLAG_PAGE_LIMIT); + return ret; +} + void kvm_get_cpuid(unsigned int eax, unsigned int ecx, struct kvm_cpuid *buf) { asm ( @@ -148,3 +242,154 @@ uintptr_t kvm_get_interrupt_ip(const struct kvm_interrupt_frame *ifrm) { return ifrm->eip; } + +int kvm_is_svm_supported(void) +{ + struct kvm_cpuid buf; + + kvm_get_cpuid(CPUID_GET_INPUT_RANGE, 
0, &buf); + + if (buf.eax < CPUID_GET_EXT_FEATURES) + return 0; + + kvm_get_cpuid(CPUID_GET_EXT_FEATURES, 0, &buf); + return buf.ecx & 0x4; +} + +int kvm_get_svm_state(void) +{ + return kvm_rdmsr(MSR_EFER) & EFER_SVME; +} + +void kvm_set_svm_state(int enabled) +{ + uint64_t value; + + if (!kvm_is_svm_supported()) + tst_brk(TCONF, "CPU does not support SVM"); + + if (kvm_rdmsr(MSR_VM_CR) & VM_CR_SVMDIS) + tst_brk(TCONF, "SVM is supported but disabled"); + + value = kvm_rdmsr(MSR_EFER); + + if (enabled) + value |= EFER_SVME; + else + value &= ~EFER_SVME; + + kvm_wrmsr(MSR_EFER, value); +} + +struct kvm_vmcb *kvm_alloc_vmcb(void) +{ + struct kvm_vmcb *ret; + + ret = tst_heap_alloc_aligned(sizeof(struct kvm_vmcb), PAGESIZE); + memset(ret, 0, sizeof(struct kvm_vmcb)); + return ret; +} + +void kvm_init_svm(void) +{ + kvm_set_svm_state(1); + kvm_wrmsr(MSR_VM_HSAVE_PA, (uintptr_t)kvm_alloc_vmcb()); +} + +void kvm_vmcb_copy_gdt_descriptor(struct kvm_vmcb_descriptor *dst, + unsigned int gdt_id) +{ + uint64_t baseaddr; + uint32_t limit; + unsigned int flags; + + if (gdt_id >= KVM_GDT_SIZE) + tst_brk(TBROK, "GDT descriptor ID out of range"); + + kvm_parse_segment_descriptor(kvm_gdt + gdt_id, &baseaddr, &limit, + &flags); + + if (!(flags & SEGFLAG_PRESENT)) { + memset(dst, 0, sizeof(struct kvm_vmcb_descriptor)); + return; + } + + if (flags & SEGFLAG_PAGE_LIMIT) + limit = (limit << 12) | 0xfff; + + dst->selector = gdt_id << 3; + dst->attrib = flags; + dst->limit = limit; + dst->base = baseaddr; +} + +void kvm_vmcb_set_intercept(struct kvm_vmcb *vmcb, unsigned int id, + unsigned int state) +{ + unsigned int addr = id / 8, bit = 1 << (id % 8); + + if (id >= SVM_INTERCEPT_MAX) + tst_brk(TBROK, "Invalid SVM intercept ID"); + + if (state) + vmcb->intercepts[addr] |= bit; + else + vmcb->intercepts[addr] &= ~bit; +} + +void kvm_init_guest_vmcb(struct kvm_vmcb *vmcb, uint32_t asid, uint16_t ss, + void *rsp, int (*guest_main)(void)) +{ + struct kvm_cregs cregs; + struct kvm_sregs sregs; + + kvm_read_cregs(&cregs); + kvm_read_sregs(&sregs); + + kvm_vmcb_set_intercept(vmcb, SVM_INTERCEPT_VMRUN, 1); + kvm_vmcb_set_intercept(vmcb, SVM_INTERCEPT_HLT, 1); + + kvm_vmcb_copy_gdt_descriptor(&vmcb->es, sregs.es >> 3); + kvm_vmcb_copy_gdt_descriptor(&vmcb->cs, sregs.cs >> 3); + kvm_vmcb_copy_gdt_descriptor(&vmcb->ss, ss); + kvm_vmcb_copy_gdt_descriptor(&vmcb->ds, sregs.ds >> 3); + kvm_vmcb_copy_gdt_descriptor(&vmcb->fs, sregs.fs >> 3); + kvm_vmcb_copy_gdt_descriptor(&vmcb->gs, sregs.gs >> 3); + vmcb->gdtr.base = (uintptr_t)kvm_gdt; + vmcb->gdtr.limit = (KVM_GDT_SIZE*sizeof(struct segment_descriptor)) - 1; + vmcb->idtr.base = (uintptr_t)kvm_idt; + vmcb->idtr.limit = (X86_INTR_COUNT*sizeof(struct intr_descriptor)) - 1; + + vmcb->guest_asid = asid; + vmcb->efer = kvm_rdmsr(MSR_EFER); + vmcb->cr0 = cregs.cr0; + vmcb->cr3 = cregs.cr3; + vmcb->cr4 = cregs.cr4; + vmcb->rip = (uintptr_t)kvm_svm_guest_entry; + vmcb->rax = (uintptr_t)guest_main; + vmcb->rsp = (uintptr_t)rsp; + vmcb->rflags = 0x200; /* Interrupts enabled */ +} + +struct kvm_svm_vcpu *kvm_create_svm_vcpu(int (*guest_main)(void), + int alloc_stack) +{ + uint16_t ss = 0; + char *stack = NULL; + struct kvm_vmcb *vmcb; + struct kvm_svm_vcpu *ret; + + vmcb = kvm_alloc_vmcb(); + + if (alloc_stack) { + stack = tst_heap_alloc_aligned(2 * PAGESIZE, PAGESIZE); + ss = kvm_create_stack_descriptor(kvm_gdt, KVM_GDT_SIZE, stack); + stack += 2 * PAGESIZE; + } + + kvm_init_guest_vmcb(vmcb, 1, ss, stack, guest_main); + ret = tst_heap_alloc(sizeof(struct kvm_svm_vcpu)); + memset(ret, 0, 
sizeof(struct kvm_svm_vcpu)); + ret->vmcb = vmcb; + return ret; +} diff --git a/testcases/kernel/kvm/linker/x86.lds b/testcases/kernel/kvm/linker/x86.lds index 95edb0be0..6e69c4d0f 100644 --- a/testcases/kernel/kvm/linker/x86.lds +++ b/testcases/kernel/kvm/linker/x86.lds @@ -4,6 +4,7 @@ PHDRS { headers PT_PHDR PHDRS ; text PT_LOAD FILEHDR PHDRS ; + data PT_LOAD ; bss PT_LOAD ; } @@ -18,7 +19,7 @@ SECTIONS .init.boot : { *(.init.protected_mode) - *(.data.gdt32) + *(.init.gdt32) *(.init.memlayout) } :text @@ -40,7 +41,7 @@ SECTIONS { *(.data.strings) *(.data) - } + } :data .preinit_array : { diff --git a/testcases/kernel/kvm/linker/x86_64.lds b/testcases/kernel/kvm/linker/x86_64.lds index ac372f863..9e62aa5ad 100644 --- a/testcases/kernel/kvm/linker/x86_64.lds +++ b/testcases/kernel/kvm/linker/x86_64.lds @@ -4,6 +4,7 @@ PHDRS { headers PT_PHDR PHDRS ; text PT_LOAD FILEHDR PHDRS ; + data PT_LOAD ; bss PT_LOAD ; } @@ -18,7 +19,7 @@ SECTIONS .init.boot : { *(.init.protected_mode) - *(.data.gdt32) + *(.init.gdt32) *(.init.memlayout) } :text @@ -40,7 +41,7 @@ SECTIONS { *(.data.strings) *(.data) - } + } :data .preinit_array : { |
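
Distilling the nested-SVM API added by kvm_x86_svm.h and lib_x86.c above, a guest payload is expected to drive it roughly as sketched here; guest_payload() is a placeholder, not a function from the patch:

    #include "kvm_x86_svm.h"

    static int guest_payload(void)
    {
    	/* Runs as the nested (L2) guest; return value ends up in vmcb->rax */
    	return 0;
    }

    void main(void)
    {
    	struct kvm_svm_vcpu *vcpu;

    	/* Enable EFER.SVME and register the host save area */
    	kvm_init_svm();

    	/* Allocate a VMCB plus nested-guest stack, point RIP at the entry stub */
    	vcpu = kvm_create_svm_vcpu(guest_payload, 1);

    	/* Intercepts may be adjusted before entering the nested guest */
    	kvm_vmcb_set_intercept(vcpu->vmcb, SVM_INTERCEPT_VMSAVE, 0);

    	/* VMRUN; returns after an intercepted VMEXIT with state saved in the VMCB */
    	kvm_svm_vmrun(vcpu);

    	if (vcpu->vmcb->exitcode != SVM_EXIT_HLT)
    		tst_brk(TBROK, "Nested VM exited unexpectedly");

    	if (vcpu->vmcb->rax)
    		tst_brk(TBROK, "Unexpected guest_payload() return value");

    	tst_res(TPASS, "Nested guest completed normally");
    }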
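
The asynchronous guest/host handshake introduced in this series (tst_signal_host() and tst_wait_host() on the guest side, tst_kvm_wait_guest() and tst_kvm_clear_guest_signal() on the host side, with KVM_TSYNC as the marker value) is driven from the host roughly as sketched below. wait_for_guest() is an illustrative wrapper, not part of the patch; it assumes the guest payload calls tst_wait_host(NULL) while tst_kvm_run_instance() executes on another thread, as kvm_svm03.c does:

    static void wait_for_guest(struct tst_kvm_instance *vm)
    {
    	/* Poll the shared result page for KVM_TSYNC, give up after 2 seconds */
    	int ret = tst_kvm_wait_guest(vm, 2000);

    	if (ret == KVM_TEXIT)
    		tst_brk(TCONF, "Guest exited before signalling the host");

    	if (ret)
    		tst_brk(TBROK, "Timed out waiting for the guest signal");

    	/* ... act on the notification, e.g. interrupt the vCPU thread ... */

    	/* Reset the result to KVM_TNONE; a guest blocked in tst_wait_host()
    	 * resumes as soon as it observes the reset. */
    	tst_kvm_clear_guest_signal(vm);
    }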