diff options
Diffstat (limited to 'arch')
86 files changed, 756 insertions, 276 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 98f64ad1caf1..ed2539c590bf 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -225,8 +225,8 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool -# Select if arch has its private alloc_thread_info() function -config ARCH_THREAD_INFO_ALLOCATOR +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR bool # Select if arch wants to size task_struct dynamically via arch_task_struct_size: diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts index cd316021d6ce..6c1b45c1af66 100644 --- a/arch/arm/boot/dts/armada-388-gp.dts +++ b/arch/arm/boot/dts/armada-388-gp.dts @@ -89,7 +89,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pca0_pins>; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; @@ -101,7 +101,7 @@ compatible = "nxp,pca9555"; pinctrl-names = "default"; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 5f5e0f3d5b64..27cd4abfc74d 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -697,6 +697,8 @@ vmmc_aux-supply = <&vsim>; bus-width = <8>; non-removable; + no-sdio; + no-sd; }; &mmc3 { diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index f3142369f594..01116ee1284b 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -87,9 +87,9 @@ CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_SCTP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index d2315ffd8f12..f13ae153fb24 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -112,12 +112,8 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +/* This is the base location for PIE (ET_DYN with INTERP) loads. */ +#define ELF_ET_DYN_BASE 0x400000UL /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. A value of 0 means we diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index bfe2a2f5a644..22b73112b75f 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -54,6 +54,24 @@ static inline void *return_address(unsigned int level) #define ftrace_return_address(n) return_address(n) +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME + +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + if (!strcmp(sym, "sys_mmap2")) + sym = "sys_mmap_pgoff"; + else if (!strcmp(sym, "sys_statfs64_wrapper")) + sym = "sys_statfs64"; + else if (!strcmp(sym, "sys_fstatfs64_wrapper")) + sym = "sys_fstatfs64"; + else if (!strcmp(sym, "sys_arm_fadvise64_64")) + sym = "sys_fadvise64_64"; + + /* Ignore case since sym may start with "SyS" instead of "sys" */ + return !strcasecmp(sym, name); +} + #endif /* ifndef __ASSEMBLY__ */ #endif /* _ASM_ARM_FTRACE */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1f1ff7e7b9cf..ba079e279b58 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1629,12 +1629,16 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { + if (!kvm->arch.pgd) + return 0; trace_kvm_age_hva(start, end); return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { + if (!kvm->arch.pgd) + return 0; trace_kvm_test_age_hva(hva); return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 35be8566140e..da007c26de52 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -98,6 +98,7 @@ config ARM64 select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select HAVE_CONTEXT_TRACKING + select THREAD_INFO_IN_TASK help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts index ce5d848251fa..7b34822d61e9 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts +++ b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts @@ -26,7 +26,7 @@ stdout-path = "serial0:115200n8"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x0 0x0 0x40000000>; }; diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index 857eda5c7217..172402cc1a0f 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -71,7 +71,7 @@ <1 10 0xf01>; }; - amba_apu { + amba_apu: amba_apu@0 { compatible = "simple-bus"; #address-cells = <2>; #size-cells = <1>; @@ -191,7 +191,7 @@ }; i2c0: i2c@ff020000 { - compatible = "cdns,i2c-r1p10"; + compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 17 4>; @@ -202,7 +202,7 @@ }; i2c1: i2c@ff030000 { - compatible = "cdns,i2c-r1p10"; + compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 18 4>; diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index d8855ca6068a..e450bb6d21bd 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -223,14 +223,25 @@ lr .req x30 // link register .endm /* + * @dst: Result of per_cpu(sym, smp_processor_id()) * @sym: The name of the per-cpu variable - * @reg: Result of per_cpu(sym, smp_processor_id()) * @tmp: scratch register */ - .macro this_cpu_ptr, sym, reg, tmp - adr_l \reg, \sym + .macro adr_this_cpu, dst, sym, tmp + adr_l \dst, \sym mrs \tmp, tpidr_el1 - add \reg, \reg, \tmp + add \dst, \dst, \tmp + .endm + + /* + * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id())) + * @sym: The name of the per-cpu variable + * @tmp: scratch register + */ + .macro ldr_this_cpu dst, sym, tmp + adr_l \dst, \sym + mrs \tmp, tpidr_el1 + ldr \dst, [\dst, \tmp] .endm /* diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h new file mode 100644 index 000000000000..2e61d21294ba --- /dev/null +++ b/arch/arm64/include/asm/current.h @@ -0,0 +1,27 @@ +#ifndef __ASM_CURRENT_H +#define __ASM_CURRENT_H + +#include <linux/compiler.h> + +#include <asm/sysreg.h> + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_THREAD_INFO_IN_TASK +struct task_struct; + +static __always_inline struct task_struct *get_current(void) +{ + return (struct task_struct *)read_sysreg(sp_el0); +} +#define current get_current() +#else +#include <linux/thread_info.h> +#define get_current() (current_thread_info()->task) +#define current get_current() +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_CURRENT_H */ + diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 7875c886ad24..8fbc5c7faf70 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -113,12 +113,11 @@ #define ELF_EXEC_PAGESIZE PAGE_SIZE /* - * This is the location that an ET_DYN program is loaded if exec'ed. Typical - * use of this is to invoke "./ld.so someprog" to test out a new version of - * the loader. We need to make sure that it is out of the way of the program - * that it will "exec", and that there is sufficient room for the brk. + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) +#define ELF_ET_DYN_BASE 0x100000000UL #ifndef __ASSEMBLY__ @@ -169,7 +168,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #ifdef CONFIG_COMPAT -#define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3) +/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */ +#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL /* AArch32 registers. */ #define COMPAT_ELF_NGREG 18 diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 8a336852eeba..2ce1a0262a59 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,6 +16,8 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#include <asm/stack_pointer.h> + static inline void set_my_cpu_offset(unsigned long off) { asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 7bd3cdb533ea..91b6be092ce2 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -17,6 +17,8 @@ #ifndef __ASM_PERF_EVENT_H #define __ASM_PERF_EVENT_H +#include <asm/stack_pointer.h> + #ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 2013a4dc5124..a05033beb2a2 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -16,11 +16,20 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H +#include <asm/percpu.h> + #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/thread_info.h> -#define raw_smp_processor_id() (current_thread_info()->cpu) +DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); + +/* + * We don't use this_cpu_read(cpu_number) as that has implicit writes to + * preempt_count, and associated (compiler) barriers, that we'd like to avoid + * the expense of. If we're preemptible, the value can be stale at use anyway. + */ +#define raw_smp_processor_id() (*this_cpu_ptr(&cpu_number)) struct seq_file; @@ -57,6 +66,9 @@ asmlinkage void secondary_start_kernel(void); */ struct secondary_data { void *stack; +#ifdef CONFIG_THREAD_INFO_IN_TASK + struct task_struct *task; +#endif }; extern struct secondary_data secondary_data; extern void secondary_entry(void); diff --git a/arch/arm64/include/asm/stack_pointer.h b/arch/arm64/include/asm/stack_pointer.h new file mode 100644 index 000000000000..ffcdf742cddf --- /dev/null +++ b/arch/arm64/include/asm/stack_pointer.h @@ -0,0 +1,9 @@ +#ifndef __ASM_STACK_POINTER_H +#define __ASM_STACK_POINTER_H + +/* + * how to get the current stack pointer from C + */ +register unsigned long current_stack_pointer asm ("sp"); + +#endif /* __ASM_STACK_POINTER_H */ diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 59a5b0f1e81c..4d19a03d316e 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -1,7 +1,7 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 11 +#define NR_CTX_REGS 13 /* * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 794d22603f04..67dd228c3f17 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -36,25 +36,36 @@ struct task_struct; +#include <asm/stack_pointer.h> #include <asm/types.h> typedef unsigned long mm_segment_t; /* * low level task data that entry.S needs immediate access to. - * __switch_to() assumes cpu_context follows immediately after cpu_domain. */ struct thread_info { unsigned long flags; /* low level flags */ mm_segment_t addr_limit; /* address limit */ +#ifndef CONFIG_THREAD_INFO_IN_TASK struct task_struct *task; /* main task structure */ +#endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif int preempt_count; /* 0 => preemptable, <0 => bug */ +#ifndef CONFIG_THREAD_INFO_IN_TASK int cpu; /* cpu */ +#endif }; +#ifdef CONFIG_THREAD_INFO_IN_TASK +#define INIT_THREAD_INFO(tsk) \ +{ \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .addr_limit = KERNEL_DS, \ +} +#else #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -63,14 +74,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - -/* - * how to get the current stack pointer from C - */ -register unsigned long current_stack_pointer asm ("sp"); - /* * how to get the thread information struct from C */ @@ -88,6 +91,11 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)sp_el0; } +#define init_thread_info (init_thread_union.thread_info) +#endif + +#define init_stack (init_thread_union.stack) + #define thread_saved_pc(tsk) \ ((unsigned long)(tsk->thread.cpu_context.pc)) #define thread_saved_sp(tsk) \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index b84d8e85d19d..24e65f0897ee 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -33,11 +33,16 @@ int main(void) { DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); BLANK(); +#ifdef CONFIG_THREAD_INFO_IN_TASK + DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); + DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); + DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); + DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); +#else DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); - DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); +#endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct thread_info, ttbr0)); #endif @@ -111,6 +116,11 @@ int main(void) DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); BLANK(); +#ifdef CONFIG_THREAD_INFO_IN_TASK + DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); + DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); + BLANK(); +#endif #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3f9d78612e57..dba3aceaed2f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -93,9 +93,14 @@ .if \el == 0 mrs x21, sp_el0 +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, + ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug +#else mov tsk, sp and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug +#endif disable_step_tsk x19, x20 // exceptions when scheduling. mov x29, xzr // fp pointed to user-space @@ -103,10 +108,18 @@ add x21, sp, #S_FRAME_SIZE get_thread_info tsk /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] +#else ldr x20, [tsk, #TI_ADDR_LIMIT] +#endif str x20, [sp, #S_ORIG_ADDR_LIMIT] mov x20, #TASK_SIZE_64 +#ifdef CONFIG_THREAD_INFO_IN_TASK + str x20, [tsk, #TSK_TI_ADDR_LIMIT] +#else str x20, [tsk, #TI_ADDR_LIMIT] +#endif ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) .endif /* \el == 0 */ mrs x22, elr_el1 @@ -168,7 +181,11 @@ alternative_else_nop_endif .if \el != 0 /* Restore the task's original addr_limit. */ ldr x20, [sp, #S_ORIG_ADDR_LIMIT] +#ifdef CONFIG_THREAD_INFO_IN_TASK + str x20, [tsk, #TSK_TI_ADDR_LIMIT] +#else str x20, [tsk, #TI_ADDR_LIMIT] +#endif /* No need to restore UAO, it will be restored from SPSR_EL1 */ .endif @@ -258,15 +275,22 @@ alternative_endif mov x19, sp // preserve the original sp /* - * Compare sp with the current thread_info, if the top - * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and - * should switch to the irq stack. + * Compare sp with the base of the task stack. + * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack, + * and should switch to the irq stack. */ +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x25, [tsk, TSK_STACK] + eor x25, x25, x19 + and x25, x25, #~(THREAD_SIZE - 1) + cbnz x25, 9998f +#else and x25, x19, #~(THREAD_SIZE - 1) cmp x25, tsk b.ne 9998f +#endif - this_cpu_ptr irq_stack, x25, x26 + adr_this_cpu x25, irq_stack, x26 mov x26, #IRQ_STACK_START_SP add x26, x25, x26 @@ -493,9 +517,17 @@ el1_irq: irq_handler #ifdef CONFIG_PREEMPT +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count +#else ldr w24, [tsk, #TI_PREEMPT] // get preempt count +#endif cbnz w24, 1f // preempt count != 0 +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x0, [tsk, #TSK_TI_FLAGS] // get flags +#else ldr x0, [tsk, #TI_FLAGS] // get flags +#endif tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt 1: @@ -510,7 +542,11 @@ ENDPROC(el1_irq) el1_preempt: mov x24, lr 1: bl preempt_schedule_irq // irq en/disable is done inside +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS +#else ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS +#endif tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? ret x24 #endif @@ -730,8 +766,12 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 +#ifdef CONFIG_THREAD_INFO_IN_TASK + msr sp_el0, x1 +#else and x9, x9, #~(THREAD_SIZE - 1) msr sp_el0, x9 +#endif ret ENDPROC(cpu_switch_to) @@ -742,7 +782,11 @@ ENDPROC(cpu_switch_to) ret_fast_syscall: disable_irq // disable interrupts str x0, [sp, #S_X0] // returned x0 +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing +#else ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing +#endif and x2, x1, #_TIF_SYSCALL_WORK cbnz x2, ret_fast_syscall_trace and x2, x1, #_TIF_WORK_MASK @@ -774,7 +818,11 @@ work_resched: */ ret_to_user: disable_irq // disable interrupts +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x1, [tsk, #TSK_TI_FLAGS] +#else ldr x1, [tsk, #TI_FLAGS] +#endif and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending enable_step_tsk x1, x2 @@ -806,7 +854,11 @@ el0_svc_naked: // compat entry point enable_dbg_and_irq ct_user_exit 1 +#ifdef CONFIG_THREAD_INFO_IN_TASK + ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks +#else ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks +#endif tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace cmp scno, sc_nr // check upper syscall limit diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 99710399aa38..16d0820fff3a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -424,6 +424,7 @@ kernel_img_size: .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: mov x28, lr // preserve LR + adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb @@ -474,10 +475,18 @@ __mmap_switched: dsb sy // with MMU off #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK + adrp x4, init_thread_union + add sp, x4, #THREAD_SIZE + adr_l x5, init_task + msr sp_el0, x5 // Save thread_info +#else adr_l sp, initial_sp, x4 mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) msr sp_el0, x4 // Save thread_info +#endif + str_l x21, __fdt_pointer, x5 // Save FDT pointer ldr_l x4, kimage_vaddr // Save the offset between @@ -689,11 +698,18 @@ ENTRY(__secondary_switched) adr_l x5, vectors msr vbar_el1, x5 isb - +#ifdef CONFIG_THREAD_INFO_IN_TASK + adr_l x0, secondary_data + ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack + mov sp, x1 + ldr x2, [x0, #CPU_BOOT_TASK] + msr sp_el0, x2 +#else ldr_l x0, secondary_data // get secondary_data.stack mov sp, x0 and x0, x0, #~(THREAD_SIZE - 1) msr sp_el0, x0 // save thread_info +#endif mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index e6afea67f6c1..e34bcf3f2c35 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -45,6 +45,9 @@ #include <linux/personality.h> #include <linux/notifier.h> #include <trace/events/power.h> +#ifdef CONFIG_THREAD_INFO_IN_TASK +#include <linux/percpu.h> +#endif #include <asm/alternative.h> #include <asm/compat.h> @@ -390,6 +393,22 @@ void uao_thread_switch(struct task_struct *next) } } +#ifdef CONFIG_THREAD_INFO_IN_TASK +/* + * We store our current task in sp_el0, which is clobbered by userspace. Keep a + * shadow copy so that we can restore this upon entry from userspace. + * + * This is *only* for exception entry from EL0, and is not valid until we + * __switch_to() a user task. + */ +DEFINE_PER_CPU(struct task_struct *, __entry_task); + +static void entry_task_switch(struct task_struct *next) +{ + __this_cpu_write(__entry_task, next); +} +#endif + /* * Thread switching. */ @@ -402,6 +421,9 @@ struct task_struct *__switch_to(struct task_struct *prev, tls_thread_switch(next); hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); +#ifdef CONFIG_THREAD_INFO_IN_TASK + entry_task_switch(next); +#endif uao_thread_switch(next); /* @@ -419,27 +441,35 @@ struct task_struct *__switch_to(struct task_struct *prev, unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; - unsigned long stack_page; + unsigned long stack_page, ret = 0; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; + stack_page = (unsigned long)try_get_task_stack(p); + if (!stack_page) + return 0; + frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = p->curr_ret_stack; #endif - stack_page = (unsigned long)task_stack_page(p); do { if (frame.sp < stack_page || frame.sp >= stack_page + THREAD_SIZE || unwind_frame(p, &frame)) - return 0; - if (!in_sched_functions(frame.pc)) - return frame.pc; + goto out; + if (!in_sched_functions(frame.pc)) { + ret = frame.pc; + goto out; + } } while (count ++ < 16); - return 0; + +out: + put_task_stack(p); + return ret; } unsigned long arch_align_stack(unsigned long sp) diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 1718706fde83..12a87f2600f2 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -12,6 +12,7 @@ #include <linux/export.h> #include <linux/ftrace.h> +#include <asm/stack_pointer.h> #include <asm/stacktrace.h> struct return_address_data { diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 6591bf23422b..ede6cc373f43 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -349,11 +349,15 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* - * Make sure init_thread_info.ttbr0 always generates translation + * Make sure thread_info.ttbr0 always generates translation * faults in case uaccess_enable() is inadvertently called by the init * thread. */ - init_thread_info.ttbr0 = virt_to_phys(empty_zero_page); +#ifdef CONFIG_THREAD_INFO_IN_TASK + init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page); +#else + init_thread_info.ttbr0 = (init_thread_union.thread_info); +#endif #endif #ifdef CONFIG_VT diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index e33fe33876ab..f586f7c875e2 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -173,9 +173,6 @@ ENTRY(cpu_resume) /* load physical address of identity map page table in x1 */ adrp x1, idmap_pg_dir mov sp, x2 - /* save thread_info */ - and x2, x2, #~(THREAD_SIZE - 1) - msr sp_el0, x2 /* * cpu_do_resume expects x0 to contain context physical address * pointer and x1 to contain physical address of 1:1 page tables diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a84623d91410..ac899acec6eb 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -57,6 +57,9 @@ #define CREATE_TRACE_POINTS #include <trace/events/ipi.h> +DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); + /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@ -95,6 +98,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * We need to tell the secondary core where to find its stack and the * page tables. */ +#ifdef CONFIG_THREAD_INFO_IN_TASK + secondary_data.task = idle; +#endif secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; __flush_dcache_area(&secondary_data, sizeof(secondary_data)); @@ -118,6 +124,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_err("CPU%u: failed to boot: %d\n", cpu, ret); } +#ifdef CONFIG_THREAD_INFO_IN_TASK + secondary_data.task = NULL; +#endif secondary_data.stack = NULL; return ret; @@ -135,7 +144,10 @@ static void smp_store_cpu_info(unsigned int cpuid) asmlinkage void secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; - unsigned int cpu = smp_processor_id(); + unsigned int cpu; + + cpu = task_cpu(current); + set_my_cpu_offset(per_cpu_offset(cpu)); /* * All kernel threads share the same mm context; grab a @@ -144,8 +156,6 @@ asmlinkage void secondary_start_kernel(void) atomic_inc(&mm->mm_count); current->active_mm = mm; - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. @@ -607,6 +617,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (max_cpus == 0) break; + per_cpu(cpu_number, cpu) = cpu; + if (cpu == smp_processor_id()) continue; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index cfd46c227c8c..4faaf1af88fd 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -22,6 +22,7 @@ #include <linux/stacktrace.h> #include <asm/irq.h> +#include <asm/stack_pointer.h> #include <asm/stacktrace.h> /* @@ -125,7 +126,6 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, break; } } -EXPORT_SYMBOL(walk_stackframe); #ifdef CONFIG_STACKTRACE struct stack_trace_data { @@ -157,6 +157,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) struct stack_trace_data data; struct stackframe frame; + if (!try_get_task_stack(tsk)) + return; + data.trace = trace; data.skip = trace->skip; @@ -178,6 +181,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) walk_stackframe(tsk, &frame, save_trace, &data); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; + + put_task_stack(tsk); } void save_stack_trace(struct stack_trace *trace) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index f42b8b8f1d0a..e7a96462ca2d 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -96,12 +96,6 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) cpu_uninstall_idmap(); /* - * Restore per-cpu offset before any kernel - * subsystem relying on it has a chance to run. - */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - - /* * PSTATE was not saved over suspend/resume, re-enable any * detected features that might not have been set correctly. */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f5c82c76cf7c..e63708dc7b86 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -38,6 +38,7 @@ #include <asm/esr.h> #include <asm/insn.h> #include <asm/traps.h> +#include <asm/stack_pointer.h> #include <asm/stacktrace.h> #include <asm/exception.h> #include <asm/system_misc.h> @@ -149,6 +150,14 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) unsigned long irq_stack_ptr; int skip; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (!tsk) + tsk = current; + + if (!try_get_task_stack(tsk)) + return; + /* * Switching between stacks is valid when tracing current and in * non-preemptible context. @@ -219,6 +228,8 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) stack + sizeof(struct pt_regs), false); } } + + put_task_stack(tsk); } void show_stack(struct task_struct *tsk, unsigned long *sp) @@ -234,10 +245,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #endif #define S_SMP " SMP" -static int __die(const char *str, int err, struct thread_info *thread, - struct pt_regs *regs) +static int __die(const char *str, int err, struct pt_regs *regs) { - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; static int die_counter; int ret; @@ -252,7 +262,8 @@ static int __die(const char *str, int err, struct thread_info *thread, print_modules(); __show_regs(regs); pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), + end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->sp, @@ -272,7 +283,6 @@ static DEFINE_RAW_SPINLOCK(die_lock); */ void die(const char *str, struct pt_regs *regs, int err) { - struct thread_info *thread = current_thread_info(); int ret; oops_enter(); @@ -280,9 +290,9 @@ void die(const char *str, struct pt_regs *regs, int err) raw_spin_lock_irq(&die_lock); console_verbose(); bust_spinlocks(1); - ret = __die(str, err, thread, regs); + ret = __die(str, err, regs); - if (regs && kexec_should_crash(thread->task)) + if (regs && kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2581ede3075a..253bdf2cfe0f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -66,21 +66,21 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; pud = pud_offset(pgd, addr); - printk(", *pud=%016llx", pud_val(*pud)); + pr_cont(", *pud=%016llx", pud_val(*pud)); if (pud_none(*pud) || pud_bad(*pud)) break; pmd = pmd_offset(pud, addr); - printk(", *pmd=%016llx", pmd_val(*pmd)); + pr_cont(", *pmd=%016llx", pmd_val(*pmd)); if (pmd_none(*pmd) || pmd_bad(*pmd)) break; pte = pte_offset_map(pmd, addr); - printk(", *pte=%016llx", pte_val(*pte)); + pr_cont(", *pte=%016llx", pte_val(*pte)); pte_unmap(pte); } while(0); - printk("\n"); + pr_cont("\n"); } #ifdef CONFIG_ARM64_HW_AFDBM diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 85a542b21575..3b3a4710dcd6 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -71,12 +71,15 @@ ENTRY(cpu_do_suspend) mrs x10, mdscr_el1 mrs x11, oslsr_el1 mrs x12, sctlr_el1 + mrs x13, tpidr_el1 + mrs x14, sp_el0 stp x2, x3, [x0] stp x4, x5, [x0, #16] stp x6, x7, [x0, #32] stp x8, x9, [x0, #48] stp x10, x11, [x0, #64] - str x12, [x0, #80] + stp x12, x13, [x0, #80] + str x14, [x0, #96] ret ENDPROC(cpu_do_suspend) @@ -99,7 +102,8 @@ ENTRY(cpu_do_resume) ldp x6, x7, [x0, #32] ldp x8, x9, [x0, #48] ldp x10, x11, [x0, #64] - ldr x12, [x0, #80] + ldp x12, x13, [x0, #80] + ldr x14, [x0, #96] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 @@ -111,6 +115,8 @@ ENTRY(cpu_do_resume) msr tcr_el1, x8 msr vbar_el1, x9 msr mdscr_el1, x10 + msr tpidr_el1, x13 + msr sp_el0, x14 /* * Restore oslsr_el1 by writing oslar_el1 */ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index eb0249e37981..7534fd34f79d 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -45,7 +45,7 @@ config IA64 select GENERIC_SMP_IDLE_THREAD select ARCH_INIT_TASK select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA select GENERIC_TIME_VSYSCALL_OLD select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index aa995b67c3f5..d1212b84fb83 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,15 +48,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info_node(tsk, node) \ - ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_info(ti) /* nothing */ +#define free_thread_stack(ti) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index f9efe9739d3f..0eaa89f3defd 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info +#define init_stack init_task_mem.stack union { struct { diff --git a/arch/mips/include/asm/branch.h b/arch/mips/include/asm/branch.h index de781cf54bc7..da80878f2c0d 100644 --- a/arch/mips/include/asm/branch.h +++ b/arch/mips/include/asm/branch.h @@ -74,10 +74,7 @@ static inline int compute_return_epc(struct pt_regs *regs) return __microMIPS_compute_return_epc(regs); if (cpu_has_mips16) return __MIPS16e_compute_return_epc(regs); - return regs->cp0_epc; - } - - if (!delay_slot(regs)) { + } else if (!delay_slot(regs)) { regs->cp0_epc += 4; return 0; } diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index e9fed8ca9b42..71e8f4c0b8da 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -399,7 +399,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs) * * @regs: Pointer to pt_regs * @insn: branch instruction to decode - * @returns: -EFAULT on error and forces SIGBUS, and on success + * @returns: -EFAULT on error and forces SIGILL, and on success * returns 0 or BRANCH_LIKELY_TAKEN as appropriate after * evaluating the branch. * @@ -431,7 +431,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, /* Fall through */ case jr_op: if (NO_R6EMU && insn.r_format.func == jr_op) - goto sigill_r6; + goto sigill_r2r6; regs->cp0_epc = regs->regs[insn.r_format.rs]; break; } @@ -446,7 +446,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, switch (insn.i_format.rt) { case bltzl_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bltz_op: if ((long)regs->regs[insn.i_format.rs] < 0) { epc = epc + 4 + (insn.i_format.simmediate << 2); @@ -459,7 +459,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgezl_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bgez_op: if ((long)regs->regs[insn.i_format.rs] >= 0) { epc = epc + 4 + (insn.i_format.simmediate << 2); @@ -473,10 +473,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bltzal_op: case bltzall_op: if (NO_R6EMU && (insn.i_format.rs || - insn.i_format.rt == bltzall_op)) { - ret = -SIGILL; - break; - } + insn.i_format.rt == bltzall_op)) + goto sigill_r2r6; regs->regs[31] = epc + 8; /* * OK we are here either because we hit a NAL @@ -507,10 +505,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgezal_op: case bgezall_op: if (NO_R6EMU && (insn.i_format.rs || - insn.i_format.rt == bgezall_op)) { - ret = -SIGILL; - break; - } + insn.i_format.rt == bgezall_op)) + goto sigill_r2r6; regs->regs[31] = epc + 8; /* * OK we are here either because we hit a BAL @@ -556,6 +552,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, /* * These are unconditional and in j_format. */ + case jalx_op: case jal_op: regs->regs[31] = regs->cp0_epc + 8; case j_op: @@ -573,7 +570,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, */ case beql_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case beq_op: if (regs->regs[insn.i_format.rs] == regs->regs[insn.i_format.rt]) { @@ -587,7 +584,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bnel_op: if (NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bne_op: if (regs->regs[insn.i_format.rs] != regs->regs[insn.i_format.rt]) { @@ -601,7 +598,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case blezl_op: /* not really i_format */ if (!insn.i_format.rt && NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case blez_op: /* * Compact branches for R6 for the @@ -636,7 +633,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgtzl_op: if (!insn.i_format.rt && NO_R6EMU) - goto sigill_r6; + goto sigill_r2r6; case bgtz_op: /* * Compact branches for R6 for the @@ -843,11 +840,12 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, return ret; sigill_dsp: - printk("%s: DSP branch but not DSP ASE - sending SIGBUS.\n", current->comm); - force_sig(SIGBUS, current); + pr_info("%s: DSP branch but not DSP ASE - sending SIGILL.\n", + current->comm); + force_sig(SIGILL, current); return -EFAULT; -sigill_r6: - pr_info("%s: R2 branch but r2-to-r6 emulator is not preset - sending SIGILL.\n", +sigill_r2r6: + pr_info("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n", current->comm); force_sig(SIGILL, current); return -EFAULT; diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index 298b2b773d12..f1fab6ff53e6 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -83,7 +83,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) } seq_printf(m, "isa\t\t\t:"); - if (cpu_has_mips_r1) + if (cpu_has_mips_1) seq_printf(m, " mips1"); if (cpu_has_mips_2) seq_printf(m, "%s", " mips2"); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c95bf18260f8..24c115a0721a 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -927,7 +927,7 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->regs[2]); + trace_sys_exit(regs, regs_return_value(regs)); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 2d23c834ba96..29b0c5f978e4 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -372,7 +372,7 @@ EXPORT(sys_call_table) PTR sys_writev PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR sys_ni_syscall /* 4150 */ PTR sys_getsid PTR sys_fdatasync diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index deac63315d0e..a6323a969919 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -312,7 +312,7 @@ EXPORT(sys_call_table) PTR sys_sched_getaffinity PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR sys_io_setup /* 5200 */ PTR sys_io_destroy PTR sys_io_getevents diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index ee93d5fe61d7..e0fdca8d3abe 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -298,7 +298,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_sched_getaffinity PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR compat_sys_io_setup /* 6200 */ PTR sys_io_destroy PTR compat_sys_io_getevents diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index b77052ec6fb2..87c697181d25 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -367,7 +367,7 @@ EXPORT(sys32_call_table) PTR compat_sys_writev PTR sys_cacheflush PTR sys_cachectl - PTR sys_sysmips + PTR __sys_sysmips PTR sys_ni_syscall /* 4150 */ PTR sys_getsid PTR sys_fdatasync diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 53a7ef9a8f32..4234b2d726c5 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -28,6 +28,7 @@ #include <linux/elf.h> #include <asm/asm.h> +#include <asm/asm-eva.h> #include <asm/branch.h> #include <asm/cachectl.h> #include <asm/cacheflush.h> @@ -138,10 +139,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) __asm__ __volatile__ ( " .set "MIPS_ISA_ARCH_LEVEL" \n" " li %[err], 0 \n" - "1: ll %[old], (%[addr]) \n" + "1: \n" + user_ll("%[old]", "(%[addr])") " move %[tmp], %[new] \n" - "2: sc %[tmp], (%[addr]) \n" - " bnez %[tmp], 4f \n" + "2: \n" + user_sc("%[tmp]", "(%[addr])") + " beqz %[tmp], 4f \n" "3: \n" " .insn \n" " .subsection 2 \n" @@ -199,6 +202,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) unreachable(); } +/* + * mips_atomic_set() normally returns directly via syscall_exit potentially + * clobbering static registers, so be sure to preserve them. + */ +save_static_function(sys_sysmips); + SYSCALL_DEFINE3(sysmips, long, cmd, long, arg1, long, arg2) { switch (cmd) { diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 734a2c7665ec..6da2e4a6ba39 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -2496,6 +2496,35 @@ dcopuop: return 0; } +/* + * Emulate FPU instructions. + * + * If we use FPU hardware, then we have been typically called to handle + * an unimplemented operation, such as where an operand is a NaN or + * denormalized. In that case exit the emulation loop after a single + * iteration so as to let hardware execute any subsequent instructions. + * + * If we have no FPU hardware or it has been disabled, then continue + * emulating floating-point instructions until one of these conditions + * has occurred: + * + * - a non-FPU instruction has been encountered, + * + * - an attempt to emulate has ended with a signal, + * + * - the ISA mode has been switched. + * + * We need to terminate the emulation loop if we got switched to the + * MIPS16 mode, whether supported or not, so that we do not attempt + * to emulate a MIPS16 instruction as a regular MIPS FPU instruction. + * Similarly if we got switched to the microMIPS mode and only the + * regular MIPS mode is supported, so that we do not attempt to emulate + * a microMIPS instruction as a regular MIPS FPU instruction. Or if + * we got switched to the regular MIPS mode and only the microMIPS mode + * is supported, so that we do not attempt to emulate a regular MIPS + * instruction that should cause an Address Error exception instead. + * For simplicity we always terminate upon an ISA mode switch. + */ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, int has_fpu, void *__user *fault_addr) { @@ -2581,6 +2610,15 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; if (sig) break; + /* + * We have to check for the ISA bit explicitly here, + * because `get_isa16_mode' may return 0 if support + * for code compression has been globally disabled, + * or otherwise we may produce the wrong signal or + * even proceed successfully where we must not. + */ + if ((xcp->cp0_epc ^ prevepc) & 0x1) + break; cond_resched(); } while (xcp->cp0_epc > prevepc); diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 4861a78c7160..f5f90bbf019d 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void) } #ifndef CONFIG_KGDB -void arch_release_thread_info(struct thread_info *ti); +void arch_release_thread_stack(unsigned long *stack); #endif #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c index 99770823451a..2d7986c386fe 100644 --- a/arch/mn10300/kernel/kgdb.c +++ b/arch/mn10300/kernel/kgdb.c @@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs) * single-step state is cleared. At this point the breakpoints should have * been removed by __switch_to(). */ -void arch_release_thread_info(struct thread_info *ti) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *ti = (void *)stack; if (kgdb_sstep_thread == ti) { kgdb_sstep_thread = NULL; diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index 2d69a853b742..3a08b55609b6 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -38,6 +38,8 @@ SECTIONS /* Read-only sections, merged into text segment: */ . = LOAD_BASE ; + _text = .; + /* _s_kernel_ro must be page aligned */ . = ALIGN(PAGE_SIZE); _s_kernel_ro = .; diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h index d8d60a57183f..f53725202955 100644 --- a/arch/parisc/include/asm/dma-mapping.h +++ b/arch/parisc/include/asm/dma-mapping.h @@ -39,6 +39,8 @@ struct hppa_dma_ops { ** flush/purge and allocate "regular" cacheable pages for everything. */ +#define DMA_ERROR_CODE (~(dma_addr_t)0) + #ifdef CONFIG_PA11 extern struct hppa_dma_ops pcxl_dma_ops; extern struct hppa_dma_ops pcx_dma_ops; @@ -209,12 +211,13 @@ parisc_walk_tree(struct device *dev) break; } } - BUG_ON(!dev->platform_data); return dev->platform_data; } - -#define GET_IOC(dev) (HBA_DATA(parisc_walk_tree(dev))->iommu) - + +#define GET_IOC(dev) ({ \ + void *__pdata = parisc_walk_tree(dev); \ + __pdata ? HBA_DATA(__pdata)->iommu : NULL; \ +}) #ifdef CONFIG_IOMMU_CCIO struct parisc_device; diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h index 59be25764433..a81226257878 100644 --- a/arch/parisc/include/asm/mmu_context.h +++ b/arch/parisc/include/asm/mmu_context.h @@ -49,15 +49,26 @@ static inline void load_context(mm_context_t context) mtctl(__space_to_prot(context), 8); } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) +static inline void switch_mm_irqs_off(struct mm_struct *prev, + struct mm_struct *next, struct task_struct *tsk) { - if (prev != next) { mtctl(__pa(next->pgd), 25); load_context(next->context); } } +static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev, next, tsk); + local_irq_restore(flags); +} +#define switch_mm_irqs_off switch_mm_irqs_off + #define deactivate_mm(tsk,mm) do { } while (0) static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index d4ffcfbc9885..041e1f9ec129 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -361,7 +361,7 @@ ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ ENTRY_SAME(add_key) ENTRY_SAME(request_key) /* 265 */ - ENTRY_SAME(keyctl) + ENTRY_COMP(keyctl) ENTRY_SAME(ioprio_set) ENTRY_SAME(ioprio_get) ENTRY_SAME(inotify_init) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f9064449908a..d8c2f3bcfc18 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -303,7 +303,7 @@ bad_area: case 15: /* Data TLB miss fault/Data page fault */ /* send SIGSEGV when outside of vma */ if (!vma || - address < vma->vm_start || address > vma->vm_end) { + address < vma->vm_start || address >= vma->vm_end) { si.si_signo = SIGSEGV; si.si_code = SEGV_MAPERR; break; diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 55f106ed12bf..039c4b910615 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -460,7 +460,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) * Atomically increments @v by 1, so long as @v is non-zero. * Returns non-zero if @v was non-zero, and zero otherwise. */ -static __inline__ long atomic64_inc_not_zero(atomic64_t *v) +static __inline__ int atomic64_inc_not_zero(atomic64_t *v) { long t1, t2; @@ -479,7 +479,7 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v) : "r" (&v->counter) : "cc", "xer", "memory"); - return t1; + return t1 != 0; } #endif /* __powerpc64__ */ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index ee46ffef608e..743ad7a400d6 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -23,12 +23,13 @@ #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE 0x20000000 +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (is_32bit_task() ? 0x000400000UL : \ + 0x100000000UL) #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 627d129d7fcb..ca372bbc0ffe 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1236,7 +1236,7 @@ static inline unsigned long mfvtb (void) " .llong 0\n" \ ".previous" \ : "=r" (rval) \ - : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL)); \ + : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ rval;}) #else #define mftb() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 329771559cbb..8b3b46b7b0f2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -44,22 +44,8 @@ extern void __init dump_numa_cpu_topology(void); extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); -static inline int early_cpu_to_node(int cpu) -{ - int nid; - - nid = numa_cpu_lookup_table[cpu]; - - /* - * Fall back to node 0 if nid is unset (it should be, except bugs). - * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)). - */ - return (nid < 0) ? 0 : nid; -} #else -static inline int early_cpu_to_node(int cpu) { return 0; } - static inline void dump_numa_cpu_topology(void) {} static inline int sysfs_add_device_to_node(struct device *dev, int nid) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fe6e800c1357..a20823210ac0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -751,7 +751,7 @@ void __init setup_arch(char **cmdline_p) static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align, + return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, __pa(MAX_DMA_ADDRESS)); } @@ -762,7 +762,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size) static int pcpu_cpu_distance(unsigned int from, unsigned int to) { - if (early_cpu_to_node(from) == early_cpu_to_node(to)) + if (cpu_to_node(from) == cpu_to_node(to)) return LOCAL_DISTANCE; else return REMOTE_DISTANCE; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 396dc44e783b..428563b195c3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2687,6 +2687,10 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; + unsigned long ebb_regs[3] = {}; /* shut up GCC */ + unsigned long user_tar = 0; + unsigned long proc_fscr = 0; + unsigned int user_vrsave; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -2707,10 +2711,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) run->fail_entry.hardware_entry_failure_reason = 0; return -EINVAL; } + /* Enable TM so we can read the TM SPRs */ + mtmsr(mfmsr() | MSR_TM); current->thread.tm_tfhar = mfspr(SPRN_TFHAR); current->thread.tm_tfiar = mfspr(SPRN_TFIAR); current->thread.tm_texasr = mfspr(SPRN_TEXASR); - current->thread.regs->msr &= ~MSR_TM; } #endif @@ -2736,6 +2741,17 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_fp_to_thread(current); flush_altivec_to_thread(current); flush_vsx_to_thread(current); + + /* Save userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + ebb_regs[0] = mfspr(SPRN_EBBHR); + ebb_regs[1] = mfspr(SPRN_EBBRR); + ebb_regs[2] = mfspr(SPRN_BESCR); + user_tar = mfspr(SPRN_TAR); + proc_fscr = mfspr(SPRN_FSCR); + } + user_vrsave = mfspr(SPRN_VRSAVE); + vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -2757,6 +2773,29 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); + /* Restore userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_EBBHR, ebb_regs[0]); + mtspr(SPRN_EBBRR, ebb_regs[1]); + mtspr(SPRN_BESCR, ebb_regs[2]); + mtspr(SPRN_TAR, user_tar); + mtspr(SPRN_FSCR, proc_fscr); + } + mtspr(SPRN_VRSAVE, user_vrsave); + + /* + * Since we don't do lazy TM reload, we need to reload + * the TM registers here. + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && + (current->thread.regs->msr & MSR_TM)) { + mtspr(SPRN_TFHAR, current->thread.tm_tfhar); + mtspr(SPRN_TFIAR, current->thread.tm_tfiar); + mtspr(SPRN_TEXASR, current->thread.tm_texasr); + } +#endif + out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&vcpu->kvm->arch.vcpus_running); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 1a743f87b37d..ffab9269bfe4 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -36,6 +36,13 @@ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 +/* Stack frame offsets for kvmppc_hv_entry */ +#define SFS 112 +#define STACK_SLOT_TRAP (SFS-4) +#define STACK_SLOT_CIABR (SFS-16) +#define STACK_SLOT_DAWR (SFS-24) +#define STACK_SLOT_DAWRX (SFS-32) + /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. @@ -274,10 +281,10 @@ kvm_novcpu_exit: bl kvmhv_accumulate_time #endif 13: mr r3, r12 - stw r12, 112-4(r1) + stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_commence_exit nop - lwz r12, 112-4(r1) + lwz r12, STACK_SLOT_TRAP(r1) b kvmhv_switch_to_host /* @@ -489,7 +496,7 @@ kvmppc_hv_entry: */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -112(r1) + stdu r1, -SFS(r1) /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) @@ -643,6 +650,16 @@ kvmppc_got_guest: mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 + /* Save host values of some registers */ +BEGIN_FTR_SECTION + mfspr r5, SPRN_CIABR + mfspr r6, SPRN_DAWR + mfspr r7, SPRN_DAWRX + std r5, STACK_SLOT_CIABR(r1) + std r6, STACK_SLOT_DAWR(r1) + std r7, STACK_SLOT_DAWRX(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + BEGIN_FTR_SECTION /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ @@ -1266,8 +1283,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) */ li r0, 0 mtspr SPRN_IAMR, r0 - mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 + mtspr SPRN_PSPB, r0 mtspr SPRN_TCSCR, r0 mtspr SPRN_WORT, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ @@ -1283,6 +1299,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 + mtspr SPRN_UAMOR, r6 /* Switch DSCR back to host value */ mfspr r8, SPRN_DSCR @@ -1424,6 +1441,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) slbia ptesync + /* Restore host values of some registers */ +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_CIABR(r1) + ld r6, STACK_SLOT_DAWR(r1) + ld r7, STACK_SLOT_DAWRX(r1) + mtspr SPRN_CIABR, r5 + mtspr SPRN_DAWR, r6 + mtspr SPRN_DAWRX, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do @@ -1533,8 +1560,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - ld r0, 112+PPC_LR_STKOFF(r1) - addi r1, r1, 112 + ld r0, SFS+PPC_LR_STKOFF(r1) + addi r1, r1, SFS mtlr r0 blr diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 4014881e9843..e37162d356d8 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -687,8 +687,10 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 19: switch ((instr >> 1) & 0x3ff) { case 0: /* mcrf */ - rd = (instr >> 21) & 0x1c; - ra = (instr >> 16) & 0x1c; + rd = 7 - ((instr >> 23) & 0x7); + ra = 7 - ((instr >> 18) & 0x7); + rd *= 4; + ra *= 4; val = (regs->ccr >> ra) & 0xf; regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); goto instr_done; @@ -967,6 +969,19 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, #endif case 19: /* mfcr */ + if ((instr >> 20) & 1) { + imm = 0xf0000000UL; + for (sh = 0; sh < 8; ++sh) { + if (instr & (0x80000 >> sh)) { + regs->gpr[rd] = regs->ccr & imm; + break; + } + imm >>= 4; + } + + goto instr_done; + } + regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 7c7fcc042549..fb695f142563 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np) of_detach_node(np); of_node_put(parent); - of_node_put(np); /* Must decrement the refcount */ return 0; } diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index bab6739a1154..b9eb7b1a49d2 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -154,14 +154,13 @@ extern unsigned int vdso_enabled; #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. 64-bit - tasks are aligned to 4GB. */ -#define ELF_ET_DYN_BASE (is_32bit_task() ? \ - (STACK_TOP / 3 * 2) : \ - (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \ + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 6ba0bf928909..6bc941be6921 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -64,6 +64,12 @@ static inline void syscall_get_arguments(struct task_struct *task, { unsigned long mask = -1UL; + /* + * No arguments for this syscall, there's nothing to do. + */ + if (!n) + return; + BUG_ON(i + n > 6); #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 0e2919dd8df3..1395eeb6005f 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1250,7 +1250,8 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) return -1; - jit->addrs[i + 1] = jit->prg; /* Next instruction address */ + /* Next instruction address */ + jit->addrs[i + insn_count] = jit->prg; } bpf_jit_epilogue(jit); diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 349dd23e2876..0cdeb2b483a0 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -25,9 +25,11 @@ void destroy_context(struct mm_struct *mm); void __tsb_context_switch(unsigned long pgd_pa, struct tsb_config *tsb_base, struct tsb_config *tsb_huge, - unsigned long tsb_descr_pa); + unsigned long tsb_descr_pa, + unsigned long secondary_ctx); -static inline void tsb_context_switch(struct mm_struct *mm) +static inline void tsb_context_switch_ctx(struct mm_struct *mm, + unsigned long ctx) { __tsb_context_switch(__pa(mm->pgd), &mm->context.tsb_block[0], @@ -38,9 +40,12 @@ static inline void tsb_context_switch(struct mm_struct *mm) #else NULL #endif - , __pa(&mm->context.tsb_descr[0])); + , __pa(&mm->context.tsb_descr[0]), + ctx); } +#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0) + void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss); @@ -110,8 +115,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * cpu0 to update it's TSB because at that point the cpu_vm_mask * only had cpu1 set in it. */ - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); /* Any time a processor runs a context on an address space * for the first time, we must flush that context out of the diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index ec9c04de3664..ff05992dae7a 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS]; void init_cur_cpu_trap(struct thread_info *); void setup_tba(void); extern int ncpus_probed; +extern u64 cpu_mondo_counter[NR_CPUS]; unsigned long real_hard_smp_processor_id(void); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 95a9fa0d2195..4511caa3b7e9 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -617,22 +617,48 @@ retry: } } -/* Multi-cpu list version. */ +#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid]) +#define MONDO_USEC_WAIT_MIN 2 +#define MONDO_USEC_WAIT_MAX 100 +#define MONDO_RETRY_LIMIT 500000 + +/* Multi-cpu list version. + * + * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'. + * Sometimes not all cpus receive the mondo, requiring us to re-send + * the mondo until all cpus have received, or cpus are truly stuck + * unable to receive mondo, and we timeout. + * Occasionally a target cpu strand is borrowed briefly by hypervisor to + * perform guest service, such as PCIe error handling. Consider the + * service time, 1 second overall wait is reasonable for 1 cpu. + * Here two in-between mondo check wait time are defined: 2 usec for + * single cpu quick turn around and up to 100usec for large cpu count. + * Deliver mondo to large number of cpus could take longer, we adjusts + * the retry count as long as target cpus are making forward progress. + */ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) { - int retries, this_cpu, prev_sent, i, saw_cpu_error; + int this_cpu, tot_cpus, prev_sent, i, rem; + int usec_wait, retries, tot_retries; + u16 first_cpu = 0xffff; + unsigned long xc_rcvd = 0; unsigned long status; + int ecpuerror_id = 0; + int enocpu_id = 0; u16 *cpu_list; + u16 cpu; this_cpu = smp_processor_id(); - cpu_list = __va(tb->cpu_list_pa); - - saw_cpu_error = 0; - retries = 0; + usec_wait = cnt * MONDO_USEC_WAIT_MIN; + if (usec_wait > MONDO_USEC_WAIT_MAX) + usec_wait = MONDO_USEC_WAIT_MAX; + retries = tot_retries = 0; + tot_cpus = cnt; prev_sent = 0; + do { - int forward_progress, n_sent; + int n_sent, mondo_delivered, target_cpu_busy; status = sun4v_cpu_mondo_send(cnt, tb->cpu_list_pa, @@ -640,94 +666,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) /* HV_EOK means all cpus received the xcall, we're done. */ if (likely(status == HV_EOK)) - break; + goto xcall_done; + + /* If not these non-fatal errors, panic */ + if (unlikely((status != HV_EWOULDBLOCK) && + (status != HV_ECPUERROR) && + (status != HV_ENOCPU))) + goto fatal_errors; /* First, see if we made any forward progress. * + * Go through the cpu_list, count the target cpus that have + * received our mondo (n_sent), and those that did not (rem). + * Re-pack cpu_list with the cpus remain to be retried in the + * front - this simplifies tracking the truly stalled cpus. + * * The hypervisor indicates successful sends by setting * cpu list entries to the value 0xffff. + * + * EWOULDBLOCK means some target cpus did not receive the + * mondo and retry usually helps. + * + * ECPUERROR means at least one target cpu is in error state, + * it's usually safe to skip the faulty cpu and retry. + * + * ENOCPU means one of the target cpu doesn't belong to the + * domain, perhaps offlined which is unexpected, but not + * fatal and it's okay to skip the offlined cpu. */ + rem = 0; n_sent = 0; for (i = 0; i < cnt; i++) { - if (likely(cpu_list[i] == 0xffff)) + cpu = cpu_list[i]; + if (likely(cpu == 0xffff)) { n_sent++; + } else if ((status == HV_ECPUERROR) && + (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) { + ecpuerror_id = cpu + 1; + } else if (status == HV_ENOCPU && !cpu_online(cpu)) { + enocpu_id = cpu + 1; + } else { + cpu_list[rem++] = cpu; + } } - forward_progress = 0; - if (n_sent > prev_sent) - forward_progress = 1; + /* No cpu remained, we're done. */ + if (rem == 0) + break; - prev_sent = n_sent; + /* Otherwise, update the cpu count for retry. */ + cnt = rem; - /* If we get a HV_ECPUERROR, then one or more of the cpus - * in the list are in error state. Use the cpu_state() - * hypervisor call to find out which cpus are in error state. + /* Record the overall number of mondos received by the + * first of the remaining cpus. */ - if (unlikely(status == HV_ECPUERROR)) { - for (i = 0; i < cnt; i++) { - long err; - u16 cpu; + if (first_cpu != cpu_list[0]) { + first_cpu = cpu_list[0]; + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); + } - cpu = cpu_list[i]; - if (cpu == 0xffff) - continue; + /* Was any mondo delivered successfully? */ + mondo_delivered = (n_sent > prev_sent); + prev_sent = n_sent; - err = sun4v_cpu_state(cpu); - if (err == HV_CPU_STATE_ERROR) { - saw_cpu_error = (cpu + 1); - cpu_list[i] = 0xffff; - } - } - } else if (unlikely(status != HV_EWOULDBLOCK)) - goto fatal_mondo_error; + /* or, was any target cpu busy processing other mondos? */ + target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu)); + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); - /* Don't bother rewriting the CPU list, just leave the - * 0xffff and non-0xffff entries in there and the - * hypervisor will do the right thing. - * - * Only advance timeout state if we didn't make any - * forward progress. + /* Retry count is for no progress. If we're making progress, + * reset the retry count. */ - if (unlikely(!forward_progress)) { - if (unlikely(++retries > 10000)) - goto fatal_mondo_timeout; - - /* Delay a little bit to let other cpus catch up - * on their cpu mondo queue work. - */ - udelay(2 * cnt); + if (likely(mondo_delivered || target_cpu_busy)) { + tot_retries += retries; + retries = 0; + } else if (unlikely(retries > MONDO_RETRY_LIMIT)) { + goto fatal_mondo_timeout; } - } while (1); - if (unlikely(saw_cpu_error)) - goto fatal_mondo_cpu_error; + /* Delay a little bit to let other cpus catch up on + * their cpu mondo queue work. + */ + if (!mondo_delivered) + udelay(usec_wait); - return; + retries++; + } while (1); -fatal_mondo_cpu_error: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " - "(including %d) were in error state\n", - this_cpu, saw_cpu_error - 1); +xcall_done: + if (unlikely(ecpuerror_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n", + this_cpu, ecpuerror_id - 1); + } else if (unlikely(enocpu_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n", + this_cpu, enocpu_id - 1); + } return; +fatal_errors: + /* fatal errors include bad alignment, etc */ + pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n", + this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + panic("Unexpected SUN4V mondo error %lu\n", status); + fatal_mondo_timeout: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " - " progress after %d retries.\n", - this_cpu, retries); - goto dump_cpu_list_and_out; - -fatal_mondo_error: - printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", - this_cpu, status); - printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " - "mondo_block_pa(%lx)\n", - this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); - -dump_cpu_list_and_out: - printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); - for (i = 0; i < cnt; i++) - printk("%u ", cpu_list[i]); - printk("]\n"); + /* some cpus being non-responsive to the cpu mondo */ + pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n", + this_cpu, first_cpu, (tot_retries + retries), tot_cpus); + panic("SUN4V mondo timeout panic\n"); } static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S index 559bc5e9c199..34631995859a 100644 --- a/arch/sparc/kernel/sun4v_ivec.S +++ b/arch/sparc/kernel/sun4v_ivec.S @@ -26,6 +26,21 @@ sun4v_cpu_mondo: ldxa [%g0] ASI_SCRATCHPAD, %g4 sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + /* Get smp_processor_id() into %g3 */ + sethi %hi(trap_block), %g5 + or %g5, %lo(trap_block), %g5 + sub %g4, %g5, %g3 + srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 + + /* Increment cpu_mondo_counter[smp_processor_id()] */ + sethi %hi(cpu_mondo_counter), %g5 + or %g5, %lo(cpu_mondo_counter), %g5 + sllx %g3, 3, %g3 + add %g5, %g3, %g5 + ldx [%g5], %g3 + add %g3, 1, %g3 + stx %g3, [%g5] + /* Get CPU mondo queue base phys address into %g7. */ ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index cc97a43268ee..d883c5951e8b 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2659,6 +2659,7 @@ void do_getpsr(struct pt_regs *regs) } } +u64 cpu_mondo_counter[NR_CPUS] = {0}; struct trap_per_cpu trap_block[NR_CPUS]; EXPORT_SYMBOL(trap_block); diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 395ec1800530..7d961f6e3907 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -375,6 +375,7 @@ tsb_flush: * %o1: TSB base config pointer * %o2: TSB huge config pointer, or NULL if none * %o3: Hypervisor TSB descriptor physical address + * %o4: Secondary context to load, if non-zero * * We have to run this whole thing with interrupts * disabled so that the current cpu doesn't change @@ -387,6 +388,17 @@ __tsb_context_switch: rdpr %pstate, %g1 wrpr %g1, PSTATE_IE, %pstate + brz,pn %o4, 1f + mov SECONDARY_CONTEXT, %o5 + +661: stxa %o4, [%o5] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %o4, [%o5] ASI_MMU + .previous + flush %g6 + +1: TRAP_LOAD_TRAP_BLOCK(%g2, %g3) stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c index 17bd2e167e07..df707a8ad311 100644 --- a/arch/sparc/power/hibernate.c +++ b/arch/sparc/power/hibernate.c @@ -35,6 +35,5 @@ void restore_processor_state(void) { struct mm_struct *mm = current->active_mm; - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); } diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index dc1fb28d9636..489b15016303 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -78,7 +78,7 @@ struct thread_info { #ifndef __ASSEMBLY__ -void arch_release_thread_info(struct thread_info *info); +void arch_release_thread_stack(unsigned long *stack); /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 7d5769310bef..a97ab1a69a90 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -73,8 +73,9 @@ void arch_cpu_idle(void) /* * Release a thread_info structure */ -void arch_release_thread_info(struct thread_info *info) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *info = (void *)stack; struct single_step_state *step_state = info->step_state; if (step_state) { diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 318b8465d302..06ceddb3a22e 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -14,6 +14,7 @@ #include <linux/types.h> #include "ctype.h" +#include "string.h" int memcmp(const void *s1, const void *s2, size_t len) { diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 725e820602b1..113588ddb43f 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len); #define memset(d,c,l) __builtin_memset(d,c,l) #define memcmp __builtin_memcmp +extern int strcmp(const char *str1, const char *str2); +extern int strncmp(const char *cs, const char *ct, size_t count); +extern size_t strlen(const char *s); +extern char *strstr(const char *s1, const char *s2); +extern size_t strnlen(const char *s, size_t maxlen); +extern unsigned int atou(const char *s); +extern unsigned long long simple_strtoull(const char *cp, char **endp, + unsigned int base); + #endif /* BOOT_STRING_H */ diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index dd14616b7739..7de207a11014 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, static bool avx2_usable(void) { - if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI2)) return true; diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index d262f985bbc8..07cf288b692e 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -245,12 +245,13 @@ extern int force_personality32; #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 690b4027e17c..37db36fddc88 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -405,6 +405,8 @@ #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 +#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc + #define MSR_IA32_XSS 0x00000da0 #define FEATURE_CONTROL_LOCKED (1<<0) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 4c20dd333412..85133b2b8e99 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -43,6 +43,7 @@ #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/smap.h> #include <xen/interface/xen.h> #include <xen/interface/sched.h> @@ -213,10 +214,12 @@ privcmd_call(unsigned call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + stac(); asm volatile("call *%[call]" : __HYPERCALL_5PARAM : [call] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); + clac(); return (long)__res; } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index e75907601a41..1e5eb9f2ff5f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -329,6 +329,14 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, struct mpc_intsrc mp_irq; /* + * Check bus_irq boundary. + */ + if (bus_irq >= NR_IRQS_LEGACY) { + pr_warn("Invalid bus_irq %u for legacy override\n", bus_irq); + return; + } + + /* * Convert 'gsi' to 'ioapic.pin'. */ ioapic = mp_find_ioapic(gsi); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1e5d2f07416b..fc91c98bee01 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2115,7 +2115,7 @@ static inline void __init check_timer(void) int idx; idx = find_irq_entry(apic1, pin1, mp_INT); if (idx != -1 && irq_trigger(idx)) - unmask_ioapic_irq(irq_get_chip_data(0)); + unmask_ioapic_irq(irq_get_irq_data(0)); } irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 62aca448726a..2116176c1721 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -682,6 +682,9 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) const char *name = th_names[bank]; int err = 0; + if (!dev) + return -ENODEV; + if (is_shared_bank(bank)) { nb = node_to_amd_nb(amd_get_nb_id(cpu)); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index cec49ecf5f31..32187f8a49b4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -151,6 +151,8 @@ void kvm_async_pf_task_wait(u32 token) if (hlist_unhashed(&n.link)) break; + rcu_irq_exit(); + if (!n.halted) { local_irq_enable(); schedule(); @@ -159,11 +161,11 @@ void kvm_async_pf_task_wait(u32 token) /* * We cannot reschedule. So halt. */ - rcu_irq_exit(); native_safe_halt(); local_irq_disable(); - rcu_irq_enter(); } + + rcu_irq_enter(); } if (!n.halted) finish_wait(&n.wq, &wait); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9357b29de9bc..83d6369c45f5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -46,11 +46,18 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted) return ret; } +bool kvm_mpx_supported(void) +{ + return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) + && kvm_x86_ops->mpx_supported()); +} +EXPORT_SYMBOL_GPL(kvm_mpx_supported); + u64 kvm_supported_xcr0(void) { u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; - if (!kvm_x86_ops->mpx_supported()) + if (!kvm_mpx_supported()) xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); return xcr0; @@ -97,7 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); + vcpu->arch.eager_fpu = use_eager_fpu(); if (vcpu->arch.eager_fpu) kvm_x86_ops->fpu_activate(vcpu); @@ -295,7 +302,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, #endif unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; - unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; + unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; /* cpuid 1.edx */ diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 3f5c48ddba45..d1534feefcfe 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -4,6 +4,7 @@ #include "x86.h" int kvm_update_cpuid(struct kvm_vcpu *vcpu); +bool kvm_mpx_supported(void); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, @@ -134,20 +135,20 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) return best && (best->ebx & bit(X86_FEATURE_RTM)); } -static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) +static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_MPX)); + return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); } -static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) +static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); + return best && (best->ebx & bit(X86_FEATURE_MPX)); } static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bbaa11f4e74b..b12391119ce8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -863,7 +863,6 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); -static bool vmx_mpx_supported(void); static bool vmx_xsaves_supported(void); static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); @@ -2541,7 +2540,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; /* We support free control of debug control saving. */ @@ -2562,7 +2561,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) VM_ENTRY_LOAD_IA32_PAT; vmx->nested.nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. */ @@ -2813,7 +2812,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; case MSR_IA32_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -2890,7 +2890,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) + return 1; + if (is_noncanonical_address(data & PAGE_MASK) || + (data & MSR_IA32_BNDCFGS_RSVD)) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; @@ -3363,7 +3367,7 @@ static void init_vmcs_shadow_fields(void) for (i = j = 0; i < max_shadow_read_write_fields; i++) { switch (shadow_read_write_fields[i]) { case GUEST_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported()) continue; break; default: @@ -6253,7 +6257,6 @@ static __init int hardware_setup(void) vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); memcpy(vmx_msr_bitmap_legacy_x2apic, vmx_msr_bitmap_legacy, PAGE_SIZE); @@ -10265,7 +10268,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); if (nested_cpu_has_xsaves(vmcs12)) vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); |