diff options
author | Amit Pundir <amit.pundir@linaro.org> | 2014-11-10 11:59:49 +0530 |
---|---|---|
committer | Amit Pundir <amit.pundir@linaro.org> | 2014-11-10 11:59:49 +0530 |
commit | 6b392f1acbd3fb6aab552a07268a29370a0c7e95 (patch) | |
tree | 551ad7b44704fa1280d920b38f010b269d5ed7d7 | |
parent | e36578362eafd57a722084d89c37c623cc9efd12 (diff) | |
parent | 34b1652fcbcc704ebbd27e866f81b91a43e69ff6 (diff) | |
download | juno-linaro-armv8-14.11.tar.gz |
Merge branch 'linaro-android-3.10-lsk' into 'linaro-armv8'linaro-armv8-14.11
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Conflicts:
arch/arm64/Kconfig
arch/arm64/include/asm/barrier.h
kernel/fork.c
63 files changed, 1066 insertions, 149 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 99d21641806..fbfc9e0d75e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1339,6 +1339,19 @@ ndisc_notify - BOOLEAN 1 - Generate unsolicited neighbour advertisements when device is brought up or hardware address changes. +optimistic_dad - BOOLEAN + Whether to perform Optimistic Duplicate Address Detection (RFC 4429). + 0: disabled (default) + 1: enabled + +use_optimistic - BOOLEAN + If enabled, do not classify optimistic addresses as deprecated during + source address selection. Preferred addresses will still be chosen + before optimistic addresses, subject to other ranking in the source + address selection algorithm. + 0: disabled (default) + 1: enabled + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/arch/Kconfig b/arch/Kconfig index 00e3702ec79..4c0a1d03ae0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER - secure_computing is called from a ptrace_event()-safe context - secure_computing return value is checked and a return value of -1 results in the system call being skipped immediately. + - seccomp syscall wired up config SECCOMP_FILTER def_bool y diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 60f15e274e6..2f59f744339 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -59,6 +59,21 @@ #define smp_wmb() dmb(ishst) #endif +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index 73ddd7239b3..ed805f1d378 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -103,8 +103,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->ARM_r0 + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { /* ARM tasks don't change audit architectures on the fly. */ return AUDIT_ARCH_ARM; diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index cbd61977c99..43876245fc5 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -15,7 +15,7 @@ #include <uapi/asm/unistd.h> -#define __NR_syscalls (380) +#define __NR_syscalls (384) #define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0) #define __ARCH_WANT_STAT64 diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index af33b44990e..17407c92c0d 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -406,6 +406,12 @@ #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) #define __NR_kcmp (__NR_SYSCALL_BASE+378) #define __NR_finit_module (__NR_SYSCALL_BASE+379) +/* Reserve for later +#define __NR_sched_setattr (__NR_SYSCALL_BASE+380) +#define __NR_sched_getattr (__NR_SYSCALL_BASE+381) +#define __NR_renameat2 (__NR_SYSCALL_BASE+382) +*/ +#define __NR_seccomp (__NR_SYSCALL_BASE+383) /* * This may need to be greater than __NR_last_syscall+1 in order to diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index c6ca7e37677..725f844926e 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -389,6 +389,11 @@ CALL(sys_process_vm_writev) CALL(sys_kcmp) CALL(sys_finit_module) +/* 380 */ CALL(sys_ni_syscall) /* reserved sys_sched_setattr */ + CALL(sys_ni_syscall) /* reserved sys_sched_getattr */ + CALL(sys_ni_syscall) /* reserved sys_renameat2 */ + CALL(sys_seccomp) + #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 03deeffd9f6..394424b2525 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -916,7 +916,7 @@ enum ptrace_syscall_dir { PTRACE_SYSCALL_EXIT, }; -static int tracehook_report_syscall(struct pt_regs *regs, +static void tracehook_report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir) { unsigned long ip; @@ -934,7 +934,6 @@ static int tracehook_report_syscall(struct pt_regs *regs, current_thread_info()->syscall = -1; regs->ARM_ip = ip; - return current_thread_info()->syscall; } asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) @@ -946,7 +945,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) return -1; if (test_thread_flag(TIF_SYSCALL_TRACE)) - scno = tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + + scno = current_thread_info()->syscall; if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, scno); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 76c38f46aec..bed605900d7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -33,6 +33,7 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_BUGVERBOSE @@ -403,6 +404,20 @@ config FORCE_MAX_ZONEORDER default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) default "11" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + endmenu menu "Boot options" diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 56de5aadede..e94e8dde78b 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -205,6 +205,13 @@ typedef struct compat_siginfo { compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + /* SIGSYS */ + struct { + compat_uptr_t _call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } compat_siginfo_t; diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 2ff89d107d8..c29d8852640 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -61,6 +61,15 @@ #define COMPAT_PT_TEXT_ADDR 0x10000 #define COMPAT_PT_DATA_ADDR 0x10004 #define COMPAT_PT_TEXT_END_ADDR 0x10008 + +/* + * used to skip a system call when tracer changes its number to -1 + * with ptrace(PTRACE_SET_SYSCALL) + */ +#define RET_SKIP_SYSCALL -1 +#define RET_SKIP_SYSCALL_TRACE -2 +#define IS_SKIP_SYSCALL(no) ((int)(no & 0xffffffff) == -1) + #ifndef __ASSEMBLY__ /* sizeof(struct user) for AArch32 */ diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h new file mode 100644 index 00000000000..bec3a43f7b1 --- /dev/null +++ b/arch/arm64/include/asm/seccomp.h @@ -0,0 +1,25 @@ +/* + * arch/arm64/include/asm/seccomp.h + * + * Copyright (C) 2014 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi <at> linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H + +#include <asm/unistd.h> + +#ifdef CONFIG_COMPAT +#define __NR_seccomp_read_32 __NR_compat_read +#define __NR_seccomp_write_32 __NR_compat_write +#define __NR_seccomp_exit_32 __NR_compat_exit +#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn +#endif /* CONFIG_COMPAT */ + +#include <asm-generic/seccomp.h> + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 6e4aaf9aac6..f67d0ec20f9 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -30,6 +30,9 @@ * Compat syscall numbers used by the AArch64 kernel. */ #define __NR_compat_restart_syscall 0 +#define __NR_compat_exit 1 +#define __NR_compat_read 3 +#define __NR_compat_write 4 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 @@ -40,7 +43,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 378 +#define __NR_compat_syscalls 384 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index dd336c150f3..76d09456509 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -781,3 +781,11 @@ __SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev) __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 379 __SYSCALL(__NR_finit_module, sys_finit_module) +/* #define __NR_sched_setattr 380 */ +__SYSCALL(380, sys_ni_syscall) +/* #define __NR_sched_getattr 381 */ +__SYSCALL(381, sys_ni_syscall) +/* #define __NR_renameat2 382 */ +__SYSCALL(382, sys_ni_syscall) +#define __NR_seccomp 383 +__SYSCALL(__NR_seccomp, sys_seccomp) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 6913643bbe5..49c61746297 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -23,6 +23,7 @@ #include <asm/hwcap.h> +#define PTRACE_SET_SYSCALL 23 /* * PSR bits diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index dcfb152028a..e6681c26d48 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -25,6 +25,7 @@ #include <asm/asm-offsets.h> #include <asm/errno.h> #include <asm/esr.h> +#include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/unistd.h> @@ -665,6 +666,10 @@ __sys_trace: mov x0, sp bl syscall_trace_enter adr lr, __sys_trace_return // return address + cmp w0, #RET_SKIP_SYSCALL_TRACE // skip syscall and tracing? + b.eq ret_to_user + cmp w0, #RET_SKIP_SYSCALL // skip syscall? + b.eq __sys_trace_return_skipped uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs cmp scno, sc_nr // check upper syscall limit @@ -678,6 +683,7 @@ __sys_trace: __sys_trace_return: str x0, [sp] // save returned x0 +__sys_trace_return_skipped: // x0 already in regs[0] mov x0, sp bl syscall_trace_exit b ret_to_user diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 53e6d633bb8..fdeab212655 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -27,6 +27,7 @@ #include <linux/smp.h> #include <linux/ptrace.h> #include <linux/user.h> +#include <linux/seccomp.h> #include <linux/security.h> #include <linux/init.h> #include <linux/signal.h> @@ -1068,7 +1069,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { - return ptrace_request(child, request, addr, data); + int ret; + + switch (request) { + case PTRACE_SET_SYSCALL: + task_pt_regs(child)->syscallno = data; + ret = 0; + break; + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; } enum ptrace_syscall_dir { @@ -1100,9 +1113,33 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs) { + unsigned int saved_syscallno = regs->syscallno; + + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing(regs->syscallno) == -1) + return RET_SKIP_SYSCALL_TRACE; + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + if (IS_SKIP_SYSCALL(regs->syscallno)) { + /* + * RESTRICTION: we can't modify a return value of user + * issued syscall(-1) here. In order to ease this flavor, + * we need to treat whatever value in x0 as a return value, + * but this might result in a bogus value being returned. + */ + /* + * NOTE: syscallno may also be set to -1 if fatal signal is + * detected in tracehook_report_syscall_entry(), but since + * a value set to x0 here is not used in this case, we may + * neglect the case. + */ + if (!test_thread_flag(TIF_SYSCALL_TRACE) || + (IS_SKIP_SYSCALL(saved_syscallno))) + regs->regs[0] = -ENOSYS; + } + audit_syscall_entry(syscall_get_arch(), regs->syscallno, regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 45ee2c3a218..e5cf0ab84be 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -183,6 +183,14 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr); break; +#ifdef __ARCH_SIGSYS + case __SI_SYS: + err |= __put_user((compat_uptr_t)(unsigned long) + from->si_call_addr, &to->si_call_addr); + err |= __put_user(from->si_syscall, &to->si_syscall); + err |= __put_user(from->si_arch, &to->si_arch); + break; +#endif default: /* this is just in case for now ... */ err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index 60576e06b6f..d0a69aa35e2 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -45,14 +45,37 @@ # define smp_rmb() rmb() # define smp_wmb() wmb() # define smp_read_barrier_depends() read_barrier_depends() + #else + # define smp_mb() barrier() # define smp_rmb() barrier() # define smp_wmb() barrier() # define smp_read_barrier_depends() do { } while(0) + #endif /* + * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no + * need for asm trickery! + */ + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + +/* * XXX check on this ---I suspect what Linus really wants here is * acquire vs release semantics but we can't discuss this stuff with * Linus just yet. Grrr... diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index e355a4c1096..2d6f0de7732 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -85,4 +85,19 @@ static inline void fence(void) #define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #endif /* _ASM_METAG_BARRIER_H */ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 314ab553201..52c5b61d7ab 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -180,4 +180,19 @@ #define nudge_writes() mb() #endif +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #endif /* __ASM_BARRIER_H */ diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index ae782254e73..f89da808ce3 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -45,11 +45,15 @@ # define SMPWMB eieio #endif +#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") + #define smp_mb() mb() -#define smp_rmb() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") +#define smp_rmb() __lwsync() #define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") #define smp_read_barrier_depends() read_barrier_depends() #else +#define __lwsync() barrier() + #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() @@ -65,4 +69,19 @@ #define data_barrier(x) \ asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory"); +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + __lwsync(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + __lwsync(); \ + ___p1; \ +}) + #endif /* _ASM_POWERPC_BARRIER_H */ diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 16760eeb79b..578680f6207 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -32,4 +32,19 @@ #define set_mb(var, value) do { var = value; mb(); } while (0) +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* __ASM_BARRIER_H */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index cd29d2f4e4f..bebc0bd8abc 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -89,11 +89,10 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->orig_gpr2 = args[0]; } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { #ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_31BIT)) + if (test_tsk_thread_flag(current, TIF_31BIT)) return AUDIT_ARCH_S390; #endif return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390; diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 95d45986f90..b5aad964558 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define smp_read_barrier_depends() do { } while(0) +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* !(__SPARC64_BARRIER_H) */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index c6cd358a1ee..04a48903b2e 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -92,12 +92,53 @@ #endif #define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else +#else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() #define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif /* SMP */ + +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + +/* + * For either of these options x86 doesn't have a strong TSO memory + * model and we should fall back to full barriers. + */ + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + +#else /* regular x86 TSO memory ordering */ + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 2e188d68397..f106908a12e 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -90,8 +90,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->bx + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { return AUDIT_ARCH_I386; } @@ -220,8 +219,7 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { #ifdef CONFIG_IA32_EMULATION /* @@ -233,7 +231,7 @@ static inline int syscall_get_arch(struct task_struct *task, * * x32 tasks should be considered AUDIT_ARCH_X86_64. */ - if (task_thread_info(task)->status & TS_COMPAT) + if (task_thread_info(current)->status & TS_COMPAT) return AUDIT_ARCH_I386; #endif /* Both x32 and x86_64 are considered "64-bit". */ diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index aabfb8380a1..01ed5025547 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -357,3 +357,7 @@ 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp 350 i386 finit_module sys_finit_module +# 351 i386 sched_setattr sys_sched_setattr +# 352 i386 sched_getattr sys_sched_getattr +# 353 i386 renameat2 sys_renameat2 +354 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 63a899304d2..c7b4ac76cd3 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -320,6 +320,10 @@ 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp 313 common finit_module sys_finit_module +# 314 common sched_setattr sys_sched_setattr +# 315 common sched_getattr sys_sched_getattr +# 316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index a42c3548bdd..2c41a74d0ed 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -32,6 +32,7 @@ #include <linux/cpufreq.h> #include <linux/cpuidle.h> #include <linux/timer.h> +#include <linux/wakeup_reason.h> #include "../base.h" #include "power.h" @@ -942,6 +943,7 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state) static int dpm_suspend_noirq(pm_message_t state) { ktime_t starttime = ktime_get(); + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; int error = 0; cpuidle_pause(); @@ -969,6 +971,9 @@ static int dpm_suspend_noirq(pm_message_t state) put_device(dev); if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); error = -EBUSY; break; } @@ -1027,6 +1032,7 @@ static int device_suspend_late(struct device *dev, pm_message_t state) static int dpm_suspend_late(pm_message_t state) { ktime_t starttime = ktime_get(); + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; int error = 0; mutex_lock(&dpm_list_mtx); @@ -1052,6 +1058,9 @@ static int dpm_suspend_late(pm_message_t state) put_device(dev); if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); error = -EBUSY; break; } @@ -1119,6 +1128,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) char *info = NULL; int error = 0; struct dpm_watchdog wd; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; dpm_wait_for_children(dev, async); @@ -1135,6 +1145,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); async_error = -EBUSY; goto Complete; } diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 79715e7fa43..bea700736f2 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -659,6 +659,22 @@ void pm_wakeup_event(struct device *dev, unsigned int msec) } EXPORT_SYMBOL_GPL(pm_wakeup_event); +void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max) +{ + struct wakeup_source *ws; + int len = 0; + rcu_read_lock(); + len += snprintf(pending_wakeup_source, max, "Pending Wakeup Sources: "); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + if (ws->active) { + len += snprintf(pending_wakeup_source + len, max, + "%s ", ws->name); + } + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources); + static void print_active_wakeup_sources(void) { struct wakeup_source *ws; diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index e8d11b6630e..0ab546558c4 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -10,6 +10,7 @@ #include <linux/mutex.h> #include <linux/module.h> #include <linux/interrupt.h> +#include <linux/wakeup_reason.h> static LIST_HEAD(syscore_ops_list); static DEFINE_MUTEX(syscore_ops_lock); @@ -73,6 +74,8 @@ int syscore_suspend(void) return 0; err_out: + log_suspend_abort_reason("System core suspend callback %pF failed", + ops->suspend); pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend); list_for_each_entry_continue(ops, &syscore_ops_list, node) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 046491d9f0c..690af18f55a 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -285,19 +285,19 @@ put_ref: static void cpufreq_allstats_free(void) { - int i; + int cpu; struct all_cpufreq_stats *all_stat; sysfs_remove_file(cpufreq_global_kobject, &_attr_all_time_in_state.attr); - for (i = 0; i < total_cpus; i++) { - all_stat = per_cpu(all_cpufreq_stats, i); + for_each_possible_cpu(cpu) { + all_stat = per_cpu(all_cpufreq_stats, cpu); if (!all_stat) continue; kfree(all_stat->time_in_state); kfree(all_stat); - per_cpu(all_cpufreq_stats, i) = NULL; + per_cpu(all_cpufreq_stats, cpu) = NULL; } if (all_freq_table) { kfree(all_freq_table->freq_table); diff --git a/fs/exec.c b/fs/exec.c index dd6aa61c854..cb7f31c71c6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against - * PTRACE_ATTACH + * PTRACE_ATTACH or seccomp thread-sync */ static int check_unsafe_exec(struct linux_binprm *bprm) { @@ -1239,7 +1239,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm) * This isn't strictly necessary, but it makes it harder for LSMs to * mess up. */ - if (current->no_new_privs) + if (task_no_new_privs(current)) bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; n_fs = 1; @@ -1286,7 +1286,7 @@ int prepare_binprm(struct linux_binprm *bprm) bprm->cred->egid = current_egid(); if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && - !current->no_new_privs && + !task_no_new_privs(current) && kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { /* Set-uid? */ diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 639d7a4d033..01613b382b0 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -46,5 +46,20 @@ #define read_barrier_depends() do {} while (0) #define smp_read_barrier_depends() do {} while (0) +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/include/asm-generic/seccomp.h b/include/asm-generic/seccomp.h new file mode 100644 index 00000000000..663ac3dc02e --- /dev/null +++ b/include/asm-generic/seccomp.h @@ -0,0 +1,29 @@ +/* + * include/asm-generic/seccomp.h + * + * Copyright (C) 2014 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi <at> linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_GENERIC_SECCOMP_H +#define _ASM_GENERIC_SECCOMP_H + +#include <asm-generic/unistd.h> + +#if defined(CONFIG_COMPAT) && !defined(__NR_seccomp_read_32) +#define __NR_seccomp_read_32 __NR_read +#define __NR_seccomp_write_32 __NR_write +#define __NR_seccomp_exit_32 __NR_exit +#define __NR_seccomp_sigreturn_32 __NR_rt_sigreturn +#endif /* CONFIG_COMPAT && ! already defined */ + +#define __NR_seccomp_read __NR_read +#define __NR_seccomp_write __NR_write +#define __NR_seccomp_exit __NR_exit +#define __NR_seccomp_sigreturn __NR_rt_sigreturn + +#endif /* _ASM_GENERIC_SECCOMP_H */ + diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 5b09392db67..d401e5463fb 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -144,8 +144,6 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, /** * syscall_get_arch - return the AUDIT_ARCH for the current system call - * @task: task of interest, must be in system call entry tracing - * @regs: task_pt_regs() of @task * * Returns the AUDIT_ARCH_* based on the system call convention in use. * @@ -155,5 +153,5 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, * Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must * provide an implementation of this. */ -int syscall_get_arch(struct task_struct *task, struct pt_regs *regs); +int syscall_get_arch(void); #endif /* _ASM_SYSCALL_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 32a4f95d0bd..a6fc777288a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -869,6 +869,17 @@ unsigned short css_id(struct cgroup_subsys_state *css); unsigned short css_depth(struct cgroup_subsys_state *css); struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); +/* + * Default Android check for whether the current process is allowed to move a + * task across cgroups, either because CAP_SYS_NICE is set or because the uid + * of the calling process is the same as the moved task or because we are + * running as root. + * Returns 0 if this is allowed, or -EACCES otherwise. + */ +int subsys_cgroup_allow_attach(struct cgroup *cgrp, + struct cgroup_taskset *tset); + + #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } @@ -892,6 +903,11 @@ static inline int cgroup_attach_task_all(struct task_struct *from, return 0; } +static inline int subsys_cgroup_allow_attach(struct cgroup *cgrp, + struct cgroup_taskset *tset) +{ + return 0; +} #endif /* !CONFIG_CGROUPS */ #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 92669cd182a..fe7a686dfd8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* Is this type a native word size -- useful for atomic operations */ +#ifndef __native_word +# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) +#endif + /* Compile time object size, -1 for unknown */ #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 @@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define compiletime_assert(condition, msg) \ _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) +#define compiletime_assert_atomic_type(t) \ + compiletime_assert(__native_word(t), \ + "Need native word sized stores/loads for atomicity.") + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 97e3f0926a4..ebc6306661e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -41,6 +41,7 @@ struct ipv6_devconf { __s32 accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; + __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE __s32 mc_forwarding; diff --git a/include/linux/sched.h b/include/linux/sched.h index c262c4d1000..f2f5979a947 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1143,13 +1143,12 @@ struct task_struct { * execve */ unsigned in_iowait:1; - /* task may not gain privileges */ - unsigned no_new_privs:1; - /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; + unsigned long atomic_flags; /* Flags needing atomic access. */ + pid_t pid; pid_t tgid; @@ -1716,6 +1715,19 @@ static inline void memalloc_noio_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; } +/* Per-process atomic flags. */ +#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */ + +static inline bool task_no_new_privs(struct task_struct *p) +{ + return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); +} + +static inline void task_set_no_new_privs(struct task_struct *p) +{ + set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); +} + /* * task->jobctl flags */ diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 6f19cfd1840..9687691799f 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,6 +3,8 @@ #include <uapi/linux/seccomp.h> +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) + #ifdef CONFIG_SECCOMP #include <linux/thread_info.h> @@ -14,11 +16,11 @@ struct seccomp_filter; * * @mode: indicates one of the valid values above for controlled * system calls available to a process. - * @filter: The metadata and ruleset for determining what system calls - * are allowed for a task. + * @filter: must always point to a valid seccomp-filter or NULL as it is + * accessed without locking during system call entry. * * @filter must only be accessed from the context of current as there - * is no locking. + * is no read locking. */ struct seccomp { int mode; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d4e3f16d5e8..a34821358ae 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -363,7 +363,7 @@ extern bool pm_wakeup_pending(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); - +extern void pm_get_active_wakeup_sources(char *pending_sources, size_t max); static inline void lock_system_sleep(void) { current->flags |= PF_FREEZER_SKIP; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 84662ecc7b5..4e98d717413 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -846,4 +846,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h index 7ce50f0debc..ad8b76936c7 100644 --- a/include/linux/wakeup_reason.h +++ b/include/linux/wakeup_reason.h @@ -18,6 +18,10 @@ #ifndef _LINUX_WAKEUP_REASON_H #define _LINUX_WAKEUP_REASON_H +#define MAX_SUSPEND_ABORT_LEN 256 + void log_wakeup_reason(int irq); +void log_suspend_abort_reason(const char *fmt, ...); +int check_wakeup_reason(int irq); #endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 0cc74c4403e..b422ad5d238 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 __SYSCALL(__NR_finit_module, sys_finit_module) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr 274 +__SYSCALL(__NR_sched_setattr, sys_sched_setattr) + * #define __NR_sched_getattr 275 +__SYSCALL(__NR_sched_getattr, sys_sched_getattr) + * #define __NR_renameat2 276 +__SYSCALL(__NR_renameat2, sys_renameat2) + */ +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 274 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 4214fac1bf4..e9d0f7efde3 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -161,6 +161,7 @@ enum { DEVCONF_FORCE_TLLAO, DEVCONF_NDISC_NOTIFY, DEVCONF_ACCEPT_RA_RT_TABLE, + DEVCONF_USE_OPTIMISTIC, DEVCONF_MAX }; diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f7297..0f238a43ff1 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,13 @@ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 + +/* Valid flags for SECCOMP_SET_MODE_FILTER */ +#define SECCOMP_FILTER_FLAG_TSYNC 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h index 602dc6c45d1..165e7059de7 100644 --- a/include/uapi/sound/compress_params.h +++ b/include/uapi/sound/compress_params.h @@ -57,6 +57,7 @@ #define MAX_NUM_CODECS 32 #define MAX_NUM_CODEC_DESCRIPTORS 32 #define MAX_NUM_BITRATES 32 +#define MAX_NUM_SAMPLE_RATES 32 /* Codecs are listed linearly to allow for extensibility */ #define SND_AUDIOCODEC_PCM ((__u32) 0x00000001) @@ -324,7 +325,8 @@ union snd_codec_options { /** struct snd_codec_desc - description of codec capabilities * @max_ch: Maximum number of audio channels - * @sample_rates: Sampling rates in Hz, use SNDRV_PCM_RATE_xxx for this + * @sample_rates: Sampling rates in Hz, use values like 48000 for this + * @num_sample_rates: Number of valid values in sample_rates array * @bit_rate: Indexed array containing supported bit rates * @num_bitrates: Number of valid values in bit_rate array * @rate_control: value is specified by SND_RATECONTROLMODE defines. @@ -346,7 +348,8 @@ union snd_codec_options { struct snd_codec_desc { __u32 max_ch; - __u32 sample_rates; + __u32 sample_rates[MAX_NUM_SAMPLE_RATES]; + __u32 num_sample_rates; __u32 bit_rate[MAX_NUM_BITRATES]; __u32 num_bitrates; __u32 rate_control; @@ -364,7 +367,8 @@ struct snd_codec_desc { * @ch_out: Number of output channels. In case of contradiction between * this field and the channelMode field, the channelMode field * overrides. - * @sample_rate: Audio sample rate of input data + * @sample_rate: Audio sample rate of input data in Hz, use values like 48000 + * for this. * @bit_rate: Bitrate of encoded data. May be ignored by decoders * @rate_control: Encoding rate control. See SND_RATECONTROLMODE defines. * Encoders may rely on profiles for quality levels. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index cd1c303214f..e646e870ec5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2124,6 +2124,25 @@ static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) return 0; } +int subsys_cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +{ + const struct cred *cred = current_cred(), *tcred; + struct task_struct *task; + + if (capable(CAP_SYS_NICE)) + return 0; + + cgroup_taskset_for_each(task, cgrp, tset) { + tcred = __task_cred(task); + + if (current != task && cred->euid != tcred->uid && + cred->euid != tcred->suid) + return -EACCES; + } + + return 0; +} + /* * Find the task_struct of the task to attach by vpid and pass it along to the * function to attach either it or all tasks in its threadgroup. Will lock diff --git a/kernel/fork.c b/kernel/fork.c index ccc4044e6cc..b0663950634 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -327,6 +327,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto free_ti; tsk->stack = ti; +#ifdef CONFIG_SECCOMP + /* + * We must handle setting up seccomp filters once we're under + * the sighand lock in case orig has changed between now and + * then. Until then, filter must be NULL to avoid messing up + * the usage counts on the error path calling free_task. + */ + tsk->seccomp.filter = NULL; +#endif setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); @@ -1108,6 +1117,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) p->flags = new_flags; } +static void copy_seccomp(struct task_struct *p) +{ +#ifdef CONFIG_SECCOMP + /* + * Must be called with sighand->lock held, which is common to + * all threads in the group. Holding cred_guard_mutex is not + * needed because this new task is not yet running and cannot + * be racing exec. + */ + assert_spin_locked(¤t->sighand->siglock); + + /* Ref-count the new filter user, and assign it. */ + get_seccomp_filter(current); + p->seccomp = current->seccomp; + + /* + * Explicitly enable no_new_privs here in case it got set + * between the task_struct being duplicated and holding the + * sighand lock. The seccomp state and nnp must be in sync. + */ + if (task_no_new_privs(current)) + task_set_no_new_privs(p); + + /* + * If the parent gained a seccomp mode after copying thread + * flags and between before we held the sighand lock, we have + * to manually enable the seccomp thread flag here. + */ + if (p->seccomp.mode != SECCOMP_MODE_DISABLED) + set_tsk_thread_flag(p, TIF_SECCOMP); +#endif +} + SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; @@ -1212,7 +1254,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; ftrace_graph_init_task(p); - get_seccomp_filter(p); rt_mutex_init_task(p); @@ -1455,6 +1496,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_lock(¤t->sighand->siglock); /* + * Copy seccomp details explicitly here, in case they were changed + * before holding sighand lock. + */ + copy_seccomp(p); + + /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index c72b7a43beb..bbb5f65665b 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/syscore_ops.h> - +#include <linux/wakeup_reason.h> #include "internals.h" /** @@ -100,11 +100,16 @@ EXPORT_SYMBOL_GPL(resume_device_irqs); int check_wakeup_irqs(void) { struct irq_desc *desc; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; int irq; for_each_irq_desc(irq, desc) { if (irqd_is_wakeup_set(&desc->irq_data)) { if (desc->istate & IRQS_PENDING) { + log_suspend_abort_reason("Wakeup IRQ %d %s pending", + irq, + desc->action && desc->action->name ? + desc->action->name : ""); pr_info("Wakeup IRQ %d %s pending, suspend aborted\n", irq, desc->action && desc->action->name ? diff --git a/kernel/power/process.c b/kernel/power/process.c index 4ac9ce12679..a7e8a3f1737 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -17,7 +17,7 @@ #include <linux/delay.h> #include <linux/workqueue.h> #include <linux/kmod.h> - +#include <linux/wakeup_reason.h> /* * Timeout for stopping processes */ @@ -34,6 +34,7 @@ static int try_to_freeze_tasks(bool user_only) unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; do_gettimeofday(&start); @@ -63,6 +64,9 @@ static int try_to_freeze_tasks(bool user_only) break; if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); wakeup = true; break; } @@ -82,23 +86,24 @@ static int try_to_freeze_tasks(bool user_only) do_div(elapsed_msecs64, NSEC_PER_MSEC); elapsed_msecs = elapsed_msecs64; - if (todo) { + if (wakeup) { + printk("\n"); + printk(KERN_ERR "Freezing of tasks aborted after %d.%03d seconds", + elapsed_msecs / 1000, elapsed_msecs % 1000); + } else if (todo) { printk("\n"); - printk(KERN_ERR "Freezing of tasks %s after %d.%03d seconds " - "(%d tasks refusing to freeze, wq_busy=%d):\n", - wakeup ? "aborted" : "failed", + printk(KERN_ERR "Freezing of tasks failed after %d.%03d seconds" + " (%d tasks refusing to freeze, wq_busy=%d):\n", elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); - if (!wakeup) { - read_lock(&tasklist_lock); - do_each_thread(g, p) { - if (p != current && !freezer_should_skip(p) - && freezing(p) && !frozen(p)) - sched_show_task(p); - } while_each_thread(g, p); - read_unlock(&tasklist_lock); - } + read_lock(&tasklist_lock); + do_each_thread(g, p) { + if (p != current && !freezer_should_skip(p) + && freezing(p) && !frozen(p)) + sched_show_task(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); } else { printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, elapsed_msecs % 1000); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 86e021b76c3..82450c20875 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -27,6 +27,7 @@ #include <linux/ftrace.h> #include <linux/rtc.h> #include <trace/events/power.h> +#include <linux/wakeup_reason.h> #include "power.h" @@ -139,7 +140,7 @@ static int suspend_prepare(suspend_state_t state) error = suspend_freeze_processes(); if (!error) return 0; - + log_suspend_abort_reason("One or more tasks refusing to freeze"); suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); Finish: @@ -169,7 +170,8 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) */ static int suspend_enter(suspend_state_t state, bool *wakeup) { - int error; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; + int error, last_dev; if (need_suspend_ops(state) && suspend_ops->prepare) { error = suspend_ops->prepare(); @@ -179,7 +181,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) error = dpm_suspend_end(PMSG_SUSPEND); if (error) { + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; printk(KERN_ERR "PM: Some devices failed to power down\n"); + log_suspend_abort_reason("%s device failed to power down", + suspend_stats.failed_devs[last_dev]); goto Platform_finish; } @@ -204,8 +210,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) } error = disable_nonboot_cpus(); - if (error || suspend_test(TEST_CPUS)) + if (error || suspend_test(TEST_CPUS)) { + log_suspend_abort_reason("Disabling non-boot cpus failed"); goto Enable_cpus; + } arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -216,6 +224,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (!(suspend_test(TEST_CORE) || *wakeup)) { error = suspend_ops->enter(state); events_check_enabled = false; + } else { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); } syscore_resume(); } @@ -263,6 +275,7 @@ int suspend_devices_and_enter(suspend_state_t state) error = dpm_suspend_start(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to suspend\n"); + log_suspend_abort_reason("Some devices failed to suspend"); goto Recover_platform; } suspend_test_finish("suspend devices"); diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 187e4e9105f..085c99edca0 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -31,8 +31,10 @@ #define MAX_WAKEUP_REASON_IRQS 32 static int irq_list[MAX_WAKEUP_REASON_IRQS]; static int irqcount; +static bool suspend_abort; +static char abort_reason[MAX_SUSPEND_ABORT_LEN]; static struct kobject *wakeup_reason; -static spinlock_t resume_reason_lock; +static DEFINE_SPINLOCK(resume_reason_lock); static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -40,14 +42,18 @@ static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribu int irq_no, buf_offset = 0; struct irq_desc *desc; spin_lock(&resume_reason_lock); - for (irq_no = 0; irq_no < irqcount; irq_no++) { - desc = irq_to_desc(irq_list[irq_no]); - if (desc && desc->action && desc->action->name) - buf_offset += sprintf(buf + buf_offset, "%d %s\n", - irq_list[irq_no], desc->action->name); - else - buf_offset += sprintf(buf + buf_offset, "%d\n", - irq_list[irq_no]); + if (suspend_abort) { + buf_offset = sprintf(buf, "Abort: %s", abort_reason); + } else { + for (irq_no = 0; irq_no < irqcount; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } } spin_unlock(&resume_reason_lock); return buf_offset; @@ -89,6 +95,40 @@ void log_wakeup_reason(int irq) spin_unlock(&resume_reason_lock); } +int check_wakeup_reason(int irq) +{ + int irq_no; + int ret = false; + + spin_lock(&resume_reason_lock); + for (irq_no = 0; irq_no < irqcount; irq_no++) + if (irq_list[irq_no] == irq) { + ret = true; + break; + } + spin_unlock(&resume_reason_lock); + return ret; +} + +void log_suspend_abort_reason(const char *fmt, ...) +{ + va_list args; + + spin_lock(&resume_reason_lock); + + //Suspend abort reason has already been logged. + if (suspend_abort) { + spin_unlock(&resume_reason_lock); + return; + } + + suspend_abort = true; + va_start(args, fmt); + snprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); + va_end(args); + spin_unlock(&resume_reason_lock); +} + /* Detects a suspend and clears all the previous wake up reasons*/ static int wakeup_reason_pm_event(struct notifier_block *notifier, unsigned long pm_event, void *unused) @@ -97,6 +137,7 @@ static int wakeup_reason_pm_event(struct notifier_block *notifier, case PM_SUSPEND_PREPARE: spin_lock(&resume_reason_lock); irqcount = 0; + suspend_abort = false; spin_unlock(&resume_reason_lock); break; default: @@ -115,7 +156,7 @@ static struct notifier_block wakeup_reason_pm_notifier_block = { int __init wakeup_reason_init(void) { int retval; - spin_lock_init(&resume_reason_lock); + retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); if (retval) printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9408d236c78..ea4e780697b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7761,23 +7761,6 @@ static void cpu_cgroup_css_offline(struct cgroup *cgrp) sched_offline_group(tg); } -static int -cpu_cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) -{ - const struct cred *cred = current_cred(), *tcred; - struct task_struct *task; - - cgroup_taskset_for_each(task, cgrp, tset) { - tcred = __task_cred(task); - - if ((current != task) && !capable(CAP_SYS_NICE) && - cred->euid != tcred->uid && cred->euid != tcred->suid) - return -EACCES; - } - - return 0; -} - static int cpu_cgroup_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { @@ -8144,7 +8127,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { .css_offline = cpu_cgroup_css_offline, .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, - .allow_attach = cpu_cgroup_allow_attach, + .allow_attach = subsys_cgroup_allow_attach, .exit = cpu_cgroup_exit, .subsys_id = cpu_cgroup_subsys_id, .base_cftypes = cpu_files, diff --git a/kernel/seccomp.c b/kernel/seccomp.c index b7a10048a32..1fbb1a2bc45 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,15 +18,17 @@ #include <linux/compat.h> #include <linux/sched.h> #include <linux/seccomp.h> +#include <linux/slab.h> +#include <linux/syscalls.h> /* #define SECCOMP_DEBUG 1 */ #ifdef CONFIG_SECCOMP_FILTER #include <asm/syscall.h> #include <linux/filter.h> +#include <linux/pid.h> #include <linux/ptrace.h> #include <linux/security.h> -#include <linux/slab.h> #include <linux/tracehook.h> #include <linux/uaccess.h> @@ -95,7 +97,7 @@ u32 seccomp_bpf_load(int off) if (off == BPF_DATA(nr)) return syscall_get_nr(current, regs); if (off == BPF_DATA(arch)) - return syscall_get_arch(current, regs); + return syscall_get_arch(); if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) { unsigned long value; int arg = (off - BPF_DATA(args[0])) / sizeof(u64); @@ -201,32 +203,170 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) */ static u32 seccomp_run_filters(int syscall) { - struct seccomp_filter *f; + struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ - if (WARN_ON(current->seccomp.filter == NULL)) + if (unlikely(WARN_ON(f == NULL))) return SECCOMP_RET_KILL; + /* Make sure cross-thread synced filter points somewhere sane. */ + smp_read_barrier_depends(); + /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ - for (f = current->seccomp.filter; f; f = f->prev) { + for (; f; f = f->prev) { u32 cur_ret = sk_run_filter(NULL, f->insns); + if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; } return ret; } +#endif /* CONFIG_SECCOMP_FILTER */ + +static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) +{ + assert_spin_locked(¤t->sighand->siglock); + + if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) + return false; + + return true; +} + +static inline void seccomp_assign_mode(struct task_struct *task, + unsigned long seccomp_mode) +{ + assert_spin_locked(&task->sighand->siglock); + + task->seccomp.mode = seccomp_mode; + /* + * Make sure TIF_SECCOMP cannot be set before the mode (and + * filter) is set. + */ + smp_mb(); + set_tsk_thread_flag(task, TIF_SECCOMP); +} + +#ifdef CONFIG_SECCOMP_FILTER +/* Returns 1 if the parent is an ancestor of the child. */ +static int is_ancestor(struct seccomp_filter *parent, + struct seccomp_filter *child) +{ + /* NULL is the root ancestor. */ + if (parent == NULL) + return 1; + for (; child; child = child->prev) + if (child == parent) + return 1; + return 0; +} /** - * seccomp_attach_filter: Attaches a seccomp filter to current. + * seccomp_can_sync_threads: checks if all threads can be synchronized + * + * Expects sighand and cred_guard_mutex locks to be held. + * + * Returns 0 on success, -ve on error, or the pid of a thread which was + * either not in the correct seccomp mode or it did not have an ancestral + * seccomp filter. + */ +static inline pid_t seccomp_can_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + assert_spin_locked(¤t->sighand->siglock); + + /* Validate all threads being eligible for synchronization. */ + caller = current; + for_each_thread(caller, thread) { + pid_t failed; + + /* Skip current, since it is initiating the sync. */ + if (thread == caller) + continue; + + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || + (thread->seccomp.mode == SECCOMP_MODE_FILTER && + is_ancestor(thread->seccomp.filter, + caller->seccomp.filter))) + continue; + + /* Return the first thread that cannot be synchronized. */ + failed = task_pid_vnr(thread); + /* If the pid cannot be resolved, then return -ESRCH */ + if (unlikely(WARN_ON(failed == 0))) + failed = -ESRCH; + return failed; + } + + return 0; +} + +/** + * seccomp_sync_threads: sets all threads to use current's filter + * + * Expects sighand and cred_guard_mutex locks to be held, and for + * seccomp_can_sync_threads() to have returned success already + * without dropping the locks. + * + */ +static inline void seccomp_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + assert_spin_locked(¤t->sighand->siglock); + + /* Synchronize all threads. */ + caller = current; + for_each_thread(caller, thread) { + /* Skip current, since it needs no changes. */ + if (thread == caller) + continue; + + /* Get a task reference for the new leaf node. */ + get_seccomp_filter(caller); + /* + * Drop the task reference to the shared ancestor since + * current's path will hold a reference. (This also + * allows a put before the assignment.) + */ + put_seccomp_filter(thread); + smp_store_release(&thread->seccomp.filter, + caller->seccomp.filter); + /* + * Opt the other thread into seccomp if needed. + * As threads are considered to be trust-realm + * equivalent (see ptrace_may_access), it is safe to + * allow one thread to transition the other. + */ + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + + seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); + } + } +} + +/** + * seccomp_prepare_filter: Prepares a seccomp filter for use. * @fprog: BPF program to install * - * Returns 0 on success or an errno on failure. + * Returns filter on success or an ERR_PTR on failure. */ -static long seccomp_attach_filter(struct sock_fprog *fprog) +static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { struct seccomp_filter *filter; unsigned long fp_size = fprog->len * sizeof(struct sock_filter); @@ -234,12 +374,13 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) long ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) - return -EINVAL; + return ERR_PTR(-EINVAL); + BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); for (filter = current->seccomp.filter; filter; filter = filter->prev) total_insns += filter->len + 4; /* include a 4 instr penalty */ if (total_insns > MAX_INSNS_PER_PATH) - return -ENOMEM; + return ERR_PTR(-ENOMEM); /* * Installing a seccomp filter requires that the task have @@ -247,16 +388,16 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) * This avoids scenarios where unprivileged tasks can affect the * behavior of privileged children. */ - if (!current->no_new_privs && + if (!task_no_new_privs(current) && security_capable_noaudit(current_cred(), current_user_ns(), CAP_SYS_ADMIN) != 0) - return -EACCES; + return ERR_PTR(-EACCES); /* Allocate a new seccomp_filter */ filter = kzalloc(sizeof(struct seccomp_filter) + fp_size, GFP_KERNEL|__GFP_NOWARN); if (!filter) - return -ENOMEM; + return ERR_PTR(-ENOMEM);; atomic_set(&filter->usage, 1); filter->len = fprog->len; @@ -275,28 +416,24 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (ret) goto fail; - /* - * If there is an existing filter, make it the prev and don't drop its - * task reference. - */ - filter->prev = current->seccomp.filter; - current->seccomp.filter = filter; - return 0; + return filter; + fail: kfree(filter); - return ret; + return ERR_PTR(ret); } /** - * seccomp_attach_user_filter - attaches a user-supplied sock_fprog + * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog * @user_filter: pointer to the user data containing a sock_fprog. * * Returns 0 on success and non-zero otherwise. */ -long seccomp_attach_user_filter(char __user *user_filter) +static struct seccomp_filter * +seccomp_prepare_user_filter(const char __user *user_filter) { struct sock_fprog fprog; - long ret = -EFAULT; + struct seccomp_filter *filter = ERR_PTR(-EFAULT); #ifdef CONFIG_COMPAT if (is_compat_task()) { @@ -309,9 +446,56 @@ long seccomp_attach_user_filter(char __user *user_filter) #endif if (copy_from_user(&fprog, user_filter, sizeof(fprog))) goto out; - ret = seccomp_attach_filter(&fprog); + filter = seccomp_prepare_filter(&fprog); out: - return ret; + return filter; +} + +/** + * seccomp_attach_filter: validate and attach filter + * @flags: flags to change filter behavior + * @filter: seccomp filter to add to the current process + * + * Caller must be holding current->sighand->siglock lock. + * + * Returns 0 on success, -ve on error. + */ +static long seccomp_attach_filter(unsigned int flags, + struct seccomp_filter *filter) +{ + unsigned long total_insns; + struct seccomp_filter *walker; + + assert_spin_locked(¤t->sighand->siglock); + + /* Validate resulting filter length. */ + total_insns = filter->len; + for (walker = current->seccomp.filter; walker; walker = walker->prev) + total_insns += walker->len + 4; /* 4 instr penalty */ + if (total_insns > MAX_INSNS_PER_PATH) + return -ENOMEM; + + /* If thread sync has been requested, check that it is possible. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) { + int ret; + + ret = seccomp_can_sync_threads(); + if (ret) + return ret; + } + + /* + * If there is an existing filter, make it the prev and don't drop its + * task reference. + */ + filter->prev = current->seccomp.filter; + current->seccomp.filter = filter; + + /* Now that the new filter is in place, synchronize to all threads. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + seccomp_sync_threads(); + + return 0; } /* get_seccomp_filter - increments the reference count of the filter on @tsk */ @@ -324,6 +508,13 @@ void get_seccomp_filter(struct task_struct *tsk) atomic_inc(&orig->usage); } +static inline void seccomp_filter_free(struct seccomp_filter *filter) +{ + if (filter) { + kfree(filter); + } +} + /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ void put_seccomp_filter(struct task_struct *tsk) { @@ -332,7 +523,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - kfree(freeme); + seccomp_filter_free(freeme); } } @@ -351,7 +542,7 @@ static void seccomp_send_sigsys(int syscall, int reason) info.si_code = SYS_SECCOMP; info.si_call_addr = (void __user *)KSTK_EIP(current); info.si_errno = reason; - info.si_arch = syscall_get_arch(current, task_pt_regs(current)); + info.si_arch = syscall_get_arch(); info.si_syscall = syscall; force_sig_info(SIGSYS, &info, current); } @@ -376,12 +567,17 @@ static int mode1_syscalls_32[] = { int __secure_computing(int this_syscall) { - int mode = current->seccomp.mode; int exit_sig = 0; int *syscall; u32 ret; - switch (mode) { + /* + * Make sure that any changes to mode from another thread have + * been seen after TIF_SECCOMP was seen. + */ + rmb(); + + switch (current->seccomp.mode) { case SECCOMP_MODE_STRICT: syscall = mode1_syscalls; #ifdef CONFIG_COMPAT @@ -467,47 +663,152 @@ long prctl_get_seccomp(void) } /** - * prctl_set_seccomp: configures current->seccomp.mode - * @seccomp_mode: requested mode to use - * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER + * seccomp_set_mode_strict: internal function for setting strict seccomp + * + * Once current->seccomp.mode is non-zero, it may not be changed. + * + * Returns 0 on success or -EINVAL on failure. + */ +static long seccomp_set_mode_strict(void) +{ + const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; + long ret = -EINVAL; + + spin_lock_irq(¤t->sighand->siglock); + + if (!seccomp_may_assign_mode(seccomp_mode)) + goto out; + +#ifdef TIF_NOTSC + disable_TSC(); +#endif + seccomp_assign_mode(current, seccomp_mode); + ret = 0; + +out: + spin_unlock_irq(¤t->sighand->siglock); + + return ret; +} + +#ifdef CONFIG_SECCOMP_FILTER +/** + * seccomp_set_mode_filter: internal function for setting seccomp filter + * @flags: flags to change filter behavior + * @filter: struct sock_fprog containing filter * - * This function may be called repeatedly with a @seccomp_mode of - * SECCOMP_MODE_FILTER to install additional filters. Every filter - * successfully installed will be evaluated (in reverse order) for each system - * call the task makes. + * This function may be called repeatedly to install additional filters. + * Every filter successfully installed will be evaluated (in reverse order) + * for each system call the task makes. * * Once current->seccomp.mode is non-zero, it may not be changed. * * Returns 0 on success or -EINVAL on failure. */ -long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) +static long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { + const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; + struct seccomp_filter *prepared = NULL; long ret = -EINVAL; - if (current->seccomp.mode && - current->seccomp.mode != seccomp_mode) + /* Validate flags. */ + if (flags & ~SECCOMP_FILTER_FLAG_MASK) + return -EINVAL; + + /* Prepare the new filter before holding any locks. */ + prepared = seccomp_prepare_user_filter(filter); + if (IS_ERR(prepared)) + return PTR_ERR(prepared); + + /* + * Make sure we cannot change seccomp or nnp state via TSYNC + * while another thread is in the middle of calling exec. + */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC && + mutex_lock_killable(¤t->signal->cred_guard_mutex)) + goto out_free; + + spin_lock_irq(¤t->sighand->siglock); + + if (!seccomp_may_assign_mode(seccomp_mode)) + goto out; + + ret = seccomp_attach_filter(flags, prepared); + if (ret) goto out; + /* Do not free the successfully attached filter. */ + prepared = NULL; + + seccomp_assign_mode(current, seccomp_mode); +out: + spin_unlock_irq(¤t->sighand->siglock); + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + mutex_unlock(¤t->signal->cred_guard_mutex); +out_free: + seccomp_filter_free(prepared); + return ret; +} +#else +static inline long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) +{ + return -EINVAL; +} +#endif + +/* Common entry point for both prctl and syscall. */ +static long do_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs) +{ + switch (op) { + case SECCOMP_SET_MODE_STRICT: + if (flags != 0 || uargs != NULL) + return -EINVAL; + return seccomp_set_mode_strict(); + case SECCOMP_SET_MODE_FILTER: + return seccomp_set_mode_filter(flags, uargs); + default: + return -EINVAL; + } +} + +SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, + const char __user *, uargs) +{ + return do_seccomp(op, flags, uargs); +} + +/** + * prctl_set_seccomp: configures current->seccomp.mode + * @seccomp_mode: requested mode to use + * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER + * + * Returns 0 on success or -EINVAL on failure. + */ +long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) +{ + unsigned int op; + char __user *uargs; switch (seccomp_mode) { case SECCOMP_MODE_STRICT: - ret = 0; -#ifdef TIF_NOTSC - disable_TSC(); -#endif + op = SECCOMP_SET_MODE_STRICT; + /* + * Setting strict mode through prctl always ignored filter, + * so make sure it is always NULL here to pass the internal + * check in do_seccomp(). + */ + uargs = NULL; break; -#ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: - ret = seccomp_attach_user_filter(filter); - if (ret) - goto out; + op = SECCOMP_SET_MODE_FILTER; + uargs = filter; break; -#endif default: - goto out; + return -EINVAL; } - current->seccomp.mode = seccomp_mode; - set_thread_flag(TIF_SECCOMP); -out: - return ret; + /* prctl interface doesn't have flags, so they are always zero. */ + return do_seccomp(op, 0, uargs); } diff --git a/kernel/sys.c b/kernel/sys.c index 65d3e55bd28..0b08c9f000f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2427,12 +2427,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, if (arg2 != 1 || arg3 || arg4 || arg5) return -EINVAL; - current->no_new_privs = 1; + task_set_no_new_privs(current); break; case PR_GET_NO_NEW_PRIVS: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - return current->no_new_privs ? 1 : 0; + return task_no_new_privs(current) ? 1 : 0; case PR_SET_VMA: error = prctl_set_vma(arg2, arg3, arg4, arg5); break; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7078052284f..7e7fc0a082c 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at); /* compare kernel pointers */ cond_syscall(sys_kcmp); + +/* operate on Secure Computing state */ +cond_syscall(sys_seccomp); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f45e21ab9ce..3cc944598f5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6763,6 +6763,12 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup, return ret; } +static int mem_cgroup_allow_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) +{ + return subsys_cgroup_allow_attach(cgroup, tset); +} + static void mem_cgroup_cancel_attach(struct cgroup *cgroup, struct cgroup_taskset *tset) { @@ -6931,6 +6937,11 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup, { return 0; } +static int mem_cgroup_allow_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) +{ + return 0; +} static void mem_cgroup_cancel_attach(struct cgroup *cgroup, struct cgroup_taskset *tset) { @@ -6966,6 +6977,7 @@ struct cgroup_subsys mem_cgroup_subsys = { .can_attach = mem_cgroup_can_attach, .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, + .allow_attach = mem_cgroup_allow_attach, .bind = mem_cgroup_bind, .base_cftypes = mem_cgroup_files, .early_init = 0, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c8a2371d041..08b13803d61 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1175,6 +1175,9 @@ enum { #endif IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + IPV6_SADDR_RULE_NOT_OPTIMISTIC, +#endif IPV6_SADDR_RULE_MAX }; @@ -1202,6 +1205,15 @@ static inline int ipv6_saddr_preferred(int type) return 0; } +static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev) +{ +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic; +#else + return false; +#endif +} + static int ipv6_get_saddr_eval(struct net *net, struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, @@ -1262,10 +1274,16 @@ static int ipv6_get_saddr_eval(struct net *net, score->scopedist = ret; break; case IPV6_SADDR_RULE_PREFERRED: + { /* Rule 3: Avoid deprecated and optimistic addresses */ + u8 avoid = IFA_F_DEPRECATED; + + if (!ipv6_use_optimistic_addr(score->ifa->idev)) + avoid |= IFA_F_OPTIMISTIC; ret = ipv6_saddr_preferred(score->addr_type) || - !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)); + !(score->ifa->flags & avoid); break; + } #ifdef CONFIG_IPV6_MIP6 case IPV6_SADDR_RULE_HOA: { @@ -1313,6 +1331,14 @@ static int ipv6_get_saddr_eval(struct net *net, ret = score->ifa->prefix_len; score->matchlen = ret; break; +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + case IPV6_SADDR_RULE_NOT_OPTIMISTIC: + /* Optimistic addresses still have lower precedence than other + * preferred addresses. + */ + ret = !(score->ifa->flags & IFA_F_OPTIMISTIC); + break; +#endif default: ret = 0; } @@ -3291,8 +3317,15 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp) * Optimistic nodes can start receiving * Frames right away */ - if (ifp->flags & IFA_F_OPTIMISTIC) + if (ifp->flags & IFA_F_OPTIMISTIC) { ip6_ins_rt(ifp->rt); + if (ipv6_use_optimistic_addr(idev)) { + /* Because optimistic nodes can use this address, + * notify listeners. If DAD fails, RTM_DELADDR is sent. + */ + ipv6_ifa_notify(RTM_NEWADDR, ifp); + } + } addrconf_dad_kick(ifp); out: @@ -4238,6 +4271,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; + array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; @@ -4972,6 +5006,14 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, + { + .procname = "use_optimistic", + .data = &ipv6_devconf.use_optimistic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, #endif #ifdef CONFIG_IPV6_MROUTE { diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 859abdaac1e..9aaa4e72cc1 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -629,7 +629,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) * There is no exception for unconfined as change_hat is not * available. */ - if (current->no_new_privs) + if (task_no_new_privs(current)) return -EPERM; /* released below */ @@ -780,7 +780,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec, * no_new_privs is set because this aways results in a reduction * of permissions. */ - if (current->no_new_privs && !unconfined(profile)) { + if (task_no_new_privs(current) && !unconfined(profile)) { put_cred(cred); return -EPERM; } diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 3fdf998ad05..0d430adee88 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -490,9 +490,6 @@ static int snd_compress_check_input(struct snd_compr_params *params) if (params->codec.ch_in == 0 || params->codec.ch_out == 0) return -EINVAL; - if (!(params->codec.sample_rate & SNDRV_PCM_RATE_8000_192000)) - return -EINVAL; - return 0; } |