diff options
author | Andrew G. Morgan <morgan@kernel.org> | 2021-01-23 17:26:04 -0800 |
---|---|---|
committer | Andrew G. Morgan <morgan@kernel.org> | 2021-01-23 17:42:09 -0800 |
commit | 1377a81281a43369b88248abfdeda33ca0fc442e (patch) | |
tree | f8b346eca1f1326e1860a824e461857e5e314e2e | |
parent | 4e966462eacbd2256032fcfab9d104a8f2cfc378 (diff) | |
download | libcap-1377a81281a43369b88248abfdeda33ca0fc442e.tar.gz |
Capture a standalone example of using psx with PR_SET_NO_NEW_PRIVS.
This also includes some seccomp in-lined code demonstrations.
Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
-rw-r--r-- | contrib/seccomp/explore.go | 276 | ||||
-rw-r--r-- | contrib/seccomp/go.mod | 5 |
2 files changed, 281 insertions, 0 deletions
diff --git a/contrib/seccomp/explore.go b/contrib/seccomp/explore.go new file mode 100644 index 0000000..b8249e0 --- /dev/null +++ b/contrib/seccomp/explore.go @@ -0,0 +1,276 @@ +// Program explore is evolved from the code discussed in more depth +// here: +// +// https://github.com/golang/go/issues/3405 +// +// The code here demonstrates that while PR_SET_NO_NEW_PRIVS only +// applies to the calling thread, since +// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=103502a35cfce0710909da874f092cb44823ca03 +// the seccomp filter application forces the setting to be mirrored on +// all the threads of a process. +// +// Based on the command line options, we can manipulate the program to +// behave in various ways. Example command lines: +// +// sudo ./explore +// sudo ./explore --kill=false +// sudo ./explore --kill=false --errno=0 +// +// Supported Go toolchains are after go1.10. Those prior to go1.15 +// require this environment variable to be set to build successfully: +// +// export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*" +// +// Go toolchains go1.16+ can be compiled CGO_ENABLED=0 too, +// demonstrating native nocgo support for seccomp features. +package main + +import ( + "flag" + "fmt" + "log" + "runtime" + "syscall" + "time" + "unsafe" + + "kernel.org/pub/linux/libs/security/libcap/psx" +) + +var ( + withPSX = flag.Bool("psx", false, "use the psx mechanism to invoke prctl syscall") + delays = flag.Bool("delays", false, "use this to pause the program at various places") + kill = flag.Bool("kill", true, "kill the process if setuid attempted") + errno = flag.Int("errno", int(syscall.ENOTSUP), "if kill is false, block syscall and return this errno") +) + +const ( + PR_SET_NO_NEW_PRIVS = 38 + + SYS_SECCOMP = 317 // x86_64 syscall number + SECCOMP_SET_MODE_FILTER = 1 // uses user-supplied filter. + SECCOMP_FILTER_FLAG_TSYNC = (1 << 0) // mirror filtering on all threads. + SECCOMP_RET_ERRNO = 0x00050000 // returns an errno + SECCOMP_RET_DATA = 0x0000ffff // mask for RET data payload (ex. errno) + SECCOMP_RET_KILL_PROCESS = 0x80000000 // kill the whole process immediately + SECCOMP_RET_TRAP = 0x00030000 // disallow and force a SIGSYS + SECCOMP_RET_ALLOW = 0x7fff0000 + + BPF_LD = 0x00 + BPF_JMP = 0x05 + BPF_RET = 0x06 + + BPF_W = 0x00 + + BPF_ABS = 0x20 + BPF_JEQ = 0x10 + + BPF_K = 0x00 + + AUDIT_ARCH_X86_64 = 3221225534 // HACK: I don't understand this value + ARCH_NR = AUDIT_ARCH_X86_64 + + syscall_nr = 0 +) + +// SockFilter is a single filter block. +type SockFilter struct { + // Code is the filter code instruction. + Code uint16 + // Jt is the target for a true result from the code execution. + Jt uint8 + // Jf is the target for a false result from the code execution. + Jf uint8 + // K is a generic multiuse field + K uint32 +} + +// SockFProg is a +type SockFProg struct { + // Len is the number of contiguous SockFilter blocks that can + // be found at *Filter. + Len uint16 + // Filter is the address of the first SockFilter block of a + // program sequence. + Filter *SockFilter +} + +type SockFilterSlice []SockFilter + +func BPF_STMT(code uint16, k uint32) SockFilter { + return SockFilter{code, 0, 0, k} +} + +func BPF_JUMP(code uint16, k uint32, jt uint8, jf uint8) SockFilter { + return SockFilter{code, jt, jf, k} +} + +func ValidateArchitecture() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 4), // HACK: I don't understand this 4. + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS), + } +} + +func ExamineSyscall() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr), + } +} + +func AllowSyscall(syscallNum uint32) []SockFilter { + return []SockFilter{ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscallNum, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + } +} + +func DisallowSyscall(syscallNum, errno uint32) []SockFilter { + return []SockFilter{ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscallNum, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(errno&SECCOMP_RET_DATA)), + } +} + +func KillProcess() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS), + } +} + +func NotifyProcessAndDie() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), + } +} + +func TrapOnSyscall(syscallNum uint32) []SockFilter { + return []SockFilter{ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscallNum, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), + } +} + +func AllGood() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + } +} + +// prctl executes the prctl - unless the --psx commandline argument is +// used, this is on a single thread. +//go:uintptrescapes +func prctl(option, arg1, arg2, arg3, arg4, arg5 uintptr) error { + var e syscall.Errno + if *withPSX { + _, _, e = psx.Syscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) + } else { + _, _, e = syscall.RawSyscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) + } + if e != 0 { + return e + } + if *delays { + fmt.Println("prctl'd - check now") + time.Sleep(1 * time.Minute) + } + return nil +} + +// seccomp_set_mode_filter is our wrapper for performing our seccomp system call. +//go:uintptrescapes +func seccomp_set_mode_filter(prog *SockFProg) error { + if _, _, e := syscall.RawSyscall(SYS_SECCOMP, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, uintptr(unsafe.Pointer(prog))); e != 0 { + return e + } + return nil +} + +var empty func() + +func lockProcessThread(pick bool) { + // Make sure we are + pid := uintptr(syscall.Getpid()) + runtime.LockOSThread() + for { + tid, _, _ := syscall.RawSyscall(syscall.SYS_GETTID, 0, 0, 0) + if (tid == pid) == pick { + fmt.Println("validated TID:", tid, "== PID:", pid, "is", pick) + break + } + runtime.UnlockOSThread() + go func() { + time.Sleep(1 * time.Microsecond) + }() + runtime.Gosched() + runtime.LockOSThread() + } +} + +// applyPolicy uploads the program sequence. +func applyPolicy(prog *SockFProg) { + // Without PSX we can't guarantee the thread we execute the + // seccomp call on will be the same one that we disabled new + // privs on. With PSX, the disabling of new privs is mirrored + // on all threads. + if !*withPSX { + lockProcessThread(false) + defer runtime.UnlockOSThread() + } + + // This is required to load a filter without privilege. + if err := prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); err != nil { + log.Fatalf("Prctl(PR_SET_NO_NEW_PRIVS): %v", err) + } + + fmt.Println("Applying syscall policy...") + if err := seccomp_set_mode_filter(prog); err != nil { + log.Fatalf("seccomp_set_mode_filter: %v", err) + } + fmt.Println("...Policy applied") +} + +func main() { + flag.Parse() + + if *delays { + fmt.Println("check first", syscall.Getpid()) + time.Sleep(60 * time.Second) + } + + var filter []SockFilter + filter = append(filter, ValidateArchitecture()...) + + // Grab the system call number. + filter = append(filter, ExamineSyscall()...) + + // List disallowed syscalls. + for _, x := range []uint32{ + syscall.SYS_SETUID, + } { + if *kill { + filter = append(filter, TrapOnSyscall(x)...) + } else { + filter = append(filter, DisallowSyscall(x, uint32(*errno))...) + } + } + + filter = append(filter, AllGood()...) + + prog := &SockFProg{ + Len: uint16(len(filter)), + Filter: &filter[0], + } + + applyPolicy(prog) + + // Ensure we are running on the TID=PID. + lockProcessThread(true) + + log.Print("Now it is time to try to run something privileged...") + if _, _, e := syscall.RawSyscall(syscall.SYS_SETUID, 1, 0, 0); e != 0 { + log.Fatalf("setuid failed with an error: %v", e) + } + log.Print("Looked like that worked, but it really didn't: uid == ", syscall.Getuid(), " != 1") +} diff --git a/contrib/seccomp/go.mod b/contrib/seccomp/go.mod new file mode 100644 index 0000000..182e4e7 --- /dev/null +++ b/contrib/seccomp/go.mod @@ -0,0 +1,5 @@ +module explore + +go 1.14 + +require kernel.org/pub/linux/libs/security/libcap/psx v0.2.46 |