diff options
author | Haibo Huang <hhb@google.com> | 2021-01-26 20:37:45 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2021-01-26 20:37:45 +0000 |
commit | c98ec7111e582b76d9f507b9a17db8b6a3ee278f (patch) | |
tree | 7225b3cdde2611d6f072cd157620b42193b4a8bc | |
parent | 0bb0681c82c22447ec2a8843971a25ab5377a8f5 (diff) | |
parent | 47fac93ec338499b9d6262521aa23e391b671861 (diff) | |
download | libcap-c98ec7111e582b76d9f507b9a17db8b6a3ee278f.tar.gz |
Upgrade libcap to libcap-2.47 am: 47fac93ec3
Original change: https://android-review.googlesource.com/c/platform/external/libcap/+/1559975
MUST ONLY BE SUBMITTED BY AUTOMERGER
Change-Id: Ief9a4220386082cedca6883a17ab07a49dcfcebb
46 files changed, 1135 insertions, 565 deletions
@@ -5,11 +5,11 @@ third_party { type: GIT value: "https://git.kernel.org/pub/scm/linux/kernel/git/morgan/libcap.git" } - version: "libcap-2.45" + version: "libcap-2.47" license_type: NOTICE last_upgrade_date { - year: 2020 - month: 11 - day: 2 + year: 2021 + month: 1 + day: 25 } } @@ -1,7 +1,7 @@ # Common version number defines for libcap LIBTITLE=libcap VERSION=2 -MINOR=45 +MINOR=47 # ## Optional prefixes: @@ -43,9 +43,9 @@ LIBDIR=$(lib_prefix)/$(lib) PKGCONFIGDIR=$(LIBDIR)/pkgconfig GOPKGDIR=$(prefix)/share/gocode/src -# Go modules have their own semantics. I plan to leave this value at 0 -# and keep it there. The Go packages should always remain backwardly -# compatible, but I may have to up it if Go's syntax changes in a +# Once go1.16 is released, I plan to set this value to 1 and keep it +# there. The Go packages should always remain backwardly compatible, +# but I may have to up it if Go's syntax dramatically changes in a # backwards incompatible manner. (Let's hope not.) GOMAJOR=0 @@ -113,10 +113,6 @@ CGO_CFLAGS := -I$(topdir)/libcap/include CGO_LDFLAGS := -L$(topdir)/libcap CGO_LDFLAGS_ALLOW := -Wl,-?-wrap[=,][^-.@][^,]* CGO_REQUIRED=$(shell $(topdir)/go/cgo-required.sh) -ifeq ($(CGO_REQUIRED),0) -# Hopefully this will not be needed at some point. -GOBUILDTAG=-tags allthreadssyscall -endif endif endif @@ -35,7 +35,7 @@ distclean: clean for x in $$(find . -name go.mod); do grep -F -v "module" $$x | fgrep "kernel.org/pub/linux/libs/security/libcap" > /dev/null || continue ; grep -F "v$(GOMAJOR).$(VERSION).$(MINOR)" $$x > /dev/null && continue ; echo "$$x is not updated to v$(GOMAJOR).$(VERSION).$(MINOR)" ; exit 1 ; done @echo "ALL go.mod files updated" @echo "Now validate that everything is checked in to a clean tree.." - test -z "$$(git status -s)" + test -z "$$(git status --ignored -s)" @echo "All good!" release: distclean @@ -42,17 +42,14 @@ // uniformly over the whole Go (and CGo linked) process runtime. 
// // Note, if the Go runtime syscall interface contains the Linux -// variant syscall.AllThreadsSyscall() API (it is not in go1.15 -// for example, but see https://github.com/golang/go/issues/1435 for -// current status) then this present package can use that to invoke -// Capability setting system calls in pure Go binaries. In such an -// enhanced Go runtime, to force this behavior, use the CGO_ENABLED=0 -// environment variable and, for now, a build tag: +// variant syscall.AllThreadsSyscall() API (it debuted in go1.16 see +// https://github.com/golang/go/issues/1435 for its history) then +// the "psx" package will use that to invoke Capability setting system +// calls in pure Go binaries. In such an enhanced Go runtime, to force +// this behavior, use the CGO_ENABLED=0 environment variable. // -// CGO_ENABLED=0 go build -tags allthreadssyscall ... // -// -// Copyright (c) 2019,20 Andrew G. Morgan <morgan@kernel.org> +// Copyright (c) 2019-21 Andrew G. Morgan <morgan@kernel.org> // // The cap and psx packages are licensed with a (you choose) BSD // 3-clause or GPL2. See LICENSE file for details. diff --git a/cap/convenience.go b/cap/convenience.go index f094e52..9580903 100644 --- a/cap/convenience.go +++ b/cap/convenience.go @@ -36,6 +36,7 @@ const ( prSetKeepCaps = 8 prGetSecureBits = 27 prSetSecureBits = 28 + prSetNoNewPrivs = 38 ) // GetSecbits returns the current setting of the process' Secbits. @@ -163,6 +164,9 @@ func (sc *syscaller) setMode(m Mode) error { } w.ClearFlag(Permitted) + // For good measure. 
+ sc.prctlwcall6(prSetNoNewPrivs, 1, 0, 0, 0, 0) + return nil } @@ -2,4 +2,4 @@ module kernel.org/pub/linux/libs/security/libcap/cap go 1.11 -require kernel.org/pub/linux/libs/security/libcap/psx v0.2.45 +require kernel.org/pub/linux/libs/security/libcap/psx v0.2.47 diff --git a/cap/launch.go b/cap/launch.go index 2f37a2a..4ae449c 100644 --- a/cap/launch.go +++ b/cap/launch.go @@ -106,18 +106,18 @@ var ErrLaunchFailed = errors.New("launch failed") var ErrNoLaunch = errors.New("launch not supported") // ErrAmbiguousChroot indicates that the Launcher is being used in -// addition to callback supplied Chroot. The former should be used +// addition to a callback supplied Chroot. The former should be used // exclusively for this. var ErrAmbiguousChroot = errors.New("use Launcher for chroot") // ErrAmbiguousIDs indicates that the Launcher is being used in -// addition to callback supplied Credentials. The former should be +// addition to a callback supplied Credentials. The former should be // used exclusively for this. var ErrAmbiguousIDs = errors.New("use Launcher for uids and gids") // ErrAmbiguousAmbient indicates that the Launcher is being used in -// addition callback supplied ambient set and the former should be -// used exclusively in a Launch call. +// addition to a callback supplied ambient set and the former should +// be used exclusively in a Launch call. var ErrAmbiguousAmbient = errors.New("use Launcher for ambient caps") // lName is the name we temporarily give to the launcher thread. Note, diff --git a/cap/syscalls.go b/cap/syscalls.go index 9c5dd79..ab4bcef 100644 --- a/cap/syscalls.go +++ b/cap/syscalls.go @@ -1,14 +1,18 @@ -// +build linux,allthreadssyscall,!cgo - package cap -import "syscall" +import ( + "syscall" + + "kernel.org/pub/linux/libs/security/libcap/psx" +) // multisc provides syscalls overridable for testing purposes that // support a single kernel security state for all OS threads. 
+// We use this version when we are cgo compiling because +// we need to manage the native C pthreads too. var multisc = &syscaller{ - w3: syscall.AllThreadsSyscall, - w6: syscall.AllThreadsSyscall6, + w3: psx.Syscall3, + w6: psx.Syscall6, r3: syscall.RawSyscall, r6: syscall.RawSyscall6, } diff --git a/cap/syscalls_cgo.go b/cap/syscalls_cgo.go deleted file mode 100644 index 0dc6a0c..0000000 --- a/cap/syscalls_cgo.go +++ /dev/null @@ -1,29 +0,0 @@ -// +build linux,cgo - -package cap - -import ( - "syscall" - - "kernel.org/pub/linux/libs/security/libcap/psx" -) - -// multisc provides syscalls overridable for testing purposes that -// support a single kernel security state for all OS threads. -// We use this version when we are cgo compiling because -// we need to manage the native C pthreads too. -var multisc = &syscaller{ - w3: psx.Syscall3, - w6: psx.Syscall6, - r3: syscall.RawSyscall, - r6: syscall.RawSyscall6, -} - -// singlesc provides a single threaded implementation. Users should -// take care to ensure the thread is locked and marked nogc. 
-var singlesc = &syscaller{ - w3: syscall.RawSyscall, - w6: syscall.RawSyscall6, - r3: syscall.RawSyscall, - r6: syscall.RawSyscall6, -} diff --git a/cap/text.go b/cap/text.go index 11dae90..cf11a2d 100644 --- a/cap/text.go +++ b/cap/text.go @@ -187,7 +187,7 @@ var ErrBadText = errors.New("bad text") // // "=p all+ei" "all=pie" "=pi all+e" "=eip" // -// "cap_chown=p cap_setuid=i" "cap_chown=ip-p" "cap_chown=i" +// "cap_setuid=p cap_chown=i" "cap_chown=ip-p" "cap_chown=i" // // "cap_chown=-p" "all=" "cap_setuid=pie-pie" "=" // diff --git a/contrib/golang/.gitignore b/contrib/golang/.gitignore deleted file mode 100644 index 6b07a71..0000000 --- a/contrib/golang/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -posix -posix-cgo -posix-cgo.go -posix.go diff --git a/contrib/golang/Makefile b/contrib/golang/Makefile deleted file mode 100644 index 4c497cf..0000000 --- a/contrib/golang/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -GO := go - -all: posix.go posix-cgo.go - CGO_ENABLED=0 $(GO) build posix.go - $(GO) build posix-cgo.go - -posix.go: ptest.go posix.stub_go Makefile - sed -e '/\/\/ main_here/ r posix.stub_go' ptest.go > $@ - -posix-cgo.go: ptest.go posix-cgo.stub_go Makefile - sed -e '/\/\/ main_here/ r posix-cgo.stub_go' ptest.go > $@ - -clean: - rm -f posix.go posix - rm -f posix-cgo.go posix-cgo diff --git a/contrib/golang/README b/contrib/golang/README deleted file mode 100644 index 9c55ad5..0000000 --- a/contrib/golang/README +++ /dev/null @@ -1,23 +0,0 @@ -This directory contains some test code for system calls that need -POSIX semantics to work under Go. There are 9 system calls wrapped in -a nptl:setxid mechanism in glibc, and the following development patch -adds support for these 9 to native Go. - -https://go-review.googlesource.com/c/go/+/210639/ - -The Go support works with or without CGO_ENABLED. - -With a patched Go runtime library: - - make - sudo ./posix - sudo ./posix-cgo - -should validate that all is working as intended. 
- -The above Go patch also exposes the mechanism that achieves this in -the Go runtime, to ensure that the native Go "libcap/cap" package can -work with and without CGO_ENABLED. - -Andrew G. Morgan <morgan@kernel.org> -2019-12-10 diff --git a/contrib/golang/posix-cgo.stub_go b/contrib/golang/posix-cgo.stub_go deleted file mode 100644 index 2878b1e..0000000 --- a/contrib/golang/posix-cgo.stub_go +++ /dev/null @@ -1,52 +0,0 @@ - -// #include <stdio.h> -// #include <stdlib.h> -// #include <pthread.h> -// #include <unistd.h> -// #include <sys/types.h> -// -// pthread_t *t = NULL; -// pthread_mutex_t mu; -// int nts = 0; -// int all_done = 0; -// -// static void *aFn(void *vargp) { -// int done = 0; -// while (!done) { -// usleep(100); -// pthread_mutex_lock(&mu); -// done = all_done; -// pthread_mutex_unlock(&mu); -// } -// printf("tid=%d done\n", pthread_self()); -// return NULL; -// } -// -// void trial(int argc) { -// nts = argc; -// t = calloc(nts, sizeof(pthread_t)); -// pthread_mutex_init(&mu, NULL); -// for (int i = 0; i < nts; i++) { -// printf("launch C-pthread [%d]\n", i); -// pthread_create(&t[i], NULL, aFn, NULL); -// } -// } -// -// void cleanup(void) { -// pthread_mutex_lock(&mu); -// all_done = 1; -// pthread_mutex_unlock(&mu); -// for (int i = 0; i < nts; i++) { -// printf("join C-pthread [%d]\n", i); -// pthread_join(t[i], NULL); -// } -// pthread_mutex_destroy(&mu); -// } -import "C" - -func main() { - const cts = 3 - C.trial(cts) - defer C.cleanup() - ptest() -} diff --git a/contrib/golang/posix.stub_go b/contrib/golang/posix.stub_go deleted file mode 100644 index d76c469..0000000 --- a/contrib/golang/posix.stub_go +++ /dev/null @@ -1,5 +0,0 @@ - -func main() { - log.Print("Running pure Go test") - ptest() -} diff --git a/contrib/golang/ptest.go b/contrib/golang/ptest.go deleted file mode 100644 index c8d7d71..0000000 --- a/contrib/golang/ptest.go +++ /dev/null @@ -1,140 +0,0 @@ -// Program posix is a test case to confirm that Go is capable of -// 
exhibiting posix semantics for system calls. -// -// This code is a template for two programs: posix.go and posix-cgo.go -// which are built by the Makefile to using sed. -package main - -import ( - "fmt" - "io/ioutil" - "log" - "os" - "strings" - "syscall" -) - -// main_here - -func dumpStatus(testCase string, err error, filter, expect string) bool { - fmt.Printf("%s [%v]:\n", testCase, err) - var failed bool - pid := syscall.Getpid() - fs, err := ioutil.ReadDir(fmt.Sprintf("/proc/%d/task", pid)) - if err != nil { - log.Fatal(err) - } - for _, f := range fs { - tf := fmt.Sprintf("/proc/%s/status", f.Name()) - d, err := ioutil.ReadFile(tf) - if err != nil { - fmt.Println(tf, err) - failed = true - continue - } - lines := strings.Split(string(d), "\n") - for _, line := range lines { - if strings.HasPrefix(line, filter) { - fails := line != expect - failure := "" - if fails { - failed = fails - failure = " (bad)" - } - fmt.Printf("%s %s%s\n", tf, line, failure) - break - } - } - } - return failed -} - -func ptest() { - var err error - var bad bool - - // egid setting - bad = bad || dumpStatus("initial state", nil, "Gid:", "Gid:\t0\t0\t0\t0") - err = syscall.Setegid(1001) - bad = bad || dumpStatus("setegid(1001) state", err, "Gid:", "Gid:\t0\t1001\t0\t1001") - err = syscall.Setegid(1002) - bad = bad || dumpStatus("setegid(1002) state", err, "Gid:", "Gid:\t0\t1002\t0\t1002") - err = syscall.Setegid(0) - bad = bad || dumpStatus("setegid(0) state", err, "Gid:", "Gid:\t0\t0\t0\t0") - - // euid setting (no way back from this one) - bad = bad || dumpStatus("initial euid", nil, "Uid:", "Uid:\t0\t0\t0\t0") - err = syscall.Seteuid(1) - bad = bad || dumpStatus("seteuid(1)", err, "Uid:", "Uid:\t0\t1\t0\t1") - err = syscall.Seteuid(0) - bad = bad || dumpStatus("seteuid(0)", err, "Uid:", "Uid:\t0\t0\t0\t0") - - // gid setting - bad = bad || dumpStatus("initial state", nil, "Gid:", "Gid:\t0\t0\t0\t0") - err = syscall.Setgid(1001) - bad = bad || dumpStatus("setgid(1001) state", err, 
"Gid:", "Gid:\t1001\t1001\t1001\t1001") - err = syscall.Setgid(1002) - bad = bad || dumpStatus("setgid(1002) state", err, "Gid:", "Gid:\t1002\t1002\t1002\t1002") - err = syscall.Setgid(0) - bad = bad || dumpStatus("setgid(0) state", err, "Gid:", "Gid:\t0\t0\t0\t0") - - // groups setting - bad = bad || dumpStatus("initial groups", nil, "Groups:", "Groups:\t0 ") - err = syscall.Setgroups([]int{0, 1, 2, 3}) - bad = bad || dumpStatus("setgroups(0,1,2,3)", err, "Groups:", "Groups:\t0 1 2 3 ") - err = syscall.Setgroups([]int{3, 2, 1}) - bad = bad || dumpStatus("setgroups(2,3,1)", err, "Groups:", "Groups:\t1 2 3 ") - err = syscall.Setgroups(nil) - bad = bad || dumpStatus("setgroups(nil)", err, "Groups:", "Groups:\t ") - err = syscall.Setgroups([]int{0}) - bad = bad || dumpStatus("setgroups(0)", err, "Groups:", "Groups:\t0 ") - - // regid setting - bad = bad || dumpStatus("initial state", nil, "Gid:", "Gid:\t0\t0\t0\t0") - err = syscall.Setregid(1001, 0) - bad = bad || dumpStatus("setregid(1001) state", err, "Gid:", "Gid:\t1001\t0\t0\t0") - err = syscall.Setregid(0, 1002) - bad = bad || dumpStatus("setregid(1002) state", err, "Gid:", "Gid:\t0\t1002\t1002\t1002") - err = syscall.Setregid(0, 0) - bad = bad || dumpStatus("setregid(0) state", err, "Gid:", "Gid:\t0\t0\t0\t0") - - // reuid setting - bad = bad || dumpStatus("initial state", nil, "Uid:", "Uid:\t0\t0\t0\t0") - err = syscall.Setreuid(1, 0) - bad = bad || dumpStatus("setreuid(1,0) state", err, "Uid:", "Uid:\t1\t0\t0\t0") - err = syscall.Setreuid(0, 2) - bad = bad || dumpStatus("setreuid(0,2) state", err, "Uid:", "Uid:\t0\t2\t2\t2") - err = syscall.Setreuid(0, 0) - bad = bad || dumpStatus("setreuid(0) state", err, "Uid:", "Uid:\t0\t0\t0\t0") - - // resgid setting - bad = bad || dumpStatus("initial state", nil, "Gid:", "Gid:\t0\t0\t0\t0") - err = syscall.Setresgid(1, 0, 2) - bad = bad || dumpStatus("setresgid(1,0,2) state", err, "Gid:", "Gid:\t1\t0\t2\t0") - err = syscall.Setresgid(0, 2, 1) - bad = bad || 
dumpStatus("setresgid(0,2,1) state", err, "Gid:", "Gid:\t0\t2\t1\t2") - err = syscall.Setresgid(0, 0, 0) - bad = bad || dumpStatus("setresgid(0) state", err, "Gid:", "Gid:\t0\t0\t0\t0") - - // resuid setting - bad = bad || dumpStatus("initial state", nil, "Uid:", "Uid:\t0\t0\t0\t0") - err = syscall.Setresuid(1, 0, 2) - bad = bad || dumpStatus("setresuid(1,0,2) state", err, "Uid:", "Uid:\t1\t0\t2\t0") - err = syscall.Setresuid(0, 2, 1) - bad = bad || dumpStatus("setresuid(0,2,1) state", err, "Uid:", "Uid:\t0\t2\t1\t2") - err = syscall.Setresuid(0, 0, 0) - bad = bad || dumpStatus("setresuid(0) state", err, "Uid:", "Uid:\t0\t0\t0\t0") - - // uid setting (no way back from this one) - bad = bad || dumpStatus("initial uid", nil, "Uid:", "Uid:\t0\t0\t0\t0") - err = syscall.Setuid(1) - bad = bad || dumpStatus("setuid(1)", err, "Uid:", "Uid:\t1\t1\t1\t1") - err = syscall.Setuid(0) - bad = bad || dumpStatus("setuid(0)", err, "Uid:", "Uid:\t1\t1\t1\t1") - - if bad { - log.Print("TEST FAILED") - os.Exit(1) - } - log.Print("TEST PASSED") -} diff --git a/contrib/seccomp/explore.go b/contrib/seccomp/explore.go new file mode 100644 index 0000000..b8249e0 --- /dev/null +++ b/contrib/seccomp/explore.go @@ -0,0 +1,276 @@ +// Program explore is evolved from the code discussed in more depth +// here: +// +// https://github.com/golang/go/issues/3405 +// +// The code here demonstrates that while PR_SET_NO_NEW_PRIVS only +// applies to the calling thread, since +// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=103502a35cfce0710909da874f092cb44823ca03 +// the seccomp filter application forces the setting to be mirrored on +// all the threads of a process. +// +// Based on the command line options, we can manipulate the program to +// behave in various ways. Example command lines: +// +// sudo ./explore +// sudo ./explore --kill=false +// sudo ./explore --kill=false --errno=0 +// +// Supported Go toolchains are after go1.10. 
Those prior to go1.15 +// require this environment variable to be set to build successfully: +// +// export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*" +// +// Go toolchains go1.16+ can be compiled CGO_ENABLED=0 too, +// demonstrating native nocgo support for seccomp features. +package main + +import ( + "flag" + "fmt" + "log" + "runtime" + "syscall" + "time" + "unsafe" + + "kernel.org/pub/linux/libs/security/libcap/psx" +) + +var ( + withPSX = flag.Bool("psx", false, "use the psx mechanism to invoke prctl syscall") + delays = flag.Bool("delays", false, "use this to pause the program at various places") + kill = flag.Bool("kill", true, "kill the process if setuid attempted") + errno = flag.Int("errno", int(syscall.ENOTSUP), "if kill is false, block syscall and return this errno") +) + +const ( + PR_SET_NO_NEW_PRIVS = 38 + + SYS_SECCOMP = 317 // x86_64 syscall number + SECCOMP_SET_MODE_FILTER = 1 // uses user-supplied filter. + SECCOMP_FILTER_FLAG_TSYNC = (1 << 0) // mirror filtering on all threads. + SECCOMP_RET_ERRNO = 0x00050000 // returns an errno + SECCOMP_RET_DATA = 0x0000ffff // mask for RET data payload (ex. errno) + SECCOMP_RET_KILL_PROCESS = 0x80000000 // kill the whole process immediately + SECCOMP_RET_TRAP = 0x00030000 // disallow and force a SIGSYS + SECCOMP_RET_ALLOW = 0x7fff0000 + + BPF_LD = 0x00 + BPF_JMP = 0x05 + BPF_RET = 0x06 + + BPF_W = 0x00 + + BPF_ABS = 0x20 + BPF_JEQ = 0x10 + + BPF_K = 0x00 + + AUDIT_ARCH_X86_64 = 3221225534 // HACK: I don't understand this value + ARCH_NR = AUDIT_ARCH_X86_64 + + syscall_nr = 0 +) + +// SockFilter is a single filter block. +type SockFilter struct { + // Code is the filter code instruction. + Code uint16 + // Jt is the target for a true result from the code execution. + Jt uint8 + // Jf is the target for a false result from the code execution. 
+ Jf uint8 + // K is a generic multiuse field + K uint32 +} + +// SockFProg is a filter program: a count of SockFilter blocks and +// the address of the first block in the sequence. +type SockFProg struct { + // Len is the number of contiguous SockFilter blocks that can + // be found at *Filter. + Len uint16 + // Filter is the address of the first SockFilter block of a + // program sequence. + Filter *SockFilter +} + +type SockFilterSlice []SockFilter + +func BPF_STMT(code uint16, k uint32) SockFilter { + return SockFilter{code, 0, 0, k} +} + +func BPF_JUMP(code uint16, k uint32, jt uint8, jf uint8) SockFilter { + return SockFilter{code, jt, jf, k} +} + +func ValidateArchitecture() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 4), // HACK: 4 is the byte offset of the arch field in struct seccomp_data. + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS), + } +} + +func ExamineSyscall() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr), + } +} + +func AllowSyscall(syscallNum uint32) []SockFilter { + return []SockFilter{ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscallNum, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + } +} + +func DisallowSyscall(syscallNum, errno uint32) []SockFilter { + return []SockFilter{ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscallNum, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(errno&SECCOMP_RET_DATA)), + } +} + +func KillProcess() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS), + } +} + +func NotifyProcessAndDie() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), + } +} + +func TrapOnSyscall(syscallNum uint32) []SockFilter { + return []SockFilter{ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscallNum, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), + } +} + +func AllGood() []SockFilter { + return []SockFilter{ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + } +} + +// prctl executes the prctl - unless the --psx commandline argument is +// used, this is on a single thread. 
+//go:uintptrescapes +func prctl(option, arg1, arg2, arg3, arg4, arg5 uintptr) error { + var e syscall.Errno + if *withPSX { + _, _, e = psx.Syscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) + } else { + _, _, e = syscall.RawSyscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) + } + if e != 0 { + return e + } + if *delays { + fmt.Println("prctl'd - check now") + time.Sleep(1 * time.Minute) + } + return nil +} + +// seccomp_set_mode_filter is our wrapper for performing our seccomp system call. +//go:uintptrescapes +func seccomp_set_mode_filter(prog *SockFProg) error { + if _, _, e := syscall.RawSyscall(SYS_SECCOMP, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, uintptr(unsafe.Pointer(prog))); e != 0 { + return e + } + return nil +} + +var empty func() + +func lockProcessThread(pick bool) { + // Make sure we are locked to an OS thread for which (TID == PID) equals pick. + pid := uintptr(syscall.Getpid()) + runtime.LockOSThread() + for { + tid, _, _ := syscall.RawSyscall(syscall.SYS_GETTID, 0, 0, 0) + if (tid == pid) == pick { + fmt.Println("validated TID:", tid, "== PID:", pid, "is", pick) + break + } + runtime.UnlockOSThread() + go func() { + time.Sleep(1 * time.Microsecond) + }() + runtime.Gosched() + runtime.LockOSThread() + } +} + +// applyPolicy uploads the program sequence. +func applyPolicy(prog *SockFProg) { + // Without PSX we can't guarantee the thread we execute the + // seccomp call on will be the same one that we disabled new + // privs on. With PSX, the disabling of new privs is mirrored + // on all threads. + if !*withPSX { + lockProcessThread(false) + defer runtime.UnlockOSThread() + } + + // This is required to load a filter without privilege. 
+ if err := prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); err != nil { + log.Fatalf("Prctl(PR_SET_NO_NEW_PRIVS): %v", err) + } + + fmt.Println("Applying syscall policy...") + if err := seccomp_set_mode_filter(prog); err != nil { + log.Fatalf("seccomp_set_mode_filter: %v", err) + } + fmt.Println("...Policy applied") +} + +func main() { + flag.Parse() + + if *delays { + fmt.Println("check first", syscall.Getpid()) + time.Sleep(60 * time.Second) + } + + var filter []SockFilter + filter = append(filter, ValidateArchitecture()...) + + // Grab the system call number. + filter = append(filter, ExamineSyscall()...) + + // List disallowed syscalls. + for _, x := range []uint32{ + syscall.SYS_SETUID, + } { + if *kill { + filter = append(filter, TrapOnSyscall(x)...) + } else { + filter = append(filter, DisallowSyscall(x, uint32(*errno))...) + } + } + + filter = append(filter, AllGood()...) + + prog := &SockFProg{ + Len: uint16(len(filter)), + Filter: &filter[0], + } + + applyPolicy(prog) + + // Ensure we are running on the TID=PID. 
+ lockProcessThread(true) + + log.Print("Now it is time to try to run something privileged...") + if _, _, e := syscall.RawSyscall(syscall.SYS_SETUID, 1, 0, 0); e != 0 { + log.Fatalf("setuid failed with an error: %v", e) + } + log.Print("Looked like that worked, but it really didn't: uid == ", syscall.Getuid(), " != 1") +} diff --git a/contrib/seccomp/go.mod b/contrib/seccomp/go.mod new file mode 100644 index 0000000..d9efa2e --- /dev/null +++ b/contrib/seccomp/go.mod @@ -0,0 +1,5 @@ +module explore + +go 1.14 + +require kernel.org/pub/linux/libs/security/libcap/psx v0.2.47 diff --git a/go/.gitignore b/go/.gitignore index 322297c..30ae0b6 100644 --- a/go/.gitignore +++ b/go/.gitignore @@ -2,8 +2,12 @@ good-names.go compare-cap try-launching try-launching-cgo +psx-signals +b210613 mknames web +setid +gowns ok pkg src diff --git a/go/Makefile b/go/Makefile index eee379e..b8745f1 100644 --- a/go/Makefile +++ b/go/Makefile @@ -18,7 +18,7 @@ CAPGOPACKAGE=$(PKGDIR)/cap.a DEPS=../libcap/libcap.a ../libcap/libpsx.a -all: $(PSXGOPACKAGE) $(CAPGOPACKAGE) web compare-cap try-launching +all: $(PSXGOPACKAGE) $(CAPGOPACKAGE) web setid gowns compare-cap try-launching psx-signals $(DEPS): make -C ../libcap all @@ -54,28 +54,51 @@ compare-cap: compare-cap.go $(CAPGOPACKAGE) GO111MODULE=off CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) $(GO) build $< web: ../goapps/web/web.go $(CAPGOPACKAGE) - GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build -o $@ $(GOBUILDTAG) $< + GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build -o $@ $< ifeq ($(RAISE_GO_FILECAP),yes) make -C ../progs setcap sudo ../progs/setcap cap_setpcap,cap_net_bind_service=p web @echo "NOTE: RAISED cap_setpcap,cap_net_bind_service ON web binary" endif +setid: ../goapps/setid/setid.go $(CAPGOPACKAGE) $(PSXGOPACKAGE) + 
GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build -o $@ $< + +gowns: ../goapps/gowns/gowns.go $(CAPGOPACKAGE) + GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build -o $@ $< + ok: ok.go GO111MODULE=off CGO_ENABLED=0 GOPATH=$(GOPATH) $(GO) build $< try-launching: try-launching.go $(CAPGOPACKAGE) ok - GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build $(GOBUILDTAG) $< + GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build $< ifeq ($(CGO_REQUIRED),0) GO111MODULE=off CGO_ENABLED="1" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build -o $@-cgo $< endif +# Bug reported issues: +# https://bugzilla.kernel.org/show_bug.cgi?id=210533 (cgo - fixed) +# https://github.com/golang/go/issues/43149 (nocgo - not fixed yet) +# When the latter is fixed we can replace CGO_ENABLED=1 with ="$(CGO_REQUIRED)" +psx-signals: psx-signals.go $(PSXGOPACKAGE) + GO111MODULE=off CGO_ENABLED=1 CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) $(GO) build $< + +b210613: b210613.go $(CAPGOPACKAGE) + GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) $(GO) build $< + test: all - GO111MODULE=off CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH="$(GOPATH)" $(GO) test $(IMPORTDIR)/psx - GO111MODULE=off CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH="$(GOPATH)" $(GO) test $(IMPORTDIR)/cap + GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH="$(GOPATH)" $(GO) test $(IMPORTDIR)/psx + GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH="$(GOPATH)" $(GO) test $(IMPORTDIR)/cap 
LD_LIBRARY_PATH=../libcap ./compare-cap - -sudotest: test ../progs/tcapsh-static + ./psx-signals + ./setid --caps=false + ./gowns -- -c "echo gowns runs" + +# Note, the user namespace doesn't require sudo, but I wanted to avoid +# requiring that the hosting kernel supports user namespaces for the +# regular test case. +sudotest: test ../progs/tcapsh-static b210613 + ./gowns --ns -- -c "echo gowns runs with user namespace" ./try-launching ifeq ($(CGO_REQUIRED),0) ./try-launching-cgo @@ -84,18 +107,20 @@ endif ifeq ($(CGO_REQUIRED),0) sudo ./try-launching-cgo endif + sudo ../progs/tcapsh-static --cap-uid=$$(id -u) --caps="cap_setpcap=ep" --iab="^cap_setpcap" -- -c ./b210613 install: all rm -rf $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/psx - mkdir -p $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/psx/include/sys - for x in src/$(IMPORTDIR)/psx/* ; do if [ -d $$x ]; then continue; fi; install -m 0644 $$x $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/psx; done - install -m 0644 src/$(IMPORTDIR)/psx/include/sys/psx_syscall.h $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/psx/include/sys/psx_syscall.h + mkdir -p $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/psx + install -m 0644 src/$(IMPORTDIR)/psx/* $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/psx mkdir -p $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/cap rm -rf $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/cap/* install -m 0644 src/$(IMPORTDIR)/cap/* $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/cap clean: - rm -f *.o *.so *~ mknames web ok good-names.go + rm -f *.o *.so *~ mknames ok good-names.go + rm -f web setid gowns rm -f compare-cap try-launching try-launching-cgo rm -f $(topdir)/cap/*~ $(topdir)/psx/*~ + rm -f psx-signals b210613 rm -fr pkg src diff --git a/go/b210613.go b/go/b210613.go new file mode 100644 index 0000000..2bced06 --- /dev/null +++ b/go/b210613.go @@ -0,0 +1,21 @@ +// Program b210613 reproduces the code reported in: +// +// https://bugzilla.kernel.org/show_bug.cgi?id=210613 +// +// This file is evolved directly from the reproducer attached to that +// bug report originally 
authored by Lorenz Bauer. +package main + +import ( + "fmt" + "log" + + "kernel.org/pub/linux/libs/security/libcap/cap" +) + +func main() { + if err := cap.ModeNoPriv.Set(); err != nil { + log.Fatalf("error dropping privilege: %v", err) + } + fmt.Println("b210613: PASSED") +} diff --git a/go/psx-signals.go b/go/psx-signals.go new file mode 100644 index 0000000..486f284 --- /dev/null +++ b/go/psx-signals.go @@ -0,0 +1,46 @@ +// Program psx-signals validates that the psx mechanism can coexist +// with Go use of signals. This is an unprivileged program derived +// from the sample code provided in this bug report: +// +// https://bugzilla.kernel.org/show_bug.cgi?id=210533 +package main + +import ( + "fmt" + "log" + "os" + "os/signal" + "syscall" + "time" + + "kernel.org/pub/linux/libs/security/libcap/psx" +) + +const maxSig = 10 +const prSetKeepCaps = 8 + +func main() { + sig := make(chan os.Signal, maxSig) + signal.Notify(sig, os.Interrupt) + + fmt.Print("Toggling KEEP_CAPS ") + for i := 0; i < maxSig; i++ { + fmt.Print(".") + _, _, err := psx.Syscall3(syscall.SYS_PRCTL, prSetKeepCaps, uintptr(i&1), 0) + if err != 0 { + log.Fatalf("[%d] attempt to set KEEPCAPS (to %d) failed: %v", i, i%2, err) + } + } + + fmt.Println(" done") + fmt.Print("Wait 1 second to see if unwanted signals arrive...") + // Confirm no signals are delivered. 
+ select { + case <-time.After(1 * time.Second): + break + case info := <-sig: + log.Fatalf("signal received: %v", info) + } + fmt.Println(" none arrived") + fmt.Println("PASSED") +} diff --git a/go/try-launching.go b/go/try-launching.go index 272fd0a..9f20e6b 100644 --- a/go/try-launching.go +++ b/go/try-launching.go @@ -28,6 +28,7 @@ func tryLaunching() { iab string uid int gid int + mode cap.Mode groups []int }{ {args: []string{root + "/go/ok"}}, @@ -44,6 +45,11 @@ func tryLaunching() { chroot: root + "/go", fail: syscall.Getuid() != 0, }, + { + args: []string{root + "/progs/tcapsh-static", "--inmode=NOPRIV", "--has-no-new-privs"}, + mode: cap.ModeNoPriv, + fail: syscall.Getuid() != 0, + }, } ps := make([]int, len(vs)) @@ -61,6 +67,9 @@ func tryLaunching() { if v.gid != 0 { e.SetGroups(v.gid, v.groups) } + if v.mode != 0 { + e.SetMode(v.mode) + } if v.iab != "" { if iab, err := cap.IABFromText(v.iab); err != nil { log.Fatalf("failed to parse iab=%q: %v", v.iab, err) @@ -68,6 +77,7 @@ func tryLaunching() { e.SetIAB(iab) } } + log.Printf("[%d] trying: %q\n", i, v.args) if ps[i], err = e.Launch(nil); err != nil { if v.fail { continue diff --git a/goapps/gowns/go.mod b/goapps/gowns/go.mod new file mode 100644 index 0000000..3863fbb --- /dev/null +++ b/goapps/gowns/go.mod @@ -0,0 +1,5 @@ +module gowns + +go 1.15 + +require kernel.org/pub/linux/libs/security/libcap/cap v0.2.47 diff --git a/goapps/gowns/gowns.go b/goapps/gowns/gowns.go new file mode 100644 index 0000000..b9a14cd --- /dev/null +++ b/goapps/gowns/gowns.go @@ -0,0 +1,249 @@ +// Program gowns is a small program to explore and demonstrate using +// Go to Wrap a child in a NameSpace under Linux. +package main + +import ( + "errors" + "flag" + "fmt" + "log" + "os" + "strings" + "syscall" + + "kernel.org/pub/linux/libs/security/libcap/cap" +) + +// nsDetail is how we summarize the type of namespace we want to +// enter. 
+type nsDetail struct { + // uid holds the uid for the base user in this namespace (defaults to getuid). + uid int + + // uidMap holds the namespace mapping of uid values. + uidMap []syscall.SysProcIDMap + + // gid holds the gid for the base user in this namespace (defaults to getgid). + gid int + + // gidMap holds the namespace mapping of gid values. + gidMap []syscall.SysProcIDMap +} + +var ( + baseID = flag.Int("base", -1, "base id for uids and gids (-1 = invoker's uid)") + uid = flag.Int("uid", -1, "uid of the hosting user") + gid = flag.Int("gid", -1, "gid of the hosting user") + iab = flag.String("iab", "", "IAB string for inheritable capabilities") + mode = flag.String("mode", "", "force a libcap mode (capsh --modes for list)") + + ns = flag.Bool("ns", false, "enable user namespace features") + uids = flag.String("uids", "", "comma separated UID ranges to map contiguously (req. CAP_SETUID)") + gids = flag.String("gids", "", "comma separated GID ranges to map contiguously (req. CAP_SETGID)") + + shell = flag.String("shell", "/bin/bash", "shell to be launched") + debug = flag.Bool("verbose", false, "more verbose output") +) + +// r holds a base and count for a contiguous range. +type r struct { + base, count int +} + +// ranges unpacks numerical ranges. +func ranges(s string) []r { + if s == "" { + return nil + } + var rs []r + for _, n := range strings.Split(s, ",") { + var base, upper int + if _, err := fmt.Sscanf(n, "%d-%d", &base, &upper); err == nil { + if upper < base { + log.Fatalf("invalid range: [%d-%d]", base, upper) + } + rs = append(rs, r{ + base: base, + count: 1 + upper - base, + }) + } else if _, err := fmt.Sscanf(n, "%d", &base); err == nil { + rs = append(rs, r{ + base: base, + count: 1, + }) + } else { + log.Fatalf("unable to parse range [%s]", n) + } + } + return rs +} + +// restart launches the program again with the remaining arguments. 
+func restart() { + log.Fatalf("failed to restart: flags: %q %q", os.Args[0], flag.Args()[1:]) +} + +// errUnableToSetup is how nsSetup fails. +var errUnableToSetup = errors.New("data was not in supported format") + +// nsSetup is the callback used to enter the namespace for the user +// via callback in the cap.Launcher mechanism. +func nsSetup(pa *syscall.ProcAttr, data interface{}) error { + nsD, ok := data.(nsDetail) + if !ok { + return errUnableToSetup + } + + if pa.Sys == nil { + pa.Sys = &syscall.SysProcAttr{} + } + pa.Sys.Cloneflags |= syscall.CLONE_NEWUSER + pa.Sys.UidMappings = nsD.uidMap + pa.Sys.GidMappings = nsD.gidMap + return nil +} + +func parseRanges(detail *nsDetail, ids string, id int) []syscall.SysProcIDMap { + base := *baseID + if base < 0 { + base = detail.uid + } + + list := []syscall.SysProcIDMap{ + syscall.SysProcIDMap{ + ContainerID: base, + HostID: id, + Size: 1, + }, + } + + base++ + for _, next := range ranges(ids) { + fmt.Println("next:", next) + list = append(list, + syscall.SysProcIDMap{ + ContainerID: base, + HostID: next.base, + Size: next.count, + }) + base += next.count + } + return list +} + +func main() { + flag.Parse() + + detail := nsDetail{ + gid: syscall.Getgid(), + } + + thisUID := syscall.Getuid() + switch *uid { + case -1: + detail.uid = thisUID + default: + detail.uid = *uid + } + detail.uidMap = parseRanges(&detail, *uids, detail.uid) + + thisGID := syscall.Getgid() + switch *gid { + case -1: + detail.gid = thisGID + default: + detail.gid = *gid + } + detail.gidMap = parseRanges(&detail, *gids, detail.gid) + + unparsed := flag.Args() + + arg0 := *shell + skip := 0 + var w *cap.Launcher + if len(unparsed) > 0 { + switch unparsed[0] { + case "==": + arg0 = os.Args[0] + skip++ + } + } + + w = cap.NewLauncher(arg0, append([]string{arg0}, unparsed[skip:]...), nil) + if *ns { + // Include the namespace setup callback with the launcher. 
+ w.Callback(nsSetup) + } + + if thisUID != detail.uid { + w.SetUID(detail.uid) + } + + if thisGID != detail.gid { + w.SetGroups(detail.gid, nil) + } + + if *iab != "" { + ins, err := cap.IABFromText(*iab) + if err != nil { + log.Fatalf("--iab=%q parsing issue: %v", err) + } + w.SetIAB(ins) + } + + if *mode != "" { + for m := cap.Mode(1); ; m++ { + if s := m.String(); s == "UNKNOWN" { + log.Fatalf("mode %q is unknown", *mode) + } else if s == *mode { + w.SetMode(m) + break + } + } + } + + // The launcher can enable more functionality if invoked with + // effective capabilities. + have := cap.GetProc() + for _, c := range []cap.Value{cap.SETUID, cap.SETGID} { + if canDo, err := have.GetFlag(cap.Permitted, c); err != nil { + log.Fatalf("failed to explore process capabilities, %q for %q", have, c) + } else if canDo { + if err := have.SetFlag(cap.Effective, true, c); err != nil { + log.Fatalf("failed to raise effective capability: \"%v e+%v\"", have, c) + } + } + } + if err := have.SetProc(); err != nil { + log.Fatalf("privilege assertion %q failed: %v", have, err) + } + + if *debug { + if *ns { + fmt.Println("launching namespace") + } else { + fmt.Println("launching without namespace") + } + } + + pid, err := w.Launch(detail) + if err != nil { + log.Fatalf("launch failed: %v", err) + } + if err := cap.NewSet().SetProc(); err != nil { + log.Fatalf("gowns could not drop privilege: %v", err) + } + + p, err := os.FindProcess(pid) + if err != nil { + log.Fatalf("cannot find process: %v", err) + } + state, err := p.Wait() + if err != nil { + log.Fatalf("waiting failed: %v", err) + } + + if *debug { + fmt.Println("process exited:", state) + } +} diff --git a/goapps/setid/go.mod b/goapps/setid/go.mod index 881f9e9..b227144 100644 --- a/goapps/setid/go.mod +++ b/goapps/setid/go.mod @@ -3,6 +3,6 @@ module setid go 1.11 require ( - kernel.org/pub/linux/libs/security/libcap/cap v0.2.45 - kernel.org/pub/linux/libs/security/libcap/psx v0.2.45 + 
kernel.org/pub/linux/libs/security/libcap/cap v0.2.47 + kernel.org/pub/linux/libs/security/libcap/psx v0.2.47 ) diff --git a/goapps/web/go.mod b/goapps/web/go.mod index 4b2eb4f..a2dac7d 100644 --- a/goapps/web/go.mod +++ b/goapps/web/go.mod @@ -2,4 +2,4 @@ module web go 1.11 -require kernel.org/pub/linux/libs/security/libcap/cap v0.2.45 +require kernel.org/pub/linux/libs/security/libcap/cap v0.2.47 diff --git a/kdebug/test-kernel.sh b/kdebug/test-kernel.sh index d480a63..1326cd7 100755 --- a/kdebug/test-kernel.sh +++ b/kdebug/test-kernel.sh @@ -13,7 +13,8 @@ function die { } pushd .. -make || die "failed to make libcap tree" +make test || die "failed to make test of libcap tree" +make -C progs tcapsh-static || die "failed to make progs/tcapsh-static" popd # Assumes desired make *config (eg. make defconfig) is already done. @@ -45,9 +46,16 @@ file /root/setcap $HERE/../progs/setcap 0755 0 0 file /root/getcap $HERE/../progs/getcap 0755 0 0 file /root/capsh $HERE/../progs/capsh 0755 0 0 file /root/getpcaps $HERE/../progs/getpcaps 0755 0 0 +file /root/tcapsh-static $HERE/../progs/tcapsh-static 0755 0 0 EOF -COMMANDS="ls ln cp dmesg id pwd mkdir rmdir cat rm sh mount umount chmod less vi" +# convenience for some local experiments +if [ -f "$HERE/extras.sh" ]; then + echo "local, uncommitted enhancements to kernel test" + . 
"$HERE/extras.sh" +fi + +COMMANDS="awk cat chmod cp dmesg fgrep id less ln ls mkdir mount pwd rm rmdir sh sort umount uniq vi" for f in $COMMANDS; do echo slink /bin/$f /sbin/busybox 0755 0 0 >> fs.conf done @@ -59,9 +67,10 @@ done $KBASE/usr/gen_init_cpio fs.conf | gzip -9 > initramfs.img -KERNEL=$KBASE/arch/x86_64/boot/bzImage +KERNEL=$KBASE/arch/$(uname -m)/boot/bzImage qemu-system-$(uname -m) -m 1024 \ -kernel $KERNEL \ -initrd initramfs.img \ - -append "$APPEND" + -append "$APPEND" \ + -smp sockets=2,dies=1,cores=4 diff --git a/kdebug/test-passwd b/kdebug/test-passwd index 4fa92a4..0ff71df 100644 --- a/kdebug/test-passwd +++ b/kdebug/test-passwd @@ -1,2 +1,3 @@ root:x:0:0:root:/root:/bin/bash +luser:x:1:1:Luser:/:/bin/bash nobody:x:99:99:Nobody:/:/sbin/nologin diff --git a/libcap/cap_proc.c b/libcap/cap_proc.c index 3929f66..1329f94 100644 --- a/libcap/cap_proc.c +++ b/libcap/cap_proc.c @@ -390,7 +390,7 @@ static int _cap_set_secbits(struct syscaller_s *sc, unsigned bits) } /* - * Set the security mode of the current process. + * Set the secbits of the current process. */ int cap_set_secbits(unsigned bits) { @@ -398,6 +398,14 @@ int cap_set_secbits(unsigned bits) } /* + * Attempt to raise the no new privs prctl value. + */ +static void _cap_set_no_new_privs(struct syscaller_s *sc) +{ + (void) _libcap_wprctl6(sc, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); +} + +/* * Some predefined constants */ #define CAP_SECURED_BITS_BASIC \ @@ -448,7 +456,11 @@ static int _cap_set_mode(struct syscaller_s *sc, cap_mode_t flavor) (void) _cap_drop_bound(sc, c); } (void) cap_clear_flag(working, CAP_PERMITTED); + + /* for good measure */ + _cap_set_no_new_privs(sc); break; + default: errno = EINVAL; ret = -1; diff --git a/libcap/include/sys/psx_syscall.h b/libcap/include/sys/psx_syscall.h index 1578765..ebac5fe 120000 --- a/libcap/include/sys/psx_syscall.h +++ b/libcap/include/sys/psx_syscall.h @@ -1 +1 @@ -../../../psx/include/sys/psx_syscall.h
\ No newline at end of file +../../../psx/psx_syscall.h
\ No newline at end of file diff --git a/libcap/psx_syscall.h b/libcap/psx_syscall.h new file mode 120000 index 0000000..dc748bb --- /dev/null +++ b/libcap/psx_syscall.h @@ -0,0 +1 @@ +include/sys/psx_syscall.h
\ No newline at end of file diff --git a/progs/capsh.c b/progs/capsh.c index 899f79c..a39ceeb 100644 --- a/progs/capsh.c +++ b/progs/capsh.c @@ -1,9 +1,10 @@ /* * Copyright (c) 2008-11,16,19,2020 Andrew G. Morgan <morgan@kernel.org> * - * This is a simple 'bash' (-DSHELL) wrapper program that can be used - * to raise and lower both the bset and pI capabilities before - * invoking /bin/bash. + * This is a multifunction shell wrapper tool that can be used to + * launch capable files in various ways with a variety of settings. It + * also supports some testing modes, which are used extensively as + * part of the libcap build system. * * The --print option can be used as a quick test whether various * capability manipulations work as expected (or not). @@ -107,8 +108,9 @@ static void arg_print(void) set = cap_get_secbits(); if (set >= 0) { const char *b = binary(set); /* verilog convention for binary string */ - printf("Securebits: 0%lo/0x%lx/%u'b%s\n", set, set, - (unsigned) strlen(b), b); + printf("Securebits: 0%lo/0x%lx/%u'b%s (no-new-privs=%d)\n", set, set, + (unsigned) strlen(b), b, + prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0, 0)); printf(" secure-noroot: %s (%s)\n", (set & SECBIT_NOROOT) ? "yes":"no", (set & SECBIT_NOROOT_LOCKED) ? "locked":"unlocked"); @@ -909,47 +911,66 @@ int main(int argc, char *argv[], char *envp[]) exit(1); } cap_free(iab); + } else if (!strcmp("--no-new-privs", argv[i])) { + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0) != 0) { + perror("unable to set no-new-privs"); + exit(1); + } + } else if (!strcmp("--has-no-new-privs", argv[i])) { + if (prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0, 0) != 1) { + fprintf(stderr, "no-new-privs not set\n"); + exit(1); + } + } else if (!strcmp("--license", argv[i])) { + printf( + "%s has a you choose license: BSD 3-clause or GPL2\n" + "Copyright (c) 2008-11,16,19,2020 Andrew G. 
Morgan" + " <morgan@kernel.org>\n", argv[0]); + exit(0); } else { usage: printf("usage: %s [args ...]\n" - " --help this message (or try 'man capsh')\n" - " --print display capability relevant state\n" - " --decode=xxx decode a hex string to a list of caps\n" - " --supports=xxx exit 1 if capability xxx unsupported\n" - " --has-p=xxx exit 1 if capability xxx not permitted\n" - " --has-i=xxx exit 1 if capability xxx not inheritable\n" - " --drop=xxx remove xxx,.. capabilities from bset\n" - " --dropped=xxx exit 1 unless bounding cap xxx dropped\n" - " --has-ambient exit 1 unless ambient vector supported\n" " --has-a=xxx exit 1 if capability xxx not ambient\n" + " --has-ambient exit 1 unless ambient vector supported\n" " --addamb=xxx add xxx,... capabilities to ambient set\n" - " --delamb=xxx remove xxx,... capabilities from ambient\n" - " --noamb reset (drop) all ambient capabilities\n" + " --cap-uid=<n> use libcap cap_setuid() to change uid\n" " --caps=xxx set caps as per cap_from_text()\n" - " --inh=xxx set xxx,.. inheritable set\n" - " --secbits=<n> write a new value for securebits\n" + " --chroot=path chroot(2) to this path\n" + " --decode=xxx decode a hex string to a list of caps\n" + " --delamb=xxx remove xxx,... capabilities from ambient\n" + " --forkfor=<n> fork and make child sleep for <n> sec\n" + " --gid=<n> set gid to <n> (hint: id <username>)\n" + " --groups=g,... set the supplemental groups\n" + " --has-p=xxx exit 1 if capability xxx not permitted\n" + " --has-i=xxx exit 1 if capability xxx not inheritable\n" + " --has-no-new-privs exit 1 if privs not limited\n" + " --help, -h this message (or try 'man capsh')\n" " --iab=... use cap_iab_from_text() to set iab\n" - " --keep=<n> set keep-capability bit to <n>\n" - " --uid=<n> set uid to <n> (hint: id <username>)\n" - " --cap-uid=<n> libcap cap_setuid() to change uid\n" + " --inh=xxx set xxx,.. 
inheritable set\n" + " --inmode=<xxx> exit 1 if current mode is not <xxx>\n" " --is-uid=<n> exit 1 if uid != <n>\n" - " --gid=<n> set gid to <n> (hint: id <username>)\n" " --is-gid=<n> exit 1 if gid != <n>\n" - " --groups=g,... set the supplemental groups\n" - " --user=<name> set uid,gid and groups to that of user\n" - " --chroot=path chroot(2) to this path\n" + " --keep=<n> set keep-capability bit to <n>\n" + " --killit=<n> send signal(n) to child\n" + " --license display license info\n" " --modes list libcap named capability modes\n" " --mode=<xxx> set capability mode to <xxx>\n" - " --inmode=<xxx> exit 1 if current mode is not <xxx>\n" - " --killit=<n> send signal(n) to child\n" - " --forkfor=<n> fork and make child sleep for <n> sec\n" + " --no-new-privs set sticky process privilege limiter\n" + " --noamb reset (drop) all ambient capabilities\n" + " --print display capability relevant state\n" + " --secbits=<n> write a new value for securebits\n" " --shell=/xx/yy use /xx/yy instead of " SHELL " for --\n" + " --supports=xxx exit 1 if capability xxx unsupported\n" + " --uid=<n> set uid to <n> (hint: id <username>)\n" + " --user=<name> set uid,gid and groups to that of user\n" " == re-exec(capsh) with args as for --\n" " -- remaining arguments are for " SHELL "\n" " (without -- [%s] will simply exit(0))\n", argv[0], argv[0]); - - exit(strcmp("--help", argv[i]) != 0); + if (strcmp("--help", argv[1]) && strcmp("-h", argv[1])) { + exit(1); + } + exit(0); } } diff --git a/progs/getcap.c b/progs/getcap.c index 225207f..208bd6a 100644 --- a/progs/getcap.c +++ b/progs/getcap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997,2007 Andrew G. Morgan <morgan@kernel.org> + * Copyright (c) 1997,2007 Andrew G. Morgan <morgan@kernel.org> * * This displays the capabilities of a given file. 
*/ @@ -23,14 +23,14 @@ static int verbose = 0; static int recursive = 0; static int namespace = 0; -static void usage(void) +static void usage(int code) { fprintf(stderr, - "usage: getcap [-v] [-r] [-h] [-n] <filename> [<filename> ...]\n" - "\n" - "\tdisplays the capabilities on the queried file(s).\n" + "usage: getcap [-h] [-l] [-n] [-r] [-v] <filename> [<filename> ...]\n" + "\n" + "\tdisplays the capabilities on the queried file(s).\n" ); - exit(1); + exit(code); } static int do_getcap(const char *fname, const struct stat *stbuf, @@ -82,7 +82,7 @@ int main(int argc, char **argv) { int i, c; - while ((c = getopt(argc, argv, "rvhn")) > 0) { + while ((c = getopt(argc, argv, "rvhnl")) > 0) { switch(c) { case 'r': recursive = 1; @@ -93,13 +93,20 @@ int main(int argc, char **argv) case 'n': namespace = 1; break; + case 'h': + usage(0); + case 'l': + printf("%s has a you choose license: BSD 3-clause or GPL2\n" + "Copyright (c) 1997,2007 Andrew G. Morgan" + " <morgan@kernel.org>\n", argv[0]); + exit(0); default: - usage(); + usage(1); } } if (!argv[optind]) - usage(); + usage(1); for (i=optind; argv[i] != NULL; i++) { struct stat stbuf; diff --git a/progs/getpcaps.c b/progs/getpcaps.c index 497abcd..5bc511e 100644 --- a/progs/getpcaps.c +++ b/progs/getpcaps.c @@ -11,19 +11,19 @@ #include <stdlib.h> #include <sys/capability.h> -static void usage(int exiter) +static void usage(int code) { fprintf(stderr, "usage: getcaps <pid> [<pid> ...]\n\n" " This program displays the capabilities on the queried process(es).\n" -" The capabilities are displayed in the cap_from_text(3) format.\n\n" -" Optional arguments:\n" -" --help or --usage display this message.\n" -" --verbose use a more verbose output format.\n" -" --ugly or --legacy use the archaic legacy output format.\n\n" -"[Copyright (c) 1997-8,2007,2019 Andrew G. 
Morgan <morgan@kernel.org>]\n" - ); - exit(exiter); + " The capabilities are displayed in the cap_from_text(3) format.\n" + "\n" + " Optional arguments:\n" + " --help, -h or --usage display this message.\n" + " --verbose use a more verbose output format.\n" + " --ugly or --legacy use the archaic legacy output format.\n" + " --license display license info\n"); + exit(code); } int main(int argc, char **argv) @@ -40,8 +40,14 @@ int main(int argc, char **argv) int pid; cap_t cap_d; - if (!strcmp(argv[0], "--help") || !strcmp(argv[0], "--usage")) { + if (!strcmp(argv[0], "--help") || !strcmp(argv[0], "--usage") || + !strcmp(argv[0], "-h")) { usage(0); + } else if (!strcmp(argv[0], "--license")) { + printf("%s has a you choose license: BSD 3-clause or GPL2\n" +"[Copyright (c) 1997-8,2007,2019 Andrew G. Morgan <morgan@kernel.org>]\n", + argv[0]); + exit(0); } else if (!strcmp(argv[0], "--verbose")) { verbose = 1; continue; diff --git a/progs/quicktest.sh b/progs/quicktest.sh index 1c21bb4..6aa2598 100755 --- a/progs/quicktest.sh +++ b/progs/quicktest.sh @@ -255,6 +255,8 @@ if [ -f ../go/compare-cap ]; then fi echo "PASSED" else - echo "no Go support compiled" + echo "no Go support compiled, so skipping Go tests" fi rm -f compare-cap + +echo "ALL TESTS PASSED!" diff --git a/progs/setcap.c b/progs/setcap.c index 442685d..930429a 100644 --- a/progs/setcap.c +++ b/progs/setcap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997,2007-8 Andrew G. Morgan <morgan@kernel.org> + * Copyright (c) 1997,2007-8,2020 Andrew G. Morgan <morgan@kernel.org> * * This sets/verifies the capabilities of a given file. */ @@ -11,15 +11,24 @@ #include <sys/capability.h> #include <unistd.h> -static void usage(void) +static void usage(int status) { fprintf(stderr, - "usage: setcap [-q] [-v] [-n <rootid>] (-r|-|<caps>) <filename> " + "usage: setcap [-h] [-q] [-v] [-n <rootid>] (-r|-|<caps>) <filename> " "[ ... 
(-r|-|<capsN>) <filenameN> ]\n" "\n" " Note <filename> must be a regular (non-symlink) file.\n" + " -r remove capability from file\n" + " - read capability text from stdin\n" + " <capsN> cap_from_text(3) formatted file capability\n" + "\n" + " -h this message and exit status 0\n" + " -q quietly\n" + " -v validate supplied capability matches file\n" + " -n <rootid> write a user namespace limited capability\n" + " --license display the license info\n" ); - exit(1); + exit(status); } #define MAXCAP 2048 @@ -65,8 +74,8 @@ int main(int argc, char **argv) cap_value_t capflag; uid_t rootid = 0, f_rootid; - if (argc < 3) { - usage(); + if (argc < 2) { + usage(1); } mycaps = cap_get_proc(); @@ -83,6 +92,16 @@ int main(int argc, char **argv) quiet = 1; continue; } + if (!strcmp("--license", *argv)) { + printf( + "%s has a you choose license: BSD 3-clause or GPL2\n" + "Copyright (c) 1997,2007-8,2020 Andrew G. Morgan" + " <morgan@kernel.org>\n", argv[0]); + exit(0); + } + if (!strcmp(*argv, "-h")) { + usage(0); + } if (!strcmp(*argv, "-v")) { verify = 1; continue; @@ -107,7 +126,7 @@ int main(int argc, char **argv) if (!strcmp(*argv,"-")) { retval = read_caps(quiet, *argv, buffer); if (retval) - usage(); + usage(1); text = buffer; } else { text = *argv; @@ -116,7 +135,7 @@ int main(int argc, char **argv) cap_d = cap_from_text(text); if (cap_d == NULL) { perror("fatal error"); - usage(); + usage(1); } if (cap_set_nsowner(cap_d, rootid)) { perror("unable to set nsowner"); @@ -134,7 +153,7 @@ int main(int argc, char **argv) } if (--argc <= 0) - usage(); + usage(1); /* * Set the filesystem capability for this file. 
*/ @@ -194,6 +213,7 @@ int main(int argc, char **argv) if (retval != 0) { int explained = 0; int oerrno = errno; + int somebits = 0; #ifdef linux cap_value_t cap; cap_flag_value_t per_state; @@ -201,24 +221,28 @@ int main(int argc, char **argv) for (cap = 0; cap_get_flag(cap_d, cap, CAP_PERMITTED, &per_state) != -1; cap++) { - cap_flag_value_t inh_state, eff_state; + cap_flag_value_t inh_state, eff_state, combined; cap_get_flag(cap_d, cap, CAP_INHERITABLE, &inh_state); cap_get_flag(cap_d, cap, CAP_EFFECTIVE, &eff_state); - if ((inh_state | per_state) != eff_state) { - fprintf(stderr, "NOTE: Under Linux, effective file capabilities must either be empty, or\n" - " exactly match the union of selected permitted and inheritable bits.\n"); + combined = (inh_state | per_state); + somebits |= !!eff_state; + if (combined != eff_state) { explained = 1; break; } } + if (somebits && explained) { + fprintf(stderr, "NOTE: Under Linux, effective file capabilities must either be empty, or\n" + " exactly match the union of selected permitted and inheritable bits.\n"); + } #endif /* def linux */ - + fprintf(stderr, "Failed to set capabilities on file `%s' (%s)\n", argv[0], strerror(oerrno)); if (!explained) { - usage(); + usage(1); } } } diff --git a/psx/doc.go b/psx/doc.go new file mode 100644 index 0000000..e6f9013 --- /dev/null +++ b/psx/doc.go @@ -0,0 +1,60 @@ +// Package psx provides support for system calls that are run +// simultaneously on all threads under Linux. +// +// This property can be used to work around a historical lack of +// native Go support for such a feature. Something that is the subject +// of: +// +// https://github.com/golang/go/issues/1435 +// +// The package works differently depending on whether or not +// CGO_ENABLED is 0 or 1. +// +// In the former case, psx is a low overhead wrapper for the two +// native go calls: syscall.AllThreadsSyscall() and +// syscall.AllThreadsSyscall6() [expected to be] introduced in +// go1.16. 
We provide this wrapping to minimize client source code +// changes when compiling with or without CGo enabled. +// +// In the latter case, and toolchains prior to go1.16, it works via +// CGo wrappers for system call functions that call the C [lib]psx +// functions of these names. This ensures that the system calls +// execute simultaneously on all the pthreads of the Go (and CGo) +// combined runtime. +// +// With CGo, the psx support works in the following way: the pthread +// that is first asked to execute the syscall does so, and determines +// if it succeeds or fails. If it fails, it returns immediately +// without attempting the syscall on other pthreads. If the initial +// attempt succeeds, however, then the runtime is stopped in order for +// the same system call to be performed on all the remaining pthreads +// of the runtime. Once all pthreads have completed the syscall, the +// return codes are those obtained by the first pthread's invocation +// of the syscall. +// +// Note, there is no need to use this variant of syscall where the +// syscalls only read state from the kernel. However, since Go's +// runtime freely migrates code execution between pthreads, support of +// this type is required for any successful attempt to fully drop or +// modify the privilege of a running Go program under Linux. +// +// More info on how Linux privilege works and examples of using this +// package can be found here: +// +// https://sites.google.com/site/fullycapable +// +// WARNING: For older go toolchains (prior to go1.15), correct +// compilation of this package may require an extra workaround step: +// +// The workaround is to build with the following CGO_LDFLAGS_ALLOW in +// effect (here the syntax is that of bash for defining an environment +// variable): +// +// export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*" +// +// +// Copyright (c) 2019,20 Andrew G. 
Morgan <morgan@kernel.org> +// +// The psx package is licensed with a (you choose) BSD 3-clause or +// GPL2. See LICENSE file for details. +package psx // import "kernel.org/pub/linux/libs/security/libcap/psx" @@ -25,9 +25,10 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <sys/psx_syscall.h> #include <sys/syscall.h> +#include "psx_syscall.h" + /* * psx_load_syscalls() is weakly defined so we can have it overridden * by libpsx if it is linked. Specifically, when libcap calls @@ -89,6 +90,7 @@ static struct psx_tracker_s { } cmd; struct sigaction sig_action; + struct sigaction chained_action; registered_thread_t *root; } psx_tracker; @@ -123,6 +125,9 @@ static void psx_posix_syscall_actor(int signum, siginfo_t *info, void *ignore) { /* bail early if this isn't something we recognize */ if (signum != psx_tracker.psx_sig || !psx_tracker.cmd.active || info == NULL || info->si_code != SI_TKILL || info->si_pid != getpid()) { + if (psx_tracker.chained_action.sa_sigaction != 0) { + psx_tracker.chained_action.sa_sigaction(signum, info, ignore); + } return; } @@ -174,6 +179,34 @@ extern int __real_pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg); /* + * psx_confirm_sigaction reconfirms that the psx handler is the first + * handler to respond to the psx signal. It assumes that + * psx_tracker.psx_sig has been set. + */ +static void psx_confirm_sigaction(void) { + sigset_t mask, orig; + struct sigaction existing_sa; + + /* + * Block interrupts while potentially rewriting the handler. 
+ */ + sigemptyset(&mask); + sigaddset(&mask, psx_tracker.psx_sig); + sigprocmask(SIG_BLOCK, &mask, &orig); + + sigaction(psx_tracker.psx_sig, NULL, &existing_sa); + if (existing_sa.sa_sigaction != psx_posix_syscall_actor) { + memcpy(&psx_tracker.chained_action, &existing_sa, sizeof(struct sigaction)); + psx_tracker.sig_action.sa_sigaction = psx_posix_syscall_actor; + sigemptyset(&psx_tracker.sig_action.sa_mask); + psx_tracker.sig_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART; + sigaction(psx_tracker.psx_sig, &psx_tracker.sig_action, NULL); + } + + sigprocmask(SIG_SETMASK, &orig, NULL); +} + +/* * psx_syscall_start initializes the subsystem including initializing * the mutex. */ @@ -184,15 +217,17 @@ static void psx_syscall_start(void) { pthread_atfork(_psx_prepare_fork, _psx_fork_completed, _psx_forked_child); /* - * glibc nptl picks from the SIGRTMIN end, so we pick from the - * SIGRTMAX end + * All sorts of things are assumed by Linux and glibc and/or musl + * about signal handlers and which can be blocked. Go has its own + * idiosyncrasies too. We tried SIGRTMAX until + * + * https://bugzilla.kernel.org/show_bug.cgi?id=210533 + * + * Our current strategy is to aggressively intercept SIGSYS. */ - psx_tracker.psx_sig = SIGRTMAX; - psx_tracker.sig_action.sa_sigaction = psx_posix_syscall_actor; - sigemptyset(&psx_tracker.sig_action.sa_mask); - psx_tracker.sig_action.sa_flags = SA_SIGINFO | SA_RESTART;; - sigaction(psx_tracker.psx_sig, &psx_tracker.sig_action, NULL); + psx_tracker.psx_sig = SIGSYS; + psx_confirm_sigaction(); psx_do_registration(); // register the main thread. psx_tracker.initialized = 1; @@ -201,7 +236,8 @@ static void psx_syscall_start(void) { /* * This is the only way this library globally locks. Note, this is not * to be confused with psx_sig (interrupt) blocking - which is - * performed around thread creation. + * performed around thread creation and when the signal handler is + * being confirmed. 
*/ static void psx_lock(void) { @@ -336,11 +372,45 @@ typedef struct { * https://sourceware.org/bugzilla/show_bug.cgi?id=12889 */ static void _psx_exiting(void *node) { + /* + * Until we are in the _PSX_EXITING state, we must not block the + * psx_sig interrupt for this dying thread. That is, until this + * exiting thread can set ref->gone to 1, this dying thread is + * still participating in the psx syscall distribution. + * + * See https://github.com/golang/go/issues/42494 for a situation + * where this code is called with psx_tracker.psx_sig blocked. + */ + sigset_t sigbit, orig_sigbits; + sigemptyset(&sigbit); + pthread_sigmask(SIG_UNBLOCK, &sigbit, &orig_sigbits); + sigaddset(&sigbit, psx_tracker.psx_sig); + pthread_sigmask(SIG_UNBLOCK, &sigbit, NULL); + + /* + * With psx_tracker.psx_sig unblocked we can wait until this + * thread can enter the _PSX_EXITING state. + */ psx_new_state(_PSX_IDLE, _PSX_EXITING); + + /* + * We now indicate that this thread is no longer participating in + * the psx mechanism. + */ registered_thread_t *ref = node; pthread_mutex_lock(&ref->mu); ref->gone = 1; pthread_mutex_unlock(&ref->mu); + + /* + * At this point, we can restore the calling sigmask to whatever + * the caller thought was appropriate for a dying thread to have. + */ + pthread_sigmask(SIG_SETMASK, &orig_sigbits, NULL); + + /* + * Allow the rest of the psx system carry on as per normal. + */ psx_new_state(_PSX_EXITING, _PSX_IDLE); } @@ -420,16 +490,6 @@ int __wrap_pthread_create(pthread_t *thread, const pthread_attr_t *attr, } /* - * psx_pthread_create is a wrapper for pthread_create() that registers - * the newly created thread. If your threads are created already, they - * can be individually registered with psx_register(). 
- */ -int psx_pthread_create(pthread_t *thread, const pthread_attr_t *attr, - void *(*start_routine) (void *), void *arg) { - return __wrap_pthread_create(thread, attr, start_routine, arg); -} - -/* * __psx_immediate_syscall does one syscall using the current * process. */ @@ -442,9 +502,9 @@ static long int __psx_immediate_syscall(long int syscall_nr, if (count > 3) { psx_tracker.cmd.six = 1; - psx_tracker.cmd.arg1 = arg[3]; - psx_tracker.cmd.arg2 = count > 4 ? arg[4] : 0; - psx_tracker.cmd.arg3 = count > 5 ? arg[5] : 0; + psx_tracker.cmd.arg4 = arg[3]; + psx_tracker.cmd.arg5 = count > 4 ? arg[4] : 0; + psx_tracker.cmd.arg6 = count > 5 ? arg[5] : 0; return syscall(syscall_nr, psx_tracker.cmd.arg1, psx_tracker.cmd.arg2, @@ -497,10 +557,11 @@ long int __psx_syscall(long int syscall_nr, ...) { } psx_new_state(_PSX_IDLE, _PSX_SETUP); + psx_confirm_sigaction(); long int ret; - ret = __psx_immediate_syscall(syscall_nr, count, arg);; + ret = __psx_immediate_syscall(syscall_nr, count, arg); if (ret == -1 || !psx_tracker.initialized) { psx_new_state(_PSX_SETUP, _PSX_IDLE); goto defer; @@ -1,114 +1,13 @@ -// Package psx provides support for system calls that are run -// simultanously on all pthreads. -// -// This property can be used to work around a lack of native Go -// support for such a feature. Something that is the subject of: -// -// https://github.com/golang/go/issues/1435 -// -// The package works via CGo wrappers for system call functions that -// call the C [lib]psx functions of these names. This ensures that the -// system calls execute simultaneously on all the pthreads of the Go -// (and CGo) combined runtime. -// -// The psx support works in the following way: the pthread that is -// first asked to execute the syscall does so, and determines if it -// succeeds or fails. If it fails, it returns immediately without -// attempting the syscall on other pthreads. 
If the initial attempt -// succeeds, however, then the runtime is stopped in order for the -// same system call to be performed on all the remaining pthreads of -// the runtime. Once all pthreads have completed the syscall, the -// return codes are those obtained by the first pthread's invocation -// of the syscall. -// -// Note, there is no need to use this variant of syscall where the -// syscalls only read state from the kernel. However, since Go's -// runtime freely migrates code execution between pthreads, support of -// this type is required for any successful attempt to fully drop or -// modify the privilege of a running Go program under Linux. -// -// More info on how Linux privilege works can be found here: -// -// https://sites.google.com/site/fullycapable -// -// WARNING: Correct compilation of this package may require an extra -// step: -// -// If your Go compiler is older than go1.15, a workaround may be -// required to be able to link this package. In order to do what it -// needs to this package employs some unusual linking flags. -// -// The workaround is to build with the following CGO_LDFLAGS_ALLOW -// in effect: -// -// export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*" -// -// -// Copyright (c) 2019,20 Andrew G. Morgan <morgan@kernel.org> -// -// The psx package is licensed with a (you choose) BSD 3-clause or -// GPL2. See LICENSE file for details. +// +build linux,!cgo +// +build go1.16 + package psx // import "kernel.org/pub/linux/libs/security/libcap/psx" import ( - "runtime" "syscall" ) -// #cgo CFLAGS: -I${SRCDIR}/include -// #cgo LDFLAGS: -lpthread -Wl,-wrap,pthread_create -// -// #include <errno.h> -// #include <sys/psx_syscall.h> -// -// long __errno_too(long set_errno) { -// long v = errno; -// if (set_errno >= 0) { -// errno = set_errno; -// } -// return v; -// } -import "C" - -// setErrno returns the current C.errno value and, if v >= 0, sets the -// CGo errno for a random pthread to value v. 
If you want some -// consistency, this needs to be called from runtime.LockOSThread() -// code. This function is only defined for testing purposes. The psx.c -// code should properly handle the case that a non-zero errno is saved -// and restored independently of what these Syscall[36]() functions -// observe. -func setErrno(v int) int { - return int(C.__errno_too(C.long(v))) -} - -// Syscall3 performs a 3 argument syscall using the libpsx C function -// psx_syscall3(). Syscall3 differs from syscall.[Raw]Syscall() -// insofar as it is simultaneously executed on every pthread of the -// combined Go and CGo runtimes. -func Syscall3(syscallnr, arg1, arg2, arg3 uintptr) (uintptr, uintptr, syscall.Errno) { - runtime.LockOSThread() - defer runtime.UnlockOSThread() - - v := C.psx_syscall3(C.long(syscallnr), C.long(arg1), C.long(arg2), C.long(arg3)) - var errno syscall.Errno - if v < 0 { - errno = syscall.Errno(C.__errno_too(-1)) - } - return uintptr(v), uintptr(v), errno -} - -// Syscall6 performs a 6 argument syscall using the libpsx C function -// psx_syscall6(). Syscall6 differs from syscall.[Raw]Syscall6() insofar as -// it is simultaneously executed on every pthread of the combined Go -// and CGo runtimes. 
-func Syscall6(syscallnr, arg1, arg2, arg3, arg4, arg5, arg6 uintptr) (uintptr, uintptr, syscall.Errno) { - runtime.LockOSThread() - defer runtime.UnlockOSThread() - - v := C.psx_syscall6(C.long(syscallnr), C.long(arg1), C.long(arg2), C.long(arg3), C.long(arg4), C.long(arg5), C.long(arg6)) - var errno syscall.Errno - if v < 0 { - errno = syscall.Errno(C.__errno_too(-1)) - } - return uintptr(v), uintptr(v), errno -} +var ( + Syscall3 = syscall.AllThreadsSyscall + Syscall6 = syscall.AllThreadsSyscall6 +) diff --git a/psx/psx_cgo.go b/psx/psx_cgo.go new file mode 100644 index 0000000..c17b4f3 --- /dev/null +++ b/psx/psx_cgo.go @@ -0,0 +1,65 @@ +// +build linux,cgo + +package psx // import "kernel.org/pub/linux/libs/security/libcap/psx" + +import ( + "runtime" + "syscall" +) + +// #cgo LDFLAGS: -lpthread -Wl,-wrap,pthread_create +// +// #include <errno.h> +// #include "psx_syscall.h" +// +// long __errno_too(long set_errno) { +// long v = errno; +// if (set_errno >= 0) { +// errno = set_errno; +// } +// return v; +// } +import "C" + +// setErrno returns the current C.errno value and, if v >= 0, sets the +// CGo errno for a random pthread to value v. If you want some +// consistency, this needs to be called from runtime.LockOSThread() +// code. This function is only defined for testing purposes. The psx.c +// code should properly handle the case that a non-zero errno is saved +// and restored independently of what these Syscall[36]() functions +// observe. +func setErrno(v int) int { + return int(C.__errno_too(C.long(v))) +} + +// Syscall3 performs a 3 argument syscall using the libpsx C function +// psx_syscall3(). Syscall3 differs from syscall.[Raw]Syscall() +// insofar as it is simultaneously executed on every pthread of the +// combined Go and CGo runtimes. 
+func Syscall3(syscallnr, arg1, arg2, arg3 uintptr) (uintptr, uintptr, syscall.Errno) { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + v := C.psx_syscall3(C.long(syscallnr), C.long(arg1), C.long(arg2), C.long(arg3)) + var errno syscall.Errno + if v < 0 { + errno = syscall.Errno(C.__errno_too(-1)) + } + return uintptr(v), uintptr(v), errno +} + +// Syscall6 performs a 6 argument syscall using the libpsx C function +// psx_syscall6(). Syscall6 differs from syscall.[Raw]Syscall6() insofar as +// it is simultaneously executed on every pthread of the combined Go +// and CGo runtimes. +func Syscall6(syscallnr, arg1, arg2, arg3, arg4, arg5, arg6 uintptr) (uintptr, uintptr, syscall.Errno) { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + v := C.psx_syscall6(C.long(syscallnr), C.long(arg1), C.long(arg2), C.long(arg3), C.long(arg4), C.long(arg5), C.long(arg6)) + var errno syscall.Errno + if v < 0 { + errno = syscall.Errno(C.__errno_too(-1)) + } + return uintptr(v), uintptr(v), errno +} diff --git a/psx/psx_cgo_test.go b/psx/psx_cgo_test.go new file mode 100644 index 0000000..090a96a --- /dev/null +++ b/psx/psx_cgo_test.go @@ -0,0 +1,40 @@ +// +build cgo + +package psx + +import ( + "runtime" + "syscall" + "testing" +) + +// The man page for errno indicates that it is never set to zero, so +// validate that it retains its value over a successful Syscall[36]() +// and is overwritten on a failing syscall. +func TestErrno(t *testing.T) { + // This testing is much easier if we don't have to guess which + // thread is running this Go code. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + // Start from a known bad state and clean up afterwards. 
+ setErrno(int(syscall.EPERM)) + defer setErrno(0) + + v3, _, errno := Syscall3(syscall.SYS_GETUID, 0, 0, 0) + if errno != 0 { + t.Fatalf("psx getuid failed: %v", errno) + } + v6, _, errno := Syscall6(syscall.SYS_GETUID, 0, 0, 0, 0, 0, 0) + if errno != 0 { + t.Fatalf("psx getuid failed: %v", errno) + } + + if v3 != v6 { + t.Errorf("psx getuid failed to match v3=%d, v6=%d", v3, v6) + } + + if v := setErrno(-1); v != int(syscall.EPERM) { + t.Errorf("psx changes prevailing errno got=%v(%d) want=%v", syscall.Errno(v), v, syscall.EPERM) + } +} diff --git a/psx/include/sys/psx_syscall.h b/psx/psx_syscall.h index d1159e2..4aacfab 100644 --- a/psx/include/sys/psx_syscall.h +++ b/psx/psx_syscall.h @@ -43,7 +43,9 @@ extern "C" { * and psx_syscall6(). */ #define psx_syscall(syscall_nr, ...) \ - __psx_syscall(syscall_nr, __VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) + __psx_syscall(syscall_nr, __VA_ARGS__, (long int) 6, (long int) 5, \ + (long int) 4, (long int) 3, (long int) 2, \ + (long int) 1, (long int) 0) long int __psx_syscall(long int syscall_nr, ...); long int psx_syscall3(long int syscall_nr, long int arg1, long int arg2, long int arg3); @@ -52,23 +54,6 @@ long int psx_syscall6(long int syscall_nr, long int arg4, long int arg5, long int arg6); /* - * psx_pthread_create() wraps the -lpthread pthread_create() function - * call and registers the generated thread with the psx_syscall - * infrastructure. - * - * Note, to transparently redirect all the pthread_create() calls in - * your binary to psx_pthread_create(), link with: - * - * gcc ... -lpsx -lpthread -Wl,-wrap,pthread_create - * - * [That is, libpsx contains an internal definition for the - * __wrap_pthread_create function to invoke psx_pthread_create - * functionality instead.] - */ -int psx_pthread_create(pthread_t *thread, const pthread_attr_t *attr, - void *(*start_routine) (void *), void *arg); - -/* * This function should be used by systems to obtain pointers to the * two syscall functions provided by the PSX library. 
A linkage trick * is to define this function as weak in a library that can optionally diff --git a/psx/psx_test.go b/psx/psx_test.go index ae6ccd2..4b90f63 100644 --- a/psx/psx_test.go +++ b/psx/psx_test.go @@ -34,33 +34,35 @@ func TestSyscall6(t *testing.T) { } } -// The man page for errno indicates that it is never set to zero, so -// validate that it retains its value over a successful Syscall[36]() -// and is overwritten on a failing syscall. -func TestErrno(t *testing.T) { - // This testing is much easier if we don't have to guess which - // thread is running this Go code. +// killAThread locks the goroutine to a thread and exits. This has the +// effect of making the go runtime terminate the thread. +func killAThread(c <-chan struct{}) { runtime.LockOSThread() - defer runtime.UnlockOSThread() - - // Start from a known bad state and clean up afterwards. - setErrno(int(syscall.EPERM)) - defer setErrno(0) - - v3, _, errno := Syscall3(syscall.SYS_GETUID, 0, 0, 0) - if errno != 0 { - t.Fatalf("psx getuid failed: %v", errno) - } - v6, _, errno := Syscall6(syscall.SYS_GETUID, 0, 0, 0, 0, 0, 0) - if errno != 0 { - t.Fatalf("psx getuid failed: %v", errno) - } + <-c +} - if v3 != v6 { - t.Errorf("psx getuid failed to match v3=%d, v6=%d", v3, v6) - } +// Test to confirm no regression against: +// +// https://github.com/golang/go/issues/42494 +func TestThreadChurn(t *testing.T) { + const prSetKeepCaps = 8 - if v := setErrno(-1); v != int(syscall.EPERM) { - t.Errorf("psx changes prevailing errno got=%v(%d) want=%v", syscall.Errno(v), v, syscall.EPERM) + for j := 0; j < 4; j++ { + kill := (j & 1) != 0 + sysc := (j & 2) != 0 + t.Logf("[%d] testing kill=%v, sysc=%v", j, kill, sysc) + for i := 50; i > 0; i-- { + if kill { + c := make(chan struct{}) + go killAThread(c) + close(c) + } + if sysc { + if _, _, e := Syscall3(syscall.SYS_PRCTL, prSetKeepCaps, uintptr(i&1), 0); e != 0 { + t.Fatalf("[%d] psx:prctl(SET_KEEPCAPS, %d) failed: %v", i, i&1, syscall.Errno(e)) + } + } + } + 
t.Logf("[%d] PASSED kill=%v, sysc=%v", j, kill, sysc) } } diff --git a/tests/Makefile b/tests/Makefile index fc39fee..1e7039d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -17,13 +17,13 @@ install: all ifeq ($(DYNAMIC),yes) LINKEXTRA=-Wl,-rpath,../libcap -DEPS=../libcap/libcap.so ../progs/tcapsh-static +DEPS=../libcap/libcap.so ifeq ($(PTHREADS),yes) DEPS += ../libcap/libpsx.so endif else LDFLAGS += --static -DEPS=../libcap/libcap.a ../progs/tcapsh-static +DEPS=../libcap/libcap.a ifeq ($(PTHREADS),yes) DEPS += ../libcap/libpsx.a endif @@ -71,10 +71,10 @@ libcap_psx_test: libcap_psx_test.c $(DEPS) $(CC) $(CFLAGS) $(IPATH) $< -o $@ $(LINKEXTRA) $(LIBCAPLIB) $(LIBPSXLIB) $(LDFLAGS) # privileged -run_libcap_launch_test: libcap_launch_test noop +run_libcap_launch_test: libcap_launch_test noop ../progs/tcapsh-static sudo ./libcap_launch_test -run_libcap_psx_launch_test: libcap_psx_launch_test +run_libcap_psx_launch_test: libcap_psx_launch_test ../progs/tcapsh-static sudo ./libcap_psx_launch_test libcap_launch_test: libcap_launch_test.c $(DEPS) diff --git a/tests/libcap_launch_test.c b/tests/libcap_launch_test.c index c9ef205..bba38c6 100644 --- a/tests/libcap_launch_test.c +++ b/tests/libcap_launch_test.c @@ -70,7 +70,8 @@ int main(int argc, char **argv) { .iab = "!^cap_chown" }, { - .args = { "../progs/tcapsh-static", "--inmode=NOPRIV" }, + .args = { "../progs/tcapsh-static", "--inmode=NOPRIV", + "--has-no-new-privs" }, .result = 0, .mode = CAP_MODE_NOPRIV }, |