aboutsummaryrefslogtreecommitdiff
path: root/libminijail.c
diff options
context:
space:
mode:
Diffstat (limited to 'libminijail.c')
-rw-r--r--libminijail.c411
1 files changed, 347 insertions, 64 deletions
diff --git a/libminijail.c b/libminijail.c
index aab1294..bb60904 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+/* Copyright 2012 The ChromiumOS Authors
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
@@ -36,6 +36,7 @@
#include <syscall.h>
#include <unistd.h>
+#include "landlock_util.h"
#include "libminijail-private.h"
#include "libminijail.h"
@@ -72,6 +73,15 @@
(MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_NOATIME | MS_NODIRATIME | \
MS_RELATIME | MS_RDONLY)
+/*
+ * Fallback definition of MS_NOSYMFOLLOW, used only when the included
+ * headers do not already define it.
+ *
+ * TODO(b/235960683): Drop this after CrOS upgrades to glibc >= 2.34
+ * because MS_NOSYMFOLLOW will be defined in sys/mount.h.
+ */
+#ifndef MS_NOSYMFOLLOW
+/* Added locally in kernels 4.x+. */
+#define MS_NOSYMFOLLOW 256
+#endif
+
struct minijail_rlimit {
int type;
rlim_t cur;
@@ -101,6 +111,12 @@ struct hook {
struct hook *next;
};
+/*
+ * One filesystem access rule. Rules form a singly-linked list hanging off
+ * struct minijail (|fs_rules_head| / |fs_rules_tail|) and are appended at
+ * the tail by add_fs_restriction_path().
+ */
+struct fs_rule {
+ /*
+  * Heap-allocated copy of the path the rule applies to (strdup()'d in
+  * add_fs_restriction_path()); it must be released together with the node.
+  */
+ char *path;
+ /* Access flags handed to populate_ruleset_internal() for |path|. */
+ uint64_t landlock_flags;
+ /* Next rule in the list, or NULL for the last rule. */
+ struct fs_rule *next;
+};
+
struct preserved_fd {
int parent_fd;
int child_fd;
@@ -112,46 +128,46 @@ struct minijail {
* accounted for in minijail_pre{enter|exec}() below.
*/
struct {
- int uid : 1;
- int gid : 1;
- int inherit_suppl_gids : 1;
- int set_suppl_gids : 1;
- int keep_suppl_gids : 1;
- int use_caps : 1;
- int capbset_drop : 1;
- int set_ambient_caps : 1;
- int vfs : 1;
- int enter_vfs : 1;
- int pids : 1;
- int ipc : 1;
- int uts : 1;
- int net : 1;
- int enter_net : 1;
- int ns_cgroups : 1;
- int userns : 1;
- int disable_setgroups : 1;
- int seccomp : 1;
- int remount_proc_ro : 1;
- int no_new_privs : 1;
- int seccomp_filter : 1;
- int seccomp_filter_tsync : 1;
- int seccomp_filter_logging : 1;
- int seccomp_filter_allow_speculation : 1;
- int chroot : 1;
- int pivot_root : 1;
- int mount_dev : 1;
- int mount_tmp : 1;
- int do_init : 1;
- int run_as_init : 1;
- int pid_file : 1;
- int cgroups : 1;
- int alt_syscall : 1;
- int reset_signal_mask : 1;
- int reset_signal_handlers : 1;
- int close_open_fds : 1;
- int new_session_keyring : 1;
- int forward_signals : 1;
- int setsid : 1;
+ bool uid : 1;
+ bool gid : 1;
+ bool inherit_suppl_gids : 1;
+ bool set_suppl_gids : 1;
+ bool keep_suppl_gids : 1;
+ bool use_caps : 1;
+ bool capbset_drop : 1;
+ bool set_ambient_caps : 1;
+ bool vfs : 1;
+ bool enter_vfs : 1;
+ bool pids : 1;
+ bool ipc : 1;
+ bool uts : 1;
+ bool net : 1;
+ bool enter_net : 1;
+ bool ns_cgroups : 1;
+ bool userns : 1;
+ bool disable_setgroups : 1;
+ bool seccomp : 1;
+ bool remount_proc_ro : 1;
+ bool no_new_privs : 1;
+ bool seccomp_filter : 1;
+ bool seccomp_filter_tsync : 1;
+ bool seccomp_filter_logging : 1;
+ bool seccomp_filter_allow_speculation : 1;
+ bool chroot : 1;
+ bool pivot_root : 1;
+ bool mount_dev : 1;
+ bool mount_tmp : 1;
+ bool do_init : 1;
+ bool run_as_init : 1;
+ bool pid_file : 1;
+ bool cgroups : 1;
+ bool alt_syscall : 1;
+ bool reset_signal_mask : 1;
+ bool reset_signal_handlers : 1;
+ bool close_open_fds : 1;
+ bool new_session_keyring : 1;
+ bool forward_signals : 1;
+ bool setsid : 1;
} flags;
uid_t uid;
gid_t gid;
@@ -180,6 +196,9 @@ struct minijail {
struct minijail_remount *remounts_head;
struct minijail_remount *remounts_tail;
size_t tmpfs_size;
+ bool using_minimalistic_mountns;
+ struct fs_rule *fs_rules_head;
+ struct fs_rule *fs_rules_tail;
char *cgroups[MAX_CGROUPS];
size_t cgroup_count;
struct minijail_rlimit rlimits[MAX_RLIMITS];
@@ -282,6 +301,40 @@ void minijail_preenter(struct minijail *j)
free_remounts_list(j);
}
+/*
+ * Adds a rule for a given path to apply once minijail is entered.
+ *
+ * A new fs_rule holding a private copy of |path| and the given
+ * |landlock_flags| is appended to the tail of |j|'s rule list.
+ *
+ * Returns 0 on success, or -ENOMEM if either the rule or the copy of |path|
+ * cannot be allocated. On failure the rule list is left untouched.
+ */
+int add_fs_restriction_path(struct minijail *j,
+			    const char *path,
+			    uint64_t landlock_flags)
+{
+	struct fs_rule *r = calloc(1, sizeof(*r));
+	if (!r)
+		return -ENOMEM;
+
+	r->path = strdup(path);
+	if (!r->path) {
+		/* Never link a rule without a path into the list. */
+		free(r);
+		return -ENOMEM;
+	}
+	r->landlock_flags = landlock_flags;
+
+	/* Append at the tail; an empty list also needs its head set. */
+	if (j->fs_rules_tail)
+		j->fs_rules_tail->next = r;
+	else
+		j->fs_rules_head = r;
+	j->fs_rules_tail = r;
+
+	return 0;
+}
+
+/* Returns true if MS_BIND is set in the flags of mount |m|. */
+bool mount_has_bind_flag(struct mountpoint *m)
+{
+	return (m->flags & MS_BIND) != 0;
+}
+
+/* Returns true if MS_RDONLY is set in the flags of mount |m|. */
+bool mount_has_readonly_flag(struct mountpoint *m)
+{
+	return (m->flags & MS_RDONLY) != 0;
+}
+
+/* Returns true if mount |m| has MS_SHARED or MS_SLAVE set in its flags. */
+bool mount_events_allowed(struct mountpoint *m)
+{
+	return (m->flags & (MS_SHARED | MS_SLAVE)) != 0;
+}
+
/*
* Strip out flags meant for the child.
* We keep things that are inherited across execve(2).
@@ -324,6 +377,7 @@ struct minijail API *minijail_new(void)
struct minijail *j = calloc(1, sizeof(struct minijail));
if (j) {
j->remount_mode = MS_PRIVATE;
+ j->using_minimalistic_mountns = false;
}
return j;
}
@@ -474,6 +528,50 @@ void API minijail_log_seccomp_filter_failures(struct minijail *j)
}
}
+/*
+ * Records on |j| that the minimalistic mount namespace profile is in use.
+ * minijail_add_minimalistic_mountns_fs_rules() returns without adding any
+ * rules unless this flag has been set.
+ */
+void API minijail_set_using_minimalistic_mountns(struct minijail *j)
+{
+ j->using_minimalistic_mountns = true;
+}
+
+/*
+ * Derives filesystem restriction rules from the mounts configured on |j|.
+ *
+ * Does nothing unless |j| was marked with
+ * minijail_set_using_minimalistic_mountns() and has at least one mount.
+ * Every mount destination gets a read/execute rule, plus a read/write rule
+ * when the mount is not read-only or its flags allow mount events. The
+ * default restrictions and rules for /dev, /proc and (with a VFS namespace)
+ * /tmp are added afterwards. Failures to add individual rules are ignored.
+ */
+void API minijail_add_minimalistic_mountns_fs_rules(struct minijail *j)
+{
+	struct mountpoint *mnt;
+
+	if (!j->using_minimalistic_mountns)
+		return;
+
+	/* Landlock is only enabled by the profile when mounts exist. */
+	if (!j->mounts_head)
+		return;
+
+	/* Apply Landlock rules. */
+	for (mnt = j->mounts_head; mnt; mnt = mnt->next) {
+		minijail_add_fs_restriction_rx(j, mnt->dest);
+		/*
+		 * Allow rw if mounted as writable, or mount flags allow
+		 * mount events.
+		 */
+		if (mount_events_allowed(mnt) ||
+		    !mount_has_readonly_flag(mnt)) {
+			minijail_add_fs_restriction_rw(j, mnt->dest);
+		}
+	}
+
+	minijail_enable_default_fs_restrictions(j);
+	minijail_add_fs_restriction_edit(j, "/dev");
+	minijail_add_fs_restriction_ro(j, "/proc");
+	if (j->flags.vfs)
+		minijail_add_fs_restriction_rw(j, "/tmp");
+}
+
+/*
+ * Adds read/execute rules for the standard library and binary directories,
+ * in the fixed order listed below. Failures to add a rule are ignored.
+ */
+void API minijail_enable_default_fs_restrictions(struct minijail *j)
+{
+	static const char *const default_rx_paths[] = {
+		/* Common library locations. */
+		"/lib",
+		"/lib64",
+		"/usr/lib",
+		"/usr/lib64",
+		/* Common locations for services invoking Minijail. */
+		"/bin",
+		"/sbin",
+		"/usr/sbin",
+		"/usr/bin",
+	};
+	const size_t count =
+	    sizeof(default_rx_paths) / sizeof(default_rx_paths[0]);
+	size_t i;
+
+	for (i = 0; i < count; ++i)
+		minijail_add_fs_restriction_rx(j, default_rx_paths[i]);
+}
+
void API minijail_use_caps(struct minijail *j, uint64_t capmask)
{
/*
@@ -719,7 +817,7 @@ char API *minijail_get_original_path(struct minijail *j,
* "/chroot/path/exe", the source of that mount,
* "/some/path/exe" is what should be returned.
*/
- if (!strcmp(b->dest, path_inside_chroot))
+ if (streq(b->dest, path_inside_chroot))
return strdup(b->src);
/*
@@ -812,6 +910,74 @@ int API minijail_create_session(struct minijail *j)
return 0;
}
+/*
+ * Adds a rule for |path| with ACCESS_FS_ROUGHLY_READ_EXECUTE access.
+ *
+ * Returns 1 if the rule was added and 0 if it could not be added (note that
+ * this is not the 0-on-success convention).
+ */
+int API minijail_add_fs_restriction_rx(struct minijail *j, const char *path)
+{
+	const uint64_t access = ACCESS_FS_ROUGHLY_READ_EXECUTE;
+
+	return add_fs_restriction_path(j, path, access) == 0;
+}
+
+/*
+ * Adds a rule for |path| with ACCESS_FS_ROUGHLY_READ access.
+ *
+ * Returns 1 if the rule was added and 0 if it could not be added (note that
+ * this is not the 0-on-success convention).
+ */
+int API minijail_add_fs_restriction_ro(struct minijail *j, const char *path)
+{
+	const uint64_t access = ACCESS_FS_ROUGHLY_READ;
+
+	return add_fs_restriction_path(j, path, access) == 0;
+}
+
+/*
+ * Adds a rule for |path| with ACCESS_FS_ROUGHLY_READ and
+ * ACCESS_FS_ROUGHLY_BASIC_WRITE access.
+ *
+ * Returns 1 if the rule was added and 0 if it could not be added (note that
+ * this is not the 0-on-success convention).
+ */
+int API minijail_add_fs_restriction_rw(struct minijail *j, const char *path)
+{
+	const uint64_t access =
+	    ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_BASIC_WRITE;
+
+	return add_fs_restriction_path(j, path, access) == 0;
+}
+
+/*
+ * Adds a rule for |path| with ACCESS_FS_ROUGHLY_READ and
+ * ACCESS_FS_ROUGHLY_FULL_WRITE access.
+ *
+ * Returns 1 if the rule was added and 0 if it could not be added (note that
+ * this is not the 0-on-success convention).
+ */
+int API minijail_add_fs_restriction_advanced_rw(struct minijail *j,
+						const char *path)
+{
+	const uint64_t access =
+	    ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_FULL_WRITE;
+
+	return add_fs_restriction_path(j, path, access) == 0;
+}
+
+/*
+ * Adds a rule for |path| with ACCESS_FS_ROUGHLY_READ and
+ * ACCESS_FS_ROUGHLY_EDIT access.
+ *
+ * Returns 1 if the rule was added and 0 if it could not be added (note that
+ * this is not the 0-on-success convention).
+ */
+int API minijail_add_fs_restriction_edit(struct minijail *j,
+					 const char *path)
+{
+	const uint64_t access =
+	    ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_EDIT;
+
+	return add_fs_restriction_path(j, path, access) == 0;
+}
+
+/*
+ * Decides whether |path| may be used as a bind-mount path.
+ *
+ * Returns true when symlink blocking for bind-mount paths is disabled, when
+ * |path| is canonical, or when |path| lies under one of the comma-separated
+ * prefixes in BINDMOUNT_ALLOWED_PREFIXES. Returns false (after logging a
+ * warning) otherwise, including when the prefix list cannot be copied.
+ */
+static bool is_valid_bind_path(const char *path)
+{
+	if (!block_symlinks_in_bindmount_paths())
+		return true;
+
+	/* Canonical paths are always fine; no need to consult the prefixes. */
+	if (is_canonical_path(path))
+		return true;
+
+	/*
+	 * tokenize() will modify both the |prefixes| pointer and the contents
+	 * of the string, so:
+	 * -Copy |BINDMOUNT_ALLOWED_PREFIXES| since it lives in .rodata.
+	 * -Save the original pointer for free()ing.
+	 */
+	char *prefixes = strdup(BINDMOUNT_ALLOWED_PREFIXES);
+	if (!prefixes) {
+		/* Fail closed: without the list no prefix can be matched. */
+		warn("cannot allocate memory to validate path '%s'", path);
+		return false;
+	}
+	attribute_cleanup_str char *orig_prefixes = prefixes;
+	(void)orig_prefixes;
+
+	char *prefix = NULL;
+	while ((prefix = tokenize(&prefixes, ",")) != NULL) {
+		if (path_is_parent(prefix, path))
+			return true;
+	}
+
+	/*
+	 * If the path does not include one of the allowed
+	 * prefixes, fail.
+	 */
+	warn("path '%s' is not a canonical path", path);
+	return false;
+}
+
int API minijail_mount_with_data(struct minijail *j, const char *src,
const char *dest, const char *type,
unsigned long flags, const char *data)
@@ -840,7 +1006,7 @@ int API minijail_mount_with_data(struct minijail *j, const char *src,
* people use these in practice, it's probably OK. If they want
* the kernel defaults, they can pass data="" instead of NULL.
*/
- if (!strcmp(type, "tmpfs")) {
+ if (streq(type, "tmpfs")) {
/* tmpfs defaults to mode=1777 and size=50%. */
data = "mode=0755,size=10M";
}
@@ -895,10 +1061,30 @@ int API minijail_bind(struct minijail *j, const char *src, const char *dest,
{
unsigned long flags = MS_BIND;
+ /*
+ * Check for symlinks in bind-mount source paths to warn the user early.
+ * Minijail will perform one final check immediately before the mount()
+ * call.
+ */
+ if (!is_valid_bind_path(src)) {
+ warn("src '%s' is not a valid bind mount path", src);
+ return -ELOOP;
+ }
+
+ /*
+ * Symlinks in |dest| are blocked by the ChromiumOS LSM:
+ * <kernel>/security/chromiumos/lsm.c#77
+ */
+
if (!writeable)
flags |= MS_RDONLY;
- return minijail_mount(j, src, dest, "", flags);
+ /*
+ * |type| is ignored for bind mounts, use it to signal that this mount
+ * came from minijail_bind().
+ * TODO(b/238362528): Implement a better way to signal this.
+ */
+ return minijail_mount(j, src, dest, "minijail_bind", flags);
}
int API minijail_add_remount(struct minijail *j, const char *mount_name,
@@ -1337,6 +1523,8 @@ int minijail_unmarshal(struct minijail *j, char *serialized, size_t length)
j->filter_prog = NULL;
j->hooks_head = NULL;
j->hooks_tail = NULL;
+ j->fs_rules_head = NULL;
+ j->fs_rules_tail = NULL;
if (j->user) { /* stale pointer */
char *user = consumestr(&serialized, &length);
@@ -1693,7 +1881,9 @@ static int mount_one(const struct minijail *j, struct mountpoint *m,
{
int ret;
char *dest;
- int remount = 0;
+ bool do_remount = false;
+ bool has_bind_flag = mount_has_bind_flag(m);
+ bool has_remount_flag = !!(m->flags & MS_REMOUNT);
unsigned long original_mnt_flags = 0;
/* We assume |dest| has a leading "/". */
@@ -1708,39 +1898,73 @@ static int mount_one(const struct minijail *j, struct mountpoint *m,
return -ENOMEM;
}
- ret =
- setup_mount_destination(m->src, dest, j->uid, j->gid,
- (m->flags & MS_BIND), &original_mnt_flags);
+ ret = setup_mount_destination(m->src, dest, j->uid, j->gid,
+ has_bind_flag);
if (ret) {
warn("cannot create mount target '%s'", dest);
goto error;
}
/*
- * Bind mounts that change the 'ro' flag have to be remounted since
- * 'bind' and other flags can't both be specified in the same command.
- * Remount after the initial mount.
+ * Remount bind mounts that:
+ * - Come from the minijail_bind() API, and
+ * - Add the 'ro' flag
+ * since 'bind' and other flags can't both be specified in the same
+ * mount(2) call.
+ * Callers using minijail_mount() to perform bind mounts are expected to
+ * know what they're doing and call minijail_mount() with MS_REMOUNT as
+ * needed.
+ * Therefore, if the caller is asking for a remount (using MS_REMOUNT),
+ * there is no need to do an extra remount here.
*/
- if ((m->flags & MS_BIND) &&
- ((m->flags & MS_RDONLY) != (original_mnt_flags & MS_RDONLY))) {
- remount = 1;
+ if (has_bind_flag && strcmp(m->type, "minijail_bind") == 0 &&
+ !has_remount_flag) {
/*
- * Restrict the mount flags to those that are user-settable in a
- * MS_REMOUNT request, but excluding MS_RDONLY. The
- * user-requested mount flags will dictate whether the remount
- * will have that flag or not.
+ * Grab the mount flags of the source. These are used to figure
+ * out whether the bind mount needs to be remounted read-only.
*/
- original_mnt_flags &= (MS_USER_SETTABLE_MASK & ~MS_RDONLY);
+ if (get_mount_flags(m->src, &original_mnt_flags)) {
+ warn("cannot get mount flags for '%s'", m->src);
+ goto error;
+ }
+
+ if ((m->flags & MS_RDONLY) !=
+ (original_mnt_flags & MS_RDONLY)) {
+ do_remount = 1;
+ /*
+ * Restrict the mount flags to those that are
+ * user-settable in a MS_REMOUNT request, but excluding
+ * MS_RDONLY. The user-requested mount flags will
+ * dictate whether the remount will have that flag or
+ * not.
+ */
+ original_mnt_flags &=
+ (MS_USER_SETTABLE_MASK & ~MS_RDONLY);
+ }
+ }
+
+ /*
+ * Do a final check for symlinks in |m->src|.
+ * |m->src| will only contain a valid path when purely bind-mounting
+ * (but not when remounting a bind mount).
+ *
+ * Short of having a version of mount(2) that can take fd's, this is the
+ * smallest we can make the TOCTOU window.
+ */
+ if (has_bind_flag && !has_remount_flag && !is_valid_bind_path(m->src)) {
+ warn("src '%s' is not a valid bind mount path", m->src);
+ goto error;
}
ret = mount(m->src, dest, m->type, m->flags, m->data);
if (ret) {
- pwarn("cannot bind-mount '%s' as '%s' with flags %#lx", m->src,
- dest, m->flags);
+ pwarn("cannot mount '%s' as '%s' with flags %#lx", m->src, dest,
+ m->flags);
goto error;
}
- if (remount) {
+ /* Remount *after* the initial mount. */
+ if (do_remount) {
ret =
mount(m->src, dest, NULL,
m->flags | original_mnt_flags | MS_REMOUNT, m->data);
@@ -1774,6 +1998,8 @@ static void process_mounts_or_die(const struct minijail *j)
pdie("mount_dev failed");
if (j->mounts_head && mount_one(j, j->mounts_head, dev_path)) {
+ warn("mount_one failed with /dev at '%s'", dev_path);
+
if (dev_path)
mount_dev_cleanup(dev_path);
@@ -1876,8 +2102,14 @@ static int mount_tmp(const struct minijail *j)
pdie("tmpfs size spec error");
else if ((size_t)ret >= sizeof(data))
pdie("tmpfs size spec too large");
- return mount("none", "/tmp", "tmpfs", MS_NODEV | MS_NOEXEC | MS_NOSUID,
- data);
+
+ unsigned long flags = MS_NODEV | MS_NOEXEC | MS_NOSUID;
+
+ if (block_symlinks_in_noninit_mountns_tmp()) {
+ flags |= MS_NOSYMFOLLOW;
+ }
+
+ return mount("none", "/tmp", "tmpfs", flags, data);
}
static int remount_proc_readonly(const struct minijail *j)
@@ -2164,6 +2396,45 @@ static void drop_caps(const struct minijail *j, unsigned int last_valid_cap)
cap_free(caps);
}
+/*
+ * Creates a ruleset for current inodes then calls landlock_restrict_self().
+ *
+ * - With no fs rules configured on |j|, this is a no-op.
+ * - If the ruleset cannot be created, a warning is logged and the function
+ *   returns without applying any restriction.
+ * - If landlock_restrict_self() fails, the process dies.
+ *
+ * The result of populate_ruleset_internal() is not checked.
+ */
+static void apply_landlock_restrictions(const struct minijail *j)
+{
+	const struct fs_rule *r;
+	attribute_cleanup_fd int ruleset_fd = -1;
+
+	/* Nothing to enforce when no rules were added. */
+	if (!j->fs_rules_head)
+		return;
+
+	struct minijail_landlock_ruleset_attr ruleset_attr = {
+		.handled_access_fs = HANDLED_ACCESS_TYPES
+	};
+	ruleset_fd =
+	    landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	if (ruleset_fd < 0) {
+		/* Save errno before logging, which may modify it. */
+		const int err = errno;
+		pwarn("Failed to create a ruleset");
+		/*
+		 * The errno text was already reported above; the follow-up
+		 * hints use warn() so a possibly stale errno is not printed
+		 * a second time.
+		 */
+		switch (err) {
+		case ENOSYS:
+			warn("Landlock is not supported by the current kernel");
+			break;
+		case EOPNOTSUPP:
+			warn("Landlock is currently disabled by kernel config");
+			break;
+		default:
+			break;
+		}
+		return;
+	}
+
+	for (r = j->fs_rules_head; r; r = r->next) {
+		populate_ruleset_internal(r->path, ruleset_fd,
+					  r->landlock_flags);
+	}
+
+	if (landlock_restrict_self(ruleset_fd, 0))
+		pdie("Failed to enforce ruleset");
+}
+
static void set_seccomp_filter(const struct minijail *j)
{
/*
@@ -2457,8 +2728,14 @@ void API minijail_enter(const struct minijail *j)
*/
drop_ugid(j);
drop_caps(j, last_valid_cap);
+
+ // Landlock is applied as late as possible. If no_new_privs is
+ // set, then it can be applied after dropping caps.
+ apply_landlock_restrictions(j);
set_seccomp_filter(j);
} else {
+ apply_landlock_restrictions(j);
+
/*
* If we're not setting no_new_privs,
* we need to set seccomp filter *before* dropping privileges.
@@ -3659,6 +3936,12 @@ void API minijail_destroy(struct minijail *j)
free(c);
}
j->hooks_tail = NULL;
+ while (j->fs_rules_head) {
+ struct fs_rule *r = j->fs_rules_head;
+ j->fs_rules_head = r->next;
+ free(r);
+ }
+ j->fs_rules_tail = NULL;
if (j->user)
free(j->user);
if (j->suppl_gid_list)